All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/2] Enable fscache as an optional feature of ceph.
@ 2013-05-23 21:55 Milosz Tanski
  2013-05-28 17:11 ` Sage Weil
  0 siblings, 1 reply; 13+ messages in thread
From: Milosz Tanski @ 2013-05-23 21:55 UTC (permalink / raw)
  To: ceph-devel, linux-cachefs

Enable fscache as an optional feature of ceph.

Adding support for fscache to the Ceph filesystem. This would bring it on
par with some of the other network filesystems in Linux (like NFS, AFS, etc...)

This exploits the existing Ceph cache & lazyio capabilities.

Signed-off-by: Milosz Tanski <milosz@adfin.com>
---
 fs/ceph/Kconfig  |    9 ++++++
 fs/ceph/Makefile |    2 ++
 fs/ceph/addr.c   |   85 ++++++++++++++++++++++++++++++++++++++++--------------
 fs/ceph/caps.c   |   21 +++++++++++++-
 fs/ceph/file.c   |    9 ++++++
 fs/ceph/inode.c  |   25 ++++++++++++++--
 fs/ceph/super.c  |   25 ++++++++++++++--
 fs/ceph/super.h  |   12 ++++++++
 8 files changed, 162 insertions(+), 26 deletions(-)

diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 49bc782..ac9a2ef 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -16,3 +16,12 @@ config CEPH_FS

   If unsure, say N.

+if CEPH_FS
+config CEPH_FSCACHE
+ bool "Enable Ceph client caching support"
+ depends on CEPH_FS=m && FSCACHE || CEPH_FS=y && FSCACHE=y
+ help
+  Choose Y here to enable persistent, read-only local
+  caching support for Ceph clients using FS-Cache
+
+endif
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
index bd35212..0af0678 100644
--- a/fs/ceph/Makefile
+++ b/fs/ceph/Makefile
@@ -9,3 +9,5 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
  mds_client.o mdsmap.o strings.o ceph_frag.o \
  debugfs.o

+ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
+
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 3e68ac1..fd3a1cc 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -11,6 +11,7 @@

 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"
 #include <linux/ceph/osd_client.h>

 /*
@@ -149,11 +150,26 @@ static void ceph_invalidatepage(struct page
*page, unsigned long offset)
  struct ceph_inode_info *ci;
  struct ceph_snap_context *snapc = page_snap_context(page);

- BUG_ON(!PageLocked(page));
- BUG_ON(!PagePrivate(page));
  BUG_ON(!page->mapping);

  inode = page->mapping->host;
+ ci = ceph_inode(inode);
+
+ if (offset != 0) {
+ dout("%p invalidatepage %p idx %lu partial dirty page\n",
+     inode, page, page->index);
+ return;
+ }
+
+#ifdef CONFIG_CEPH_FSCACHE
+ if (PageFsCache(page))
+ ceph_invalidate_fscache_page(inode, page);
+#endif
+
+ if (!PagePrivate(page))
+ return;
+
+ BUG_ON(!PageLocked(page));

  /*
  * We can get non-dirty pages here due to races between
@@ -163,31 +179,32 @@ static void ceph_invalidatepage(struct page
*page, unsigned long offset)
  if (!PageDirty(page))
  pr_err("%p invalidatepage %p page not dirty\n", inode, page);

- if (offset == 0)
- ClearPageChecked(page);
+ ClearPageChecked(page);

- ci = ceph_inode(inode);
- if (offset == 0) {
- dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
-     inode, page, page->index, offset);
- ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
- ceph_put_snap_context(snapc);
- page->private = 0;
- ClearPagePrivate(page);
- } else {
- dout("%p invalidatepage %p idx %lu partial dirty page\n",
-     inode, page, page->index);
- }
+ dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
+     inode, page, page->index, offset);
+
+ ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
+ ceph_put_snap_context(snapc);
+ page->private = 0;
+ ClearPagePrivate(page);
 }

-/* just a sanity check */
 static int ceph_releasepage(struct page *page, gfp_t g)
 {
  struct inode *inode = page->mapping ? page->mapping->host : NULL;
  dout("%p releasepage %p idx %lu\n", inode, page, page->index);
  WARN_ON(PageDirty(page));
- WARN_ON(PagePrivate(page));
- return 0;
+
+#ifdef CONFIG_CEPH_FSCACHE
+ /* Can we release the page from the cache? */
+ if (PageFsCache(page) && ceph_release_fscache_page(page, g) == 0)
+ return 0;
+#endif
+ if (PagePrivate(page))
+ return 0;
+
+ return 1;
 }

 /*
@@ -197,11 +214,18 @@ static int readpage_nounlock(struct file *filp,
struct page *page)
 {
  struct inode *inode = file_inode(filp);
  struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_osd_client *osdc =
+ struct ceph_osd_client *osdc =
  &ceph_inode_to_client(inode)->client->osdc;
  int err = 0;
  u64 len = PAGE_CACHE_SIZE;

+#ifdef CONFIG_CEPH_FSCACHE
+ err = ceph_readpage_from_fscache(inode, page);
+
+ if (err == 0)
+ goto out;
+#endif
+
  dout("readpage inode %p file %p page %p index %lu\n",
      inode, filp, page, page->index);
  err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
@@ -219,6 +243,10 @@ static int readpage_nounlock(struct file *filp,
struct page *page)
  }
  SetPageUptodate(page);

+#ifdef CONFIG_CEPH_FSCACHE
+ ceph_readpage_to_fscache(inode, page);
+#endif
+
 out:
  return err < 0 ? err : 0;
 }
@@ -262,6 +290,9 @@ static void finish_read(struct ceph_osd_request
*req, struct ceph_msg *msg)
  flush_dcache_page(page);
  SetPageUptodate(page);
  unlock_page(page);
+#ifdef CONFIG_CEPH_FSCACHE
+ ceph_readpage_to_fscache(inode, page);
+#endif
  page_cache_release(page);
  bytes -= PAGE_CACHE_SIZE;
  }
@@ -330,7 +361,7 @@ static int start_read(struct inode *inode, struct
list_head *page_list, int max)
  page = list_entry(page_list->prev, struct page, lru);
  BUG_ON(PageLocked(page));
  list_del(&page->lru);
-
+
  dout("start_read %p adding %p idx %lu\n", inode, page,
      page->index);
  if (add_to_page_cache_lru(page, &inode->i_data, page->index,
@@ -377,6 +408,14 @@ static int ceph_readpages(struct file *file,
struct address_space *mapping,
  int rc = 0;
  int max = 0;

+#ifdef CONFIG_CEPH_FSCACHE
+ rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
+ &nr_pages);
+
+ if (rc == 0)
+ goto out;
+#endif
+
  if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
  max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
  >> PAGE_SHIFT;
@@ -490,6 +529,10 @@ static int writepage_nounlock(struct page *page,
struct writeback_control *wbc)
     CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
  set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);

+#ifdef CONFIG_CEPH_FSCACHE
+ ceph_readpage_to_fscache(inode, page);
+#endif
+
  set_page_writeback(page);
  err = ceph_osdc_writepages(osdc, ceph_vino(inode),
    &ci->i_layout, snapc,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index da0f9b8..7e8d8d3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -10,6 +10,7 @@

 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"
 #include <linux/ceph/decode.h>
 #include <linux/ceph/messenger.h>

@@ -486,8 +487,14 @@ static void __check_cap_issue(struct
ceph_inode_info *ci, struct ceph_cap *cap,
  * i_rdcache_gen.
  */
  if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
-    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
+    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
  ci->i_rdcache_gen++;
+#ifdef CONFIG_CEPH_FSCACHE
+ /* Invalidate the cache for the whole file. */
+ dout("Invalidating inode data cache: %p", &ci->vfs_inode);
+ fscache_invalidate(ci->fscache);
+#endif
+ }

  /*
  * if we are newly issued FILE_SHARED, mark dir not complete; we
@@ -2356,6 +2363,12 @@ static void handle_cap_grant(struct inode
*inode, struct ceph_mds_caps *grant,
  if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
     (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
     !ci->i_wrbuffer_ref) {
+
+#ifdef CONFIG_CEPH_FSCACHE
+ /* Close the fscache on inode */
+ ceph_fscache_unregister_inode_cookie(ci);
+#endif
+
  if (try_nonblocking_invalidate(inode) == 0) {
  revoked_rdcache = 1;
  } else {
@@ -2425,6 +2438,12 @@ static void handle_cap_grant(struct inode
*inode, struct ceph_mds_caps *grant,
  wake = 1;
  }

+#ifdef CONFIG_CEPH_FSCACHE
+ /* Register cache (if needed); perform this after amny size change. */
+ if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
+ ceph_fscache_register_inode_cookie(session->s_mdsc->fsc, ci);
+#endif
+
  /* check cap bits */
  wanted = __ceph_caps_wanted(ci);
  used = __ceph_caps_used(ci);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 656e169..e7ecc04 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -11,6 +11,7 @@

 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"

 /*
  * Ceph file operations
@@ -67,10 +68,17 @@ out:
 static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 {
  struct ceph_file_info *cf;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
  int ret = 0;

  switch (inode->i_mode & S_IFMT) {
  case S_IFREG:
+#ifdef CONFIG_CEPH_FSCACHE
+ spin_lock(&ci->i_ceph_lock);
+ ceph_fscache_register_inode_cookie(fsc, ci);
+ spin_lock(&ci->i_ceph_lock);
+#endif
  case S_IFDIR:
  dout("init_file %p %p 0%o (regular)\n", inode, file,
      inode->i_mode);
@@ -181,6 +189,7 @@ int ceph_open(struct inode *inode, struct file *file)
  spin_unlock(&ci->i_ceph_lock);
  return ceph_init_file(inode, file, fmode);
  }
+
  spin_unlock(&ci->i_ceph_lock);

  dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index be0f7e2..620b84c 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -12,6 +12,7 @@

 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"
 #include <linux/ceph/decode.h>

 /*
@@ -377,6 +378,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)

  INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);

+#ifdef CONFIG_CEPH_FSCACHE
+ ci->fscache = NULL;
+#endif
+
  return &ci->vfs_inode;
 }

@@ -396,6 +401,10 @@ void ceph_destroy_inode(struct inode *inode)

  dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));

+#ifdef CONFIG_CEPH_FSCACHE
+ ceph_fscache_unregister_inode_cookie(ci);
+#endif
+
  ceph_queue_caps_release(inode);

  /*
@@ -430,7 +439,6 @@ void ceph_destroy_inode(struct inode *inode)
  call_rcu(&inode->i_rcu, ceph_i_callback);
 }

-
 /*
  * Helpers to fill in size, ctime, mtime, and atime.  We have to be
  * careful because either the client or MDS may have more up to date
@@ -633,6 +641,14 @@ static int fill_inode(struct inode *inode,
     le32_to_cpu(info->time_warp_seq),
     &ctime, &mtime, &atime);

+#ifdef CONFIG_CEPH_FSCACHE
+ /* Notify the cache that size has changed */
+ if (queue_trunc && ci->fscache) {
+ pr_info("size changed inode: %p cap flags\n", &ci->vfs_inode);
+ fscache_attr_changed(ci->fscache);
+ }
+#endif
+
  /* only update max_size on auth cap */
  if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
     ci->i_max_size != le64_to_cpu(info->max_size)) {
@@ -1066,7 +1082,7 @@ int ceph_fill_trace(struct super_block *sb,
struct ceph_mds_request *req,
  * complete.
  */
  ceph_set_dentry_offset(req->r_old_dentry);
- dout("dn %p gets new offset %lld\n", req->r_old_dentry,
+ dout("dn %p gets new offset %lld\n", req->r_old_dentry,
      ceph_dentry(req->r_old_dentry)->offset);

  dn = req->r_old_dentry;  /* use old_dentry */
@@ -1430,6 +1446,11 @@ static void ceph_invalidate_work(struct
work_struct *work)
  orig_gen = ci->i_rdcache_gen;
  spin_unlock(&ci->i_ceph_lock);

+#ifdef CONFIG_CEPH_FSCACHE
+ pr_info("cache invalidating inode: %p cap flags\n", &ci->vfs_inode);
+ fscache_invalidate(ci->fscache);
+#endif
+
  truncate_inode_pages(&inode->i_data, 0);

  spin_lock(&ci->i_ceph_lock);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 7d377c9..7847ef7 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -17,6 +17,7 @@

 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"

 #include <linux/ceph/ceph_features.h>
 #include <linux/ceph/decode.h>
@@ -530,6 +531,11 @@ static struct ceph_fs_client
*create_fs_client(struct ceph_mount_options *fsopt,
  if (!fsc->wb_pagevec_pool)
  goto fail_trunc_wq;

+#ifdef CONFIG_CEPH_FSCACHE
+ /* fscache */
+ ceph_fscache_register_fsid_cookie(fsc);
+#endif
+
  /* caps */
  fsc->min_caps = fsopt->max_readdir;

@@ -554,6 +560,10 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
 {
  dout("destroy_fs_client %p\n", fsc);

+#ifdef CONFIG_CEPH_FSCACHE
+ ceph_fscache_unregister_fsid_cookie(fsc);
+#endif
+
  destroy_workqueue(fsc->wb_wq);
  destroy_workqueue(fsc->pg_inv_wq);
  destroy_workqueue(fsc->trunc_wq);
@@ -588,6 +598,8 @@ static void ceph_inode_init_once(void *foo)

 static int __init init_caches(void)
 {
+ int error = -ENOMEM;
+
  ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
       sizeof(struct ceph_inode_info),
       __alignof__(struct ceph_inode_info),
@@ -611,15 +623,19 @@ static int __init init_caches(void)
  if (ceph_file_cachep == NULL)
  goto bad_file;

- return 0;
+#ifdef CONFIG_CEPH_FSCACHE
+ if ((error = fscache_register_netfs(&ceph_cache_netfs)))
+ goto bad_file;
+#endif

+ return 0;
 bad_file:
  kmem_cache_destroy(ceph_dentry_cachep);
 bad_dentry:
  kmem_cache_destroy(ceph_cap_cachep);
 bad_cap:
  kmem_cache_destroy(ceph_inode_cachep);
- return -ENOMEM;
+ return error;
 }

 static void destroy_caches(void)
@@ -629,10 +645,15 @@ static void destroy_caches(void)
  * destroy cache.
  */
  rcu_barrier();
+
  kmem_cache_destroy(ceph_inode_cachep);
  kmem_cache_destroy(ceph_cap_cachep);
  kmem_cache_destroy(ceph_dentry_cachep);
  kmem_cache_destroy(ceph_file_cachep);
+
+#ifdef CONFIG_CEPH_FSCACHE
+ fscache_unregister_netfs(&ceph_cache_netfs);
+#endif
 }


diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 8696be2..2980337 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -16,6 +16,10 @@

 #include <linux/ceph/libceph.h>

+#ifdef CONFIG_CEPH_FSCACHE
+#include <linux/fscache.h>
+#endif
+
 /* f_type in struct statfs */
 #define CEPH_SUPER_MAGIC 0x00c36400

@@ -90,6 +94,10 @@ struct ceph_fs_client {
  struct dentry *debugfs_bdi;
  struct dentry *debugfs_mdsc, *debugfs_mdsmap;
 #endif
+
+#ifdef CONFIG_CEPH_FSCACHE
+ struct fscache_cookie *fscache;
+#endif
 };


@@ -319,6 +327,10 @@ struct ceph_inode_info {

  struct work_struct i_vmtruncate_work;

+#ifdef CONFIG_CEPH_FSCACHE
+ struct fscache_cookie *fscache;
+#endif
+
  struct inode vfs_inode; /* at end */
 };

--
1.7.9.5

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/2] Enable fscache as an optional feature of ceph.
  2013-05-23 21:55 [PATCH 2/2] Enable fscache as an optional feature of ceph Milosz Tanski
@ 2013-05-28 17:11 ` Sage Weil
  2013-05-29 18:06   ` Milosz Tanski
  0 siblings, 1 reply; 13+ messages in thread
From: Sage Weil @ 2013-05-28 17:11 UTC (permalink / raw)
  To: Milosz Tanski; +Cc: ceph-devel, linux-cachefs

Hi Milosz,

Just a heads up that I hope to take a closer look at the patch this 
afternoon or tomorrow.  Just catching up after the long weekend.

Thanks!
sage


On Thu, 23 May 2013, Milosz Tanski wrote:

> Enable fscache as an optional feature of ceph.
> 
> Adding support for fscache to the Ceph filesystem. This would bring it to on
> par with some of the other network filesystems in Linux (like NFS, AFS, etc...)
> 
> This exploits the existing Ceph cache & lazyio capabilities.
> 
> Signed-off-by: Milosz Tanski <milosz@adfin.com>
> ---
>  fs/ceph/Kconfig  |    9 ++++++
>  fs/ceph/Makefile |    2 ++
>  fs/ceph/addr.c   |   85 ++++++++++++++++++++++++++++++++++++++++--------------
>  fs/ceph/caps.c   |   21 +++++++++++++-
>  fs/ceph/file.c   |    9 ++++++
>  fs/ceph/inode.c  |   25 ++++++++++++++--
>  fs/ceph/super.c  |   25 ++++++++++++++--
>  fs/ceph/super.h  |   12 ++++++++
>  8 files changed, 162 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
> index 49bc782..ac9a2ef 100644
> --- a/fs/ceph/Kconfig
> +++ b/fs/ceph/Kconfig
> @@ -16,3 +16,12 @@ config CEPH_FS
> 
>    If unsure, say N.
> 
> +if CEPH_FS
> +config CEPH_FSCACHE
> + bool "Enable Ceph client caching support"
> + depends on CEPH_FS=m && FSCACHE || CEPH_FS=y && FSCACHE=y
> + help
> +  Choose Y here to enable persistent, read-only local
> +  caching support for Ceph clients using FS-Cache
> +
> +endif
> diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
> index bd35212..0af0678 100644
> --- a/fs/ceph/Makefile
> +++ b/fs/ceph/Makefile
> @@ -9,3 +9,5 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
>   mds_client.o mdsmap.o strings.o ceph_frag.o \
>   debugfs.o
> 
> +ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
> +
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 3e68ac1..fd3a1cc 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -11,6 +11,7 @@
> 
>  #include "super.h"
>  #include "mds_client.h"
> +#include "cache.h"
>  #include <linux/ceph/osd_client.h>
> 
>  /*
> @@ -149,11 +150,26 @@ static void ceph_invalidatepage(struct page
> *page, unsigned long offset)
>   struct ceph_inode_info *ci;
>   struct ceph_snap_context *snapc = page_snap_context(page);
> 
> - BUG_ON(!PageLocked(page));
> - BUG_ON(!PagePrivate(page));
>   BUG_ON(!page->mapping);
> 
>   inode = page->mapping->host;
> + ci = ceph_inode(inode);
> +
> + if (offset != 0) {
> + dout("%p invalidatepage %p idx %lu partial dirty page\n",
> +     inode, page, page->index);
> + return;
> + }
> +
> +#ifdef CONFIG_CEPH_FSCACHE
> + if (PageFsCache(page))
> + ceph_invalidate_fscache_page(inode, page);
> +#endif
> +
> + if (!PagePrivate(page))
> + return;
> +
> + BUG_ON(!PageLocked(page));
> 
>   /*
>   * We can get non-dirty pages here due to races between
> @@ -163,31 +179,32 @@ static void ceph_invalidatepage(struct page
> *page, unsigned long offset)
>   if (!PageDirty(page))
>   pr_err("%p invalidatepage %p page not dirty\n", inode, page);
> 
> - if (offset == 0)
> - ClearPageChecked(page);
> + ClearPageChecked(page);
> 
> - ci = ceph_inode(inode);
> - if (offset == 0) {
> - dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
> -     inode, page, page->index, offset);
> - ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
> - ceph_put_snap_context(snapc);
> - page->private = 0;
> - ClearPagePrivate(page);
> - } else {
> - dout("%p invalidatepage %p idx %lu partial dirty page\n",
> -     inode, page, page->index);
> - }
> + dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
> +     inode, page, page->index, offset);
> +
> + ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
> + ceph_put_snap_context(snapc);
> + page->private = 0;
> + ClearPagePrivate(page);
>  }
> 
> -/* just a sanity check */
>  static int ceph_releasepage(struct page *page, gfp_t g)
>  {
>   struct inode *inode = page->mapping ? page->mapping->host : NULL;
>   dout("%p releasepage %p idx %lu\n", inode, page, page->index);
>   WARN_ON(PageDirty(page));
> - WARN_ON(PagePrivate(page));
> - return 0;
> +
> +#ifdef CONFIG_CEPH_FSCACHE
> + /* Can we release the page from the cache? */
> + if (PageFsCache(page) && ceph_release_fscache_page(page, g) == 0)
> + return 0;
> +#endif
> + if (PagePrivate(page))
> + return 0;
> +
> + return 1;
>  }
> 
>  /*
> @@ -197,11 +214,18 @@ static int readpage_nounlock(struct file *filp,
> struct page *page)
>  {
>   struct inode *inode = file_inode(filp);
>   struct ceph_inode_info *ci = ceph_inode(inode);
> - struct ceph_osd_client *osdc =
> + struct ceph_osd_client *osdc =
>   &ceph_inode_to_client(inode)->client->osdc;
>   int err = 0;
>   u64 len = PAGE_CACHE_SIZE;
> 
> +#ifdef CONFIG_CEPH_FSCACHE
> + err = ceph_readpage_from_fscache(inode, page);
> +
> + if (err == 0)
> + goto out;
> +#endif
> +
>   dout("readpage inode %p file %p page %p index %lu\n",
>       inode, filp, page, page->index);
>   err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
> @@ -219,6 +243,10 @@ static int readpage_nounlock(struct file *filp,
> struct page *page)
>   }
>   SetPageUptodate(page);
> 
> +#ifdef CONFIG_CEPH_FSCACHE
> + ceph_readpage_to_fscache(inode, page);
> +#endif
> +
>  out:
>   return err < 0 ? err : 0;
>  }
> @@ -262,6 +290,9 @@ static void finish_read(struct ceph_osd_request
> *req, struct ceph_msg *msg)
>   flush_dcache_page(page);
>   SetPageUptodate(page);
>   unlock_page(page);
> +#ifdef CONFIG_CEPH_FSCACHE
> + ceph_readpage_to_fscache(inode, page);
> +#endif
>   page_cache_release(page);
>   bytes -= PAGE_CACHE_SIZE;
>   }
> @@ -330,7 +361,7 @@ static int start_read(struct inode *inode, struct
> list_head *page_list, int max)
>   page = list_entry(page_list->prev, struct page, lru);
>   BUG_ON(PageLocked(page));
>   list_del(&page->lru);
> -
> +
>   dout("start_read %p adding %p idx %lu\n", inode, page,
>       page->index);
>   if (add_to_page_cache_lru(page, &inode->i_data, page->index,
> @@ -377,6 +408,14 @@ static int ceph_readpages(struct file *file,
> struct address_space *mapping,
>   int rc = 0;
>   int max = 0;
> 
> +#ifdef CONFIG_CEPH_FSCACHE
> + rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
> + &nr_pages);
> +
> + if (rc == 0)
> + goto out;
> +#endif
> +
>   if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
>   max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
>   >> PAGE_SHIFT;
> @@ -490,6 +529,10 @@ static int writepage_nounlock(struct page *page,
> struct writeback_control *wbc)
>      CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
>   set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
> 
> +#ifdef CONFIG_CEPH_FSCACHE
> + ceph_readpage_to_fscache(inode, page);
> +#endif
> +
>   set_page_writeback(page);
>   err = ceph_osdc_writepages(osdc, ceph_vino(inode),
>     &ci->i_layout, snapc,
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index da0f9b8..7e8d8d3 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -10,6 +10,7 @@
> 
>  #include "super.h"
>  #include "mds_client.h"
> +#include "cache.h"
>  #include <linux/ceph/decode.h>
>  #include <linux/ceph/messenger.h>
> 
> @@ -486,8 +487,14 @@ static void __check_cap_issue(struct
> ceph_inode_info *ci, struct ceph_cap *cap,
>   * i_rdcache_gen.
>   */
>   if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
> -    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
> +    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
>   ci->i_rdcache_gen++;
> +#ifdef CONFIG_CEPH_FSCACHE
> + /* Invalidate the cache for the whole file. */
> + dout("Invalidating inode data cache: %p", &ci->vfs_inode);
> + fscache_invalidate(ci->fscache);
> +#endif
> + }
> 
>   /*
>   * if we are newly issued FILE_SHARED, mark dir not complete; we
> @@ -2356,6 +2363,12 @@ static void handle_cap_grant(struct inode
> *inode, struct ceph_mds_caps *grant,
>   if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
>      (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
>      !ci->i_wrbuffer_ref) {
> +
> +#ifdef CONFIG_CEPH_FSCACHE
> + /* Close the fscache on inode */
> + ceph_fscache_unregister_inode_cookie(ci);
> +#endif
> +
>   if (try_nonblocking_invalidate(inode) == 0) {
>   revoked_rdcache = 1;
>   } else {
> @@ -2425,6 +2438,12 @@ static void handle_cap_grant(struct inode
> *inode, struct ceph_mds_caps *grant,
>   wake = 1;
>   }
> 
> +#ifdef CONFIG_CEPH_FSCACHE
> + /* Register cache (if needed); perform this after amny size change. */
> + if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
> + ceph_fscache_register_inode_cookie(session->s_mdsc->fsc, ci);
> +#endif
> +
>   /* check cap bits */
>   wanted = __ceph_caps_wanted(ci);
>   used = __ceph_caps_used(ci);
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 656e169..e7ecc04 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -11,6 +11,7 @@
> 
>  #include "super.h"
>  #include "mds_client.h"
> +#include "cache.h"
> 
>  /*
>   * Ceph file operations
> @@ -67,10 +68,17 @@ out:
>  static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
>  {
>   struct ceph_file_info *cf;
> + struct ceph_inode_info *ci = ceph_inode(inode);
> + struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
>   int ret = 0;
> 
>   switch (inode->i_mode & S_IFMT) {
>   case S_IFREG:
> +#ifdef CONFIG_CEPH_FSCACHE
> + spin_lock(&ci->i_ceph_lock);
> + ceph_fscache_register_inode_cookie(fsc, ci);
> + spin_lock(&ci->i_ceph_lock);
> +#endif
>   case S_IFDIR:
>   dout("init_file %p %p 0%o (regular)\n", inode, file,
>       inode->i_mode);
> @@ -181,6 +189,7 @@ int ceph_open(struct inode *inode, struct file *file)
>   spin_unlock(&ci->i_ceph_lock);
>   return ceph_init_file(inode, file, fmode);
>   }
> +
>   spin_unlock(&ci->i_ceph_lock);
> 
>   dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index be0f7e2..620b84c 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -12,6 +12,7 @@
> 
>  #include "super.h"
>  #include "mds_client.h"
> +#include "cache.h"
>  #include <linux/ceph/decode.h>
> 
>  /*
> @@ -377,6 +378,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
> 
>   INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
> 
> +#ifdef CONFIG_CEPH_FSCACHE
> + ci->fscache = NULL;
> +#endif
> +
>   return &ci->vfs_inode;
>  }
> 
> @@ -396,6 +401,10 @@ void ceph_destroy_inode(struct inode *inode)
> 
>   dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));
> 
> +#ifdef CONFIG_CEPH_FSCACHE
> + ceph_fscache_unregister_inode_cookie(ci);
> +#endif
> +
>   ceph_queue_caps_release(inode);
> 
>   /*
> @@ -430,7 +439,6 @@ void ceph_destroy_inode(struct inode *inode)
>   call_rcu(&inode->i_rcu, ceph_i_callback);
>  }
> 
> -
>  /*
>   * Helpers to fill in size, ctime, mtime, and atime.  We have to be
>   * careful because either the client or MDS may have more up to date
> @@ -633,6 +641,14 @@ static int fill_inode(struct inode *inode,
>      le32_to_cpu(info->time_warp_seq),
>      &ctime, &mtime, &atime);
> 
> +#ifdef CONFIG_CEPH_FSCACHE
> + /* Notify the cache that size has changed */
> + if (queue_trunc && ci->fscache) {
> + pr_info("size changed inode: %p cap flags\n", &ci->vfs_inode);
> + fscache_attr_changed(ci->fscache);
> + }
> +#endif
> +
>   /* only update max_size on auth cap */
>   if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
>      ci->i_max_size != le64_to_cpu(info->max_size)) {
> @@ -1066,7 +1082,7 @@ int ceph_fill_trace(struct super_block *sb,
> struct ceph_mds_request *req,
>   * complete.
>   */
>   ceph_set_dentry_offset(req->r_old_dentry);
> - dout("dn %p gets new offset %lld\n", req->r_old_dentry,
> + dout("dn %p gets new offset %lld\n", req->r_old_dentry,
>       ceph_dentry(req->r_old_dentry)->offset);
> 
>   dn = req->r_old_dentry;  /* use old_dentry */
> @@ -1430,6 +1446,11 @@ static void ceph_invalidate_work(struct
> work_struct *work)
>   orig_gen = ci->i_rdcache_gen;
>   spin_unlock(&ci->i_ceph_lock);
> 
> +#ifdef CONFIG_CEPH_FSCACHE
> + pr_info("cache invalidating inode: %p cap flags\n", &ci->vfs_inode);
> + fscache_invalidate(ci->fscache);
> +#endif
> +
>   truncate_inode_pages(&inode->i_data, 0);
> 
>   spin_lock(&ci->i_ceph_lock);
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 7d377c9..7847ef7 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -17,6 +17,7 @@
> 
>  #include "super.h"
>  #include "mds_client.h"
> +#include "cache.h"
> 
>  #include <linux/ceph/ceph_features.h>
>  #include <linux/ceph/decode.h>
> @@ -530,6 +531,11 @@ static struct ceph_fs_client
> *create_fs_client(struct ceph_mount_options *fsopt,
>   if (!fsc->wb_pagevec_pool)
>   goto fail_trunc_wq;
> 
> +#ifdef CONFIG_CEPH_FSCACHE
> + /* fscache */
> + ceph_fscache_register_fsid_cookie(fsc);
> +#endif
> +
>   /* caps */
>   fsc->min_caps = fsopt->max_readdir;
> 
> @@ -554,6 +560,10 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
>  {
>   dout("destroy_fs_client %p\n", fsc);
> 
> +#ifdef CONFIG_CEPH_FSCACHE
> + ceph_fscache_unregister_fsid_cookie(fsc);
> +#endif
> +
>   destroy_workqueue(fsc->wb_wq);
>   destroy_workqueue(fsc->pg_inv_wq);
>   destroy_workqueue(fsc->trunc_wq);
> @@ -588,6 +598,8 @@ static void ceph_inode_init_once(void *foo)
> 
>  static int __init init_caches(void)
>  {
> + int error = -ENOMEM;
> +
>   ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
>        sizeof(struct ceph_inode_info),
>        __alignof__(struct ceph_inode_info),
> @@ -611,15 +623,19 @@ static int __init init_caches(void)
>   if (ceph_file_cachep == NULL)
>   goto bad_file;
> 
> - return 0;
> +#ifdef CONFIG_CEPH_FSCACHE
> + if ((error = fscache_register_netfs(&ceph_cache_netfs)))
> + goto bad_file;
> +#endif
> 
> + return 0;
>  bad_file:
>   kmem_cache_destroy(ceph_dentry_cachep);
>  bad_dentry:
>   kmem_cache_destroy(ceph_cap_cachep);
>  bad_cap:
>   kmem_cache_destroy(ceph_inode_cachep);
> - return -ENOMEM;
> + return error;
>  }
> 
>  static void destroy_caches(void)
> @@ -629,10 +645,15 @@ static void destroy_caches(void)
>   * destroy cache.
>   */
>   rcu_barrier();
> +
>   kmem_cache_destroy(ceph_inode_cachep);
>   kmem_cache_destroy(ceph_cap_cachep);
>   kmem_cache_destroy(ceph_dentry_cachep);
>   kmem_cache_destroy(ceph_file_cachep);
> +
> +#ifdef CONFIG_CEPH_FSCACHE
> + fscache_unregister_netfs(&ceph_cache_netfs);
> +#endif
>  }
> 
> 
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 8696be2..2980337 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -16,6 +16,10 @@
> 
>  #include <linux/ceph/libceph.h>
> 
> +#ifdef CONFIG_CEPH_FSCACHE
> +#include <linux/fscache.h>
> +#endif
> +
>  /* f_type in struct statfs */
>  #define CEPH_SUPER_MAGIC 0x00c36400
> 
> @@ -90,6 +94,10 @@ struct ceph_fs_client {
>   struct dentry *debugfs_bdi;
>   struct dentry *debugfs_mdsc, *debugfs_mdsmap;
>  #endif
> +
> +#ifdef CONFIG_CEPH_FSCACHE
> + struct fscache_cookie *fscache;
> +#endif
>  };
> 
> 
> @@ -319,6 +327,10 @@ struct ceph_inode_info {
> 
>   struct work_struct i_vmtruncate_work;
> 
> +#ifdef CONFIG_CEPH_FSCACHE
> + struct fscache_cookie *fscache;
> +#endif
> +
>   struct inode vfs_inode; /* at end */
>  };
> 
> --
> 1.7.9.5
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/2] Enable fscache as an optional feature of ceph.
  2013-05-28 17:11 ` Sage Weil
@ 2013-05-29 18:06   ` Milosz Tanski
  2013-06-05 16:26     ` Milosz Tanski
  2013-06-17 13:16     ` Elso Andras
  0 siblings, 2 replies; 13+ messages in thread
From: Milosz Tanski @ 2013-05-29 18:06 UTC (permalink / raw)
  To: Sage Weil; +Cc: ceph-devel, linux-cachefs

Sage,

Thanks for taking a look at this. No worries about the timing.

I added two extra changes into my branch located here:
https://bitbucket.org/adfin/linux-fs/commits/branch/forceph. The first
one is a fix for kernel deadlock. The second one makes fsc cache a
non-default mount option (akin to NFS).

Finally, I observed an occasional oops in the fscache that's fixed in
David's branch that's waiting to get into mainline. The fix for the
issue is here: http://git.kernel.org/cgit/linux/kernel/git/dhowells/linux-fs.git/commit/?h=fscache&id=82958c45e35963c93fc6cbe6a27752e2d97e9f9a.
I can only cause that issue by forcing the kernel to drop its caches
in some cases.

Let me know if you have any other feedback, or if I can help in any way.

Thanks,
- Milosz

On Tue, May 28, 2013 at 1:11 PM, Sage Weil <sage@inktank.com> wrote:
> Hi Milosz,
>
> Just a heads up that I hope to take a closer look at the patch this
> afternoon or tomorrow.  Just catching up after the long weekend.
>
> Thanks!
> sage
>
>
> On Thu, 23 May 2013, Milosz Tanski wrote:
>
>> Enable fscache as an optional feature of ceph.
>>
>> Adding support for fscache to the Ceph filesystem. This would bring it to on
>> par with some of the other network filesystems in Linux (like NFS, AFS, etc...)
>>
>> This exploits the existing Ceph cache & lazyio capabilities.
>>
>> Signed-off-by: Milosz Tanski <milosz@adfin.com>
>> ---
>>  fs/ceph/Kconfig  |    9 ++++++
>>  fs/ceph/Makefile |    2 ++
>>  fs/ceph/addr.c   |   85 ++++++++++++++++++++++++++++++++++++++++--------------
>>  fs/ceph/caps.c   |   21 +++++++++++++-
>>  fs/ceph/file.c   |    9 ++++++
>>  fs/ceph/inode.c  |   25 ++++++++++++++--
>>  fs/ceph/super.c  |   25 ++++++++++++++--
>>  fs/ceph/super.h  |   12 ++++++++
>>  8 files changed, 162 insertions(+), 26 deletions(-)
>>
>> diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
>> index 49bc782..ac9a2ef 100644
>> --- a/fs/ceph/Kconfig
>> +++ b/fs/ceph/Kconfig
>> @@ -16,3 +16,12 @@ config CEPH_FS
>>
>>    If unsure, say N.
>>
>> +if CEPH_FS
>> +config CEPH_FSCACHE
>> + bool "Enable Ceph client caching support"
>> + depends on CEPH_FS=m && FSCACHE || CEPH_FS=y && FSCACHE=y
>> + help
>> +  Choose Y here to enable persistent, read-only local
>> +  caching support for Ceph clients using FS-Cache
>> +
>> +endif
>> diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
>> index bd35212..0af0678 100644
>> --- a/fs/ceph/Makefile
>> +++ b/fs/ceph/Makefile
>> @@ -9,3 +9,5 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
>>   mds_client.o mdsmap.o strings.o ceph_frag.o \
>>   debugfs.o
>>
>> +ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
>> +
>> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
>> index 3e68ac1..fd3a1cc 100644
>> --- a/fs/ceph/addr.c
>> +++ b/fs/ceph/addr.c
>> @@ -11,6 +11,7 @@
>>
>>  #include "super.h"
>>  #include "mds_client.h"
>> +#include "cache.h"
>>  #include <linux/ceph/osd_client.h>
>>
>>  /*
>> @@ -149,11 +150,26 @@ static void ceph_invalidatepage(struct page
>> *page, unsigned long offset)
>>   struct ceph_inode_info *ci;
>>   struct ceph_snap_context *snapc = page_snap_context(page);
>>
>> - BUG_ON(!PageLocked(page));
>> - BUG_ON(!PagePrivate(page));
>>   BUG_ON(!page->mapping);
>>
>>   inode = page->mapping->host;
>> + ci = ceph_inode(inode);
>> +
>> + if (offset != 0) {
>> + dout("%p invalidatepage %p idx %lu partial dirty page\n",
>> +     inode, page, page->index);
>> + return;
>> + }
>> +
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + if (PageFsCache(page))
>> + ceph_invalidate_fscache_page(inode, page);
>> +#endif
>> +
>> + if (!PagePrivate(page))
>> + return;
>> +
>> + BUG_ON(!PageLocked(page));
>>
>>   /*
>>   * We can get non-dirty pages here due to races between
>> @@ -163,31 +179,32 @@ static void ceph_invalidatepage(struct page
>> *page, unsigned long offset)
>>   if (!PageDirty(page))
>>   pr_err("%p invalidatepage %p page not dirty\n", inode, page);
>>
>> - if (offset == 0)
>> - ClearPageChecked(page);
>> + ClearPageChecked(page);
>>
>> - ci = ceph_inode(inode);
>> - if (offset == 0) {
>> - dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
>> -     inode, page, page->index, offset);
>> - ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
>> - ceph_put_snap_context(snapc);
>> - page->private = 0;
>> - ClearPagePrivate(page);
>> - } else {
>> - dout("%p invalidatepage %p idx %lu partial dirty page\n",
>> -     inode, page, page->index);
>> - }
>> + dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
>> +     inode, page, page->index, offset);
>> +
>> + ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
>> + ceph_put_snap_context(snapc);
>> + page->private = 0;
>> + ClearPagePrivate(page);
>>  }
>>
>> -/* just a sanity check */
>>  static int ceph_releasepage(struct page *page, gfp_t g)
>>  {
>>   struct inode *inode = page->mapping ? page->mapping->host : NULL;
>>   dout("%p releasepage %p idx %lu\n", inode, page, page->index);
>>   WARN_ON(PageDirty(page));
>> - WARN_ON(PagePrivate(page));
>> - return 0;
>> +
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + /* Can we release the page from the cache? */
>> + if (PageFsCache(page) && ceph_release_fscache_page(page, g) == 0)
>> + return 0;
>> +#endif
>> + if (PagePrivate(page))
>> + return 0;
>> +
>> + return 1;
>>  }
>>
>>  /*
>> @@ -197,11 +214,18 @@ static int readpage_nounlock(struct file *filp,
>> struct page *page)
>>  {
>>   struct inode *inode = file_inode(filp);
>>   struct ceph_inode_info *ci = ceph_inode(inode);
>> - struct ceph_osd_client *osdc =
>> + struct ceph_osd_client *osdc =
>>   &ceph_inode_to_client(inode)->client->osdc;
>>   int err = 0;
>>   u64 len = PAGE_CACHE_SIZE;
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + err = ceph_readpage_from_fscache(inode, page);
>> +
>> + if (err == 0)
>> + goto out;
>> +#endif
>> +
>>   dout("readpage inode %p file %p page %p index %lu\n",
>>       inode, filp, page, page->index);
>>   err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
>> @@ -219,6 +243,10 @@ static int readpage_nounlock(struct file *filp,
>> struct page *page)
>>   }
>>   SetPageUptodate(page);
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + ceph_readpage_to_fscache(inode, page);
>> +#endif
>> +
>>  out:
>>   return err < 0 ? err : 0;
>>  }
>> @@ -262,6 +290,9 @@ static void finish_read(struct ceph_osd_request
>> *req, struct ceph_msg *msg)
>>   flush_dcache_page(page);
>>   SetPageUptodate(page);
>>   unlock_page(page);
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + ceph_readpage_to_fscache(inode, page);
>> +#endif
>>   page_cache_release(page);
>>   bytes -= PAGE_CACHE_SIZE;
>>   }
>> @@ -330,7 +361,7 @@ static int start_read(struct inode *inode, struct
>> list_head *page_list, int max)
>>   page = list_entry(page_list->prev, struct page, lru);
>>   BUG_ON(PageLocked(page));
>>   list_del(&page->lru);
>> -
>> +
>>   dout("start_read %p adding %p idx %lu\n", inode, page,
>>       page->index);
>>   if (add_to_page_cache_lru(page, &inode->i_data, page->index,
>> @@ -377,6 +408,14 @@ static int ceph_readpages(struct file *file,
>> struct address_space *mapping,
>>   int rc = 0;
>>   int max = 0;
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
>> + &nr_pages);
>> +
>> + if (rc == 0)
>> + goto out;
>> +#endif
>> +
>>   if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
>>   max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
>>   >> PAGE_SHIFT;
>> @@ -490,6 +529,10 @@ static int writepage_nounlock(struct page *page,
>> struct writeback_control *wbc)
>>      CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
>>   set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + ceph_readpage_to_fscache(inode, page);
>> +#endif
>> +
>>   set_page_writeback(page);
>>   err = ceph_osdc_writepages(osdc, ceph_vino(inode),
>>     &ci->i_layout, snapc,
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index da0f9b8..7e8d8d3 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -10,6 +10,7 @@
>>
>>  #include "super.h"
>>  #include "mds_client.h"
>> +#include "cache.h"
>>  #include <linux/ceph/decode.h>
>>  #include <linux/ceph/messenger.h>
>>
>> @@ -486,8 +487,14 @@ static void __check_cap_issue(struct
>> ceph_inode_info *ci, struct ceph_cap *cap,
>>   * i_rdcache_gen.
>>   */
>>   if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
>> -    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
>> +    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
>>   ci->i_rdcache_gen++;
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + /* Invalidate the cache for the whole file. */
>> + dout("Invalidating inode data cache: %p", &ci->vfs_inode);
>> + fscache_invalidate(ci->fscache);
>> +#endif
>> + }
>>
>>   /*
>>   * if we are newly issued FILE_SHARED, mark dir not complete; we
>> @@ -2356,6 +2363,12 @@ static void handle_cap_grant(struct inode
>> *inode, struct ceph_mds_caps *grant,
>>   if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
>>      (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
>>      !ci->i_wrbuffer_ref) {
>> +
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + /* Close the fscache on inode */
>> + ceph_fscache_unregister_inode_cookie(ci);
>> +#endif
>> +
>>   if (try_nonblocking_invalidate(inode) == 0) {
>>   revoked_rdcache = 1;
>>   } else {
>> @@ -2425,6 +2438,12 @@ static void handle_cap_grant(struct inode
>> *inode, struct ceph_mds_caps *grant,
>>   wake = 1;
>>   }
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + /* Register cache (if needed); perform this after amny size change. */
>> + if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
>> + ceph_fscache_register_inode_cookie(session->s_mdsc->fsc, ci);
>> +#endif
>> +
>>   /* check cap bits */
>>   wanted = __ceph_caps_wanted(ci);
>>   used = __ceph_caps_used(ci);
>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>> index 656e169..e7ecc04 100644
>> --- a/fs/ceph/file.c
>> +++ b/fs/ceph/file.c
>> @@ -11,6 +11,7 @@
>>
>>  #include "super.h"
>>  #include "mds_client.h"
>> +#include "cache.h"
>>
>>  /*
>>   * Ceph file operations
>> @@ -67,10 +68,17 @@ out:
>>  static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
>>  {
>>   struct ceph_file_info *cf;
>> + struct ceph_inode_info *ci = ceph_inode(inode);
>> + struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
>>   int ret = 0;
>>
>>   switch (inode->i_mode & S_IFMT) {
>>   case S_IFREG:
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + spin_lock(&ci->i_ceph_lock);
>> + ceph_fscache_register_inode_cookie(fsc, ci);
>> + spin_lock(&ci->i_ceph_lock);
>> +#endif
>>   case S_IFDIR:
>>   dout("init_file %p %p 0%o (regular)\n", inode, file,
>>       inode->i_mode);
>> @@ -181,6 +189,7 @@ int ceph_open(struct inode *inode, struct file *file)
>>   spin_unlock(&ci->i_ceph_lock);
>>   return ceph_init_file(inode, file, fmode);
>>   }
>> +
>>   spin_unlock(&ci->i_ceph_lock);
>>
>>   dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>> index be0f7e2..620b84c 100644
>> --- a/fs/ceph/inode.c
>> +++ b/fs/ceph/inode.c
>> @@ -12,6 +12,7 @@
>>
>>  #include "super.h"
>>  #include "mds_client.h"
>> +#include "cache.h"
>>  #include <linux/ceph/decode.h>
>>
>>  /*
>> @@ -377,6 +378,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>>
>>   INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + ci->fscache = NULL;
>> +#endif
>> +
>>   return &ci->vfs_inode;
>>  }
>>
>> @@ -396,6 +401,10 @@ void ceph_destroy_inode(struct inode *inode)
>>
>>   dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + ceph_fscache_unregister_inode_cookie(ci);
>> +#endif
>> +
>>   ceph_queue_caps_release(inode);
>>
>>   /*
>> @@ -430,7 +439,6 @@ void ceph_destroy_inode(struct inode *inode)
>>   call_rcu(&inode->i_rcu, ceph_i_callback);
>>  }
>>
>> -
>>  /*
>>   * Helpers to fill in size, ctime, mtime, and atime.  We have to be
>>   * careful because either the client or MDS may have more up to date
>> @@ -633,6 +641,14 @@ static int fill_inode(struct inode *inode,
>>      le32_to_cpu(info->time_warp_seq),
>>      &ctime, &mtime, &atime);
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + /* Notify the cache that size has changed */
>> + if (queue_trunc && ci->fscache) {
>> + pr_info("size changed inode: %p cap flags\n", &ci->vfs_inode);
>> + fscache_attr_changed(ci->fscache);
>> + }
>> +#endif
>> +
>>   /* only update max_size on auth cap */
>>   if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
>>      ci->i_max_size != le64_to_cpu(info->max_size)) {
>> @@ -1066,7 +1082,7 @@ int ceph_fill_trace(struct super_block *sb,
>> struct ceph_mds_request *req,
>>   * complete.
>>   */
>>   ceph_set_dentry_offset(req->r_old_dentry);
>> - dout("dn %p gets new offset %lld\n", req->r_old_dentry,
>> + dout("dn %p gets new offset %lld\n", req->r_old_dentry,
>>       ceph_dentry(req->r_old_dentry)->offset);
>>
>>   dn = req->r_old_dentry;  /* use old_dentry */
>> @@ -1430,6 +1446,11 @@ static void ceph_invalidate_work(struct
>> work_struct *work)
>>   orig_gen = ci->i_rdcache_gen;
>>   spin_unlock(&ci->i_ceph_lock);
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + pr_info("cache invalidating inode: %p cap flags\n", &ci->vfs_inode);
>> + fscache_invalidate(ci->fscache);
>> +#endif
>> +
>>   truncate_inode_pages(&inode->i_data, 0);
>>
>>   spin_lock(&ci->i_ceph_lock);
>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> index 7d377c9..7847ef7 100644
>> --- a/fs/ceph/super.c
>> +++ b/fs/ceph/super.c
>> @@ -17,6 +17,7 @@
>>
>>  #include "super.h"
>>  #include "mds_client.h"
>> +#include "cache.h"
>>
>>  #include <linux/ceph/ceph_features.h>
>>  #include <linux/ceph/decode.h>
>> @@ -530,6 +531,11 @@ static struct ceph_fs_client
>> *create_fs_client(struct ceph_mount_options *fsopt,
>>   if (!fsc->wb_pagevec_pool)
>>   goto fail_trunc_wq;
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + /* fscache */
>> + ceph_fscache_register_fsid_cookie(fsc);
>> +#endif
>> +
>>   /* caps */
>>   fsc->min_caps = fsopt->max_readdir;
>>
>> @@ -554,6 +560,10 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
>>  {
>>   dout("destroy_fs_client %p\n", fsc);
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + ceph_fscache_unregister_fsid_cookie(fsc);
>> +#endif
>> +
>>   destroy_workqueue(fsc->wb_wq);
>>   destroy_workqueue(fsc->pg_inv_wq);
>>   destroy_workqueue(fsc->trunc_wq);
>> @@ -588,6 +598,8 @@ static void ceph_inode_init_once(void *foo)
>>
>>  static int __init init_caches(void)
>>  {
>> + int error = -ENOMEM;
>> +
>>   ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
>>        sizeof(struct ceph_inode_info),
>>        __alignof__(struct ceph_inode_info),
>> @@ -611,15 +623,19 @@ static int __init init_caches(void)
>>   if (ceph_file_cachep == NULL)
>>   goto bad_file;
>>
>> - return 0;
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + if ((error = fscache_register_netfs(&ceph_cache_netfs)))
>> + goto bad_file;
>> +#endif
>>
>> + return 0;
>>  bad_file:
>>   kmem_cache_destroy(ceph_dentry_cachep);
>>  bad_dentry:
>>   kmem_cache_destroy(ceph_cap_cachep);
>>  bad_cap:
>>   kmem_cache_destroy(ceph_inode_cachep);
>> - return -ENOMEM;
>> + return error;
>>  }
>>
>>  static void destroy_caches(void)
>> @@ -629,10 +645,15 @@ static void destroy_caches(void)
>>   * destroy cache.
>>   */
>>   rcu_barrier();
>> +
>>   kmem_cache_destroy(ceph_inode_cachep);
>>   kmem_cache_destroy(ceph_cap_cachep);
>>   kmem_cache_destroy(ceph_dentry_cachep);
>>   kmem_cache_destroy(ceph_file_cachep);
>> +
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + fscache_unregister_netfs(&ceph_cache_netfs);
>> +#endif
>>  }
>>
>>
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index 8696be2..2980337 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -16,6 +16,10 @@
>>
>>  #include <linux/ceph/libceph.h>
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> +#include <linux/fscache.h>
>> +#endif
>> +
>>  /* f_type in struct statfs */
>>  #define CEPH_SUPER_MAGIC 0x00c36400
>>
>> @@ -90,6 +94,10 @@ struct ceph_fs_client {
>>   struct dentry *debugfs_bdi;
>>   struct dentry *debugfs_mdsc, *debugfs_mdsmap;
>>  #endif
>> +
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + struct fscache_cookie *fscache;
>> +#endif
>>  };
>>
>>
>> @@ -319,6 +327,10 @@ struct ceph_inode_info {
>>
>>   struct work_struct i_vmtruncate_work;
>>
>> +#ifdef CONFIG_CEPH_FSCACHE
>> + struct fscache_cookie *fscache;
>> +#endif
>> +
>>   struct inode vfs_inode; /* at end */
>>  };
>>
>> --
>> 1.7.9.5
>> --
>> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/2] Enable fscache as an optional feature of ceph.
  2013-05-29 18:06   ` Milosz Tanski
@ 2013-06-05 16:26     ` Milosz Tanski
  2013-06-17 13:16     ` Elso Andras
  1 sibling, 0 replies; 13+ messages in thread
From: Milosz Tanski @ 2013-06-05 16:26 UTC (permalink / raw)
  To: Sage Weil; +Cc: ceph-devel, linux-cachefs

Sage & co,

I have another update on my progress and testing.

I've put a couple more fixes in the git branch as we continue to use
and test this. They are small in size but are big fixes in terms of
observed issues. I plan on rebasing them.

Additionally, I've been following David's fscache patches trek into
mainline. They aren't there yet, but they are in linux-next, and since
they are important fixes I'm hopeful about them getting into 3.11.

Finally, I've been doing some performance testing to make sure I've
gotten all the due diligence. Here's some takeaways:

- The performance improvement (regression) is proportional to speed of
the local fscache device. It's best used on fast RAID or SSD drives.
- There's a big drop in network traffic to the OSDs. This frees up
bandwidth for other clients. This becomes more noticeable if you have
multiple OSDs per machine, or a busy network. I imagine this might be
less of an issue with a 10gigE network.

Here's some quick and dirty perf number from my test cluster. The test
cluster has two OSDs and a single client with fscache on a raid0
partition (two drives). In my test I'm cating all the files in the
linux source tree to /dev/null.

Fscache disabled, just clones repo, page cache dropped:

mtanski@build-box-1eeab875:/mnt.ceph/mtanski/linux-fs$ time find -type
f | xargs cat | pv > /dev/null
1.15GB 0:01:07 [17.6MB/s]
real    1m7.269s
user    0m0.604s
sys     0m6.568s

Fscache enabled, just cloned repo, page cache dropped:

mtanski@build-box-1eeab875:/mnt.ceph/mtanski/linux-fs$ time find -type
f | xargs cat | pv > /dev/null
1.15GB 0:00:35 [33.3MB/s]
real    0m35.553s
user    0m0.720s
sys     0m8.484s

As a side note, about 8 seconds of both runs is eaten up by the find
and the MDS overhead for this fairly large tree.

Hope this is helpful,
- Milosz

On Wed, May 29, 2013 at 2:06 PM, Milosz Tanski <milosz@adfin.com> wrote:
> Sage,
>
> Thanks for taking a look at this. No worries about the timing.
>
> I added two extra changes into my branch located here:
> https://bitbucket.org/adfin/linux-fs/commits/branch/forceph. The first
> one is a fix for kernel deadlock. The second one makes fsc cache a
> non-default mount option (akin to NFS).
>
> Finally, I observed an occasional oops in the fscache that's fixed in
> David's branch that's waiting to get into mainline. The fix for the
> issue is here: http://git.kernel.org/cgit/linux/kernel/git/dhowells/linux-fs.git/commit/?h=fscache&id=82958c45e35963c93fc6cbe6a27752e2d97e9f9a.
> I can only cause that issue by forcing the kernel to drop it's caches
> in some cases.
>
> Let me know if you any other feedback, or if I can help in anyway.
>
> Thanks,
> - Milosz
>
> On Tue, May 28, 2013 at 1:11 PM, Sage Weil <sage@inktank.com> wrote:
>> Hi Milosz,
>>
>> Just a heads up that I hope to take a closer look at the patch this
>> afternoon or tomorrow.  Just catching up after the long weekend.
>>
>> Thanks!
>> sage
>>
>>
>> On Thu, 23 May 2013, Milosz Tanski wrote:
>>
>>> Enable fscache as an optional feature of ceph.
>>>
>>> Adding support for fscache to the Ceph filesystem. This would bring it to on
>>> par with some of the other network filesystems in Linux (like NFS, AFS, etc...)
>>>
>>> This exploits the existing Ceph cache & lazyio capabilities.
>>>
>>> Signed-off-by: Milosz Tanski <milosz@adfin.com>
>>> ---
>>>  fs/ceph/Kconfig  |    9 ++++++
>>>  fs/ceph/Makefile |    2 ++
>>>  fs/ceph/addr.c   |   85 ++++++++++++++++++++++++++++++++++++++++--------------
>>>  fs/ceph/caps.c   |   21 +++++++++++++-
>>>  fs/ceph/file.c   |    9 ++++++
>>>  fs/ceph/inode.c  |   25 ++++++++++++++--
>>>  fs/ceph/super.c  |   25 ++++++++++++++--
>>>  fs/ceph/super.h  |   12 ++++++++
>>>  8 files changed, 162 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
>>> index 49bc782..ac9a2ef 100644
>>> --- a/fs/ceph/Kconfig
>>> +++ b/fs/ceph/Kconfig
>>> @@ -16,3 +16,12 @@ config CEPH_FS
>>>
>>>    If unsure, say N.
>>>
>>> +if CEPH_FS
>>> +config CEPH_FSCACHE
>>> + bool "Enable Ceph client caching support"
>>> + depends on CEPH_FS=m && FSCACHE || CEPH_FS=y && FSCACHE=y
>>> + help
>>> +  Choose Y here to enable persistent, read-only local
>>> +  caching support for Ceph clients using FS-Cache
>>> +
>>> +endif
>>> diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
>>> index bd35212..0af0678 100644
>>> --- a/fs/ceph/Makefile
>>> +++ b/fs/ceph/Makefile
>>> @@ -9,3 +9,5 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
>>>   mds_client.o mdsmap.o strings.o ceph_frag.o \
>>>   debugfs.o
>>>
>>> +ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
>>> +
>>> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
>>> index 3e68ac1..fd3a1cc 100644
>>> --- a/fs/ceph/addr.c
>>> +++ b/fs/ceph/addr.c
>>> @@ -11,6 +11,7 @@
>>>
>>>  #include "super.h"
>>>  #include "mds_client.h"
>>> +#include "cache.h"
>>>  #include <linux/ceph/osd_client.h>
>>>
>>>  /*
>>> @@ -149,11 +150,26 @@ static void ceph_invalidatepage(struct page
>>> *page, unsigned long offset)
>>>   struct ceph_inode_info *ci;
>>>   struct ceph_snap_context *snapc = page_snap_context(page);
>>>
>>> - BUG_ON(!PageLocked(page));
>>> - BUG_ON(!PagePrivate(page));
>>>   BUG_ON(!page->mapping);
>>>
>>>   inode = page->mapping->host;
>>> + ci = ceph_inode(inode);
>>> +
>>> + if (offset != 0) {
>>> + dout("%p invalidatepage %p idx %lu partial dirty page\n",
>>> +     inode, page, page->index);
>>> + return;
>>> + }
>>> +
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + if (PageFsCache(page))
>>> + ceph_invalidate_fscache_page(inode, page);
>>> +#endif
>>> +
>>> + if (!PagePrivate(page))
>>> + return;
>>> +
>>> + BUG_ON(!PageLocked(page));
>>>
>>>   /*
>>>   * We can get non-dirty pages here due to races between
>>> @@ -163,31 +179,32 @@ static void ceph_invalidatepage(struct page
>>> *page, unsigned long offset)
>>>   if (!PageDirty(page))
>>>   pr_err("%p invalidatepage %p page not dirty\n", inode, page);
>>>
>>> - if (offset == 0)
>>> - ClearPageChecked(page);
>>> + ClearPageChecked(page);
>>>
>>> - ci = ceph_inode(inode);
>>> - if (offset == 0) {
>>> - dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
>>> -     inode, page, page->index, offset);
>>> - ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
>>> - ceph_put_snap_context(snapc);
>>> - page->private = 0;
>>> - ClearPagePrivate(page);
>>> - } else {
>>> - dout("%p invalidatepage %p idx %lu partial dirty page\n",
>>> -     inode, page, page->index);
>>> - }
>>> + dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
>>> +     inode, page, page->index, offset);
>>> +
>>> + ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
>>> + ceph_put_snap_context(snapc);
>>> + page->private = 0;
>>> + ClearPagePrivate(page);
>>>  }
>>>
>>> -/* just a sanity check */
>>>  static int ceph_releasepage(struct page *page, gfp_t g)
>>>  {
>>>   struct inode *inode = page->mapping ? page->mapping->host : NULL;
>>>   dout("%p releasepage %p idx %lu\n", inode, page, page->index);
>>>   WARN_ON(PageDirty(page));
>>> - WARN_ON(PagePrivate(page));
>>> - return 0;
>>> +
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + /* Can we release the page from the cache? */
>>> + if (PageFsCache(page) && ceph_release_fscache_page(page, g) == 0)
>>> + return 0;
>>> +#endif
>>> + if (PagePrivate(page))
>>> + return 0;
>>> +
>>> + return 1;
>>>  }
>>>
>>>  /*
>>> @@ -197,11 +214,18 @@ static int readpage_nounlock(struct file *filp,
>>> struct page *page)
>>>  {
>>>   struct inode *inode = file_inode(filp);
>>>   struct ceph_inode_info *ci = ceph_inode(inode);
>>> - struct ceph_osd_client *osdc =
>>> + struct ceph_osd_client *osdc =
>>>   &ceph_inode_to_client(inode)->client->osdc;
>>>   int err = 0;
>>>   u64 len = PAGE_CACHE_SIZE;
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + err = ceph_readpage_from_fscache(inode, page);
>>> +
>>> + if (err == 0)
>>> + goto out;
>>> +#endif
>>> +
>>>   dout("readpage inode %p file %p page %p index %lu\n",
>>>       inode, filp, page, page->index);
>>>   err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
>>> @@ -219,6 +243,10 @@ static int readpage_nounlock(struct file *filp,
>>> struct page *page)
>>>   }
>>>   SetPageUptodate(page);
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + ceph_readpage_to_fscache(inode, page);
>>> +#endif
>>> +
>>>  out:
>>>   return err < 0 ? err : 0;
>>>  }
>>> @@ -262,6 +290,9 @@ static void finish_read(struct ceph_osd_request
>>> *req, struct ceph_msg *msg)
>>>   flush_dcache_page(page);
>>>   SetPageUptodate(page);
>>>   unlock_page(page);
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + ceph_readpage_to_fscache(inode, page);
>>> +#endif
>>>   page_cache_release(page);
>>>   bytes -= PAGE_CACHE_SIZE;
>>>   }
>>> @@ -330,7 +361,7 @@ static int start_read(struct inode *inode, struct
>>> list_head *page_list, int max)
>>>   page = list_entry(page_list->prev, struct page, lru);
>>>   BUG_ON(PageLocked(page));
>>>   list_del(&page->lru);
>>> -
>>> +
>>>   dout("start_read %p adding %p idx %lu\n", inode, page,
>>>       page->index);
>>>   if (add_to_page_cache_lru(page, &inode->i_data, page->index,
>>> @@ -377,6 +408,14 @@ static int ceph_readpages(struct file *file,
>>> struct address_space *mapping,
>>>   int rc = 0;
>>>   int max = 0;
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
>>> + &nr_pages);
>>> +
>>> + if (rc == 0)
>>> + goto out;
>>> +#endif
>>> +
>>>   if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
>>>   max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
>>>   >> PAGE_SHIFT;
>>> @@ -490,6 +529,10 @@ static int writepage_nounlock(struct page *page,
>>> struct writeback_control *wbc)
>>>      CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
>>>   set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + ceph_readpage_to_fscache(inode, page);
>>> +#endif
>>> +
>>>   set_page_writeback(page);
>>>   err = ceph_osdc_writepages(osdc, ceph_vino(inode),
>>>     &ci->i_layout, snapc,
>>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>>> index da0f9b8..7e8d8d3 100644
>>> --- a/fs/ceph/caps.c
>>> +++ b/fs/ceph/caps.c
>>> @@ -10,6 +10,7 @@
>>>
>>>  #include "super.h"
>>>  #include "mds_client.h"
>>> +#include "cache.h"
>>>  #include <linux/ceph/decode.h>
>>>  #include <linux/ceph/messenger.h>
>>>
>>> @@ -486,8 +487,14 @@ static void __check_cap_issue(struct
>>> ceph_inode_info *ci, struct ceph_cap *cap,
>>>   * i_rdcache_gen.
>>>   */
>>>   if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
>>> -    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
>>> +    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
>>>   ci->i_rdcache_gen++;
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + /* Invalidate the cache for the whole file. */
>>> + dout("Invalidating inode data cache: %p", &ci->vfs_inode);
>>> + fscache_invalidate(ci->fscache);
>>> +#endif
>>> + }
>>>
>>>   /*
>>>   * if we are newly issued FILE_SHARED, mark dir not complete; we
>>> @@ -2356,6 +2363,12 @@ static void handle_cap_grant(struct inode
>>> *inode, struct ceph_mds_caps *grant,
>>>   if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
>>>      (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
>>>      !ci->i_wrbuffer_ref) {
>>> +
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + /* Close the fscache on inode */
>>> + ceph_fscache_unregister_inode_cookie(ci);
>>> +#endif
>>> +
>>>   if (try_nonblocking_invalidate(inode) == 0) {
>>>   revoked_rdcache = 1;
>>>   } else {
>>> @@ -2425,6 +2438,12 @@ static void handle_cap_grant(struct inode
>>> *inode, struct ceph_mds_caps *grant,
>>>   wake = 1;
>>>   }
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + /* Register cache (if needed); perform this after amny size change. */
>>> + if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
>>> + ceph_fscache_register_inode_cookie(session->s_mdsc->fsc, ci);
>>> +#endif
>>> +
>>>   /* check cap bits */
>>>   wanted = __ceph_caps_wanted(ci);
>>>   used = __ceph_caps_used(ci);
>>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>>> index 656e169..e7ecc04 100644
>>> --- a/fs/ceph/file.c
>>> +++ b/fs/ceph/file.c
>>> @@ -11,6 +11,7 @@
>>>
>>>  #include "super.h"
>>>  #include "mds_client.h"
>>> +#include "cache.h"
>>>
>>>  /*
>>>   * Ceph file operations
>>> @@ -67,10 +68,17 @@ out:
>>>  static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
>>>  {
>>>   struct ceph_file_info *cf;
>>> + struct ceph_inode_info *ci = ceph_inode(inode);
>>> + struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
>>>   int ret = 0;
>>>
>>>   switch (inode->i_mode & S_IFMT) {
>>>   case S_IFREG:
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + spin_lock(&ci->i_ceph_lock);
>>> + ceph_fscache_register_inode_cookie(fsc, ci);
>>> + spin_lock(&ci->i_ceph_lock);
>>> +#endif
>>>   case S_IFDIR:
>>>   dout("init_file %p %p 0%o (regular)\n", inode, file,
>>>       inode->i_mode);
>>> @@ -181,6 +189,7 @@ int ceph_open(struct inode *inode, struct file *file)
>>>   spin_unlock(&ci->i_ceph_lock);
>>>   return ceph_init_file(inode, file, fmode);
>>>   }
>>> +
>>>   spin_unlock(&ci->i_ceph_lock);
>>>
>>>   dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
>>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>>> index be0f7e2..620b84c 100644
>>> --- a/fs/ceph/inode.c
>>> +++ b/fs/ceph/inode.c
>>> @@ -12,6 +12,7 @@
>>>
>>>  #include "super.h"
>>>  #include "mds_client.h"
>>> +#include "cache.h"
>>>  #include <linux/ceph/decode.h>
>>>
>>>  /*
>>> @@ -377,6 +378,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>>>
>>>   INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + ci->fscache = NULL;
>>> +#endif
>>> +
>>>   return &ci->vfs_inode;
>>>  }
>>>
>>> @@ -396,6 +401,10 @@ void ceph_destroy_inode(struct inode *inode)
>>>
>>>   dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + ceph_fscache_unregister_inode_cookie(ci);
>>> +#endif
>>> +
>>>   ceph_queue_caps_release(inode);
>>>
>>>   /*
>>> @@ -430,7 +439,6 @@ void ceph_destroy_inode(struct inode *inode)
>>>   call_rcu(&inode->i_rcu, ceph_i_callback);
>>>  }
>>>
>>> -
>>>  /*
>>>   * Helpers to fill in size, ctime, mtime, and atime.  We have to be
>>>   * careful because either the client or MDS may have more up to date
>>> @@ -633,6 +641,14 @@ static int fill_inode(struct inode *inode,
>>>      le32_to_cpu(info->time_warp_seq),
>>>      &ctime, &mtime, &atime);
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + /* Notify the cache that size has changed */
>>> + if (queue_trunc && ci->fscache) {
>>> + pr_info("size changed inode: %p cap flags\n", &ci->vfs_inode);
>>> + fscache_attr_changed(ci->fscache);
>>> + }
>>> +#endif
>>> +
>>>   /* only update max_size on auth cap */
>>>   if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
>>>      ci->i_max_size != le64_to_cpu(info->max_size)) {
>>> @@ -1066,7 +1082,7 @@ int ceph_fill_trace(struct super_block *sb,
>>> struct ceph_mds_request *req,
>>>   * complete.
>>>   */
>>>   ceph_set_dentry_offset(req->r_old_dentry);
>>> - dout("dn %p gets new offset %lld\n", req->r_old_dentry,
>>> + dout("dn %p gets new offset %lld\n", req->r_old_dentry,
>>>       ceph_dentry(req->r_old_dentry)->offset);
>>>
>>>   dn = req->r_old_dentry;  /* use old_dentry */
>>> @@ -1430,6 +1446,11 @@ static void ceph_invalidate_work(struct
>>> work_struct *work)
>>>   orig_gen = ci->i_rdcache_gen;
>>>   spin_unlock(&ci->i_ceph_lock);
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + pr_info("cache invalidating inode: %p cap flags\n", &ci->vfs_inode);
>>> + fscache_invalidate(ci->fscache);
>>> +#endif
>>> +
>>>   truncate_inode_pages(&inode->i_data, 0);
>>>
>>>   spin_lock(&ci->i_ceph_lock);
>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>>> index 7d377c9..7847ef7 100644
>>> --- a/fs/ceph/super.c
>>> +++ b/fs/ceph/super.c
>>> @@ -17,6 +17,7 @@
>>>
>>>  #include "super.h"
>>>  #include "mds_client.h"
>>> +#include "cache.h"
>>>
>>>  #include <linux/ceph/ceph_features.h>
>>>  #include <linux/ceph/decode.h>
>>> @@ -530,6 +531,11 @@ static struct ceph_fs_client
>>> *create_fs_client(struct ceph_mount_options *fsopt,
>>>   if (!fsc->wb_pagevec_pool)
>>>   goto fail_trunc_wq;
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + /* fscache */
>>> + ceph_fscache_register_fsid_cookie(fsc);
>>> +#endif
>>> +
>>>   /* caps */
>>>   fsc->min_caps = fsopt->max_readdir;
>>>
>>> @@ -554,6 +560,10 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
>>>  {
>>>   dout("destroy_fs_client %p\n", fsc);
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + ceph_fscache_unregister_fsid_cookie(fsc);
>>> +#endif
>>> +
>>>   destroy_workqueue(fsc->wb_wq);
>>>   destroy_workqueue(fsc->pg_inv_wq);
>>>   destroy_workqueue(fsc->trunc_wq);
>>> @@ -588,6 +598,8 @@ static void ceph_inode_init_once(void *foo)
>>>
>>>  static int __init init_caches(void)
>>>  {
>>> + int error = -ENOMEM;
>>> +
>>>   ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
>>>        sizeof(struct ceph_inode_info),
>>>        __alignof__(struct ceph_inode_info),
>>> @@ -611,15 +623,19 @@ static int __init init_caches(void)
>>>   if (ceph_file_cachep == NULL)
>>>   goto bad_file;
>>>
>>> - return 0;
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + if ((error = fscache_register_netfs(&ceph_cache_netfs)))
>>> + goto bad_file;
>>> +#endif
>>>
>>> + return 0;
>>>  bad_file:
>>>   kmem_cache_destroy(ceph_dentry_cachep);
>>>  bad_dentry:
>>>   kmem_cache_destroy(ceph_cap_cachep);
>>>  bad_cap:
>>>   kmem_cache_destroy(ceph_inode_cachep);
>>> - return -ENOMEM;
>>> + return error;
>>>  }
>>>
>>>  static void destroy_caches(void)
>>> @@ -629,10 +645,15 @@ static void destroy_caches(void)
>>>   * destroy cache.
>>>   */
>>>   rcu_barrier();
>>> +
>>>   kmem_cache_destroy(ceph_inode_cachep);
>>>   kmem_cache_destroy(ceph_cap_cachep);
>>>   kmem_cache_destroy(ceph_dentry_cachep);
>>>   kmem_cache_destroy(ceph_file_cachep);
>>> +
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + fscache_unregister_netfs(&ceph_cache_netfs);
>>> +#endif
>>>  }
>>>
>>>
>>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>>> index 8696be2..2980337 100644
>>> --- a/fs/ceph/super.h
>>> +++ b/fs/ceph/super.h
>>> @@ -16,6 +16,10 @@
>>>
>>>  #include <linux/ceph/libceph.h>
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> +#include <linux/fscache.h>
>>> +#endif
>>> +
>>>  /* f_type in struct statfs */
>>>  #define CEPH_SUPER_MAGIC 0x00c36400
>>>
>>> @@ -90,6 +94,10 @@ struct ceph_fs_client {
>>>   struct dentry *debugfs_bdi;
>>>   struct dentry *debugfs_mdsc, *debugfs_mdsmap;
>>>  #endif
>>> +
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + struct fscache_cookie *fscache;
>>> +#endif
>>>  };
>>>
>>>
>>> @@ -319,6 +327,10 @@ struct ceph_inode_info {
>>>
>>>   struct work_struct i_vmtruncate_work;
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + struct fscache_cookie *fscache;
>>> +#endif
>>> +
>>>   struct inode vfs_inode; /* at end */
>>>  };
>>>
>>> --
>>> 1.7.9.5
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
>>>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/2] Enable fscache as an optional feature of ceph.
  2013-05-29 18:06   ` Milosz Tanski
  2013-06-05 16:26     ` Milosz Tanski
@ 2013-06-17 13:16     ` Elso Andras
       [not found]       ` <CANP1eJHZskoMVa3KBGMHvxEfNcAJQdDK4ou47meaBDYzPVa=xg@mail.gmail.com>
  1 sibling, 1 reply; 13+ messages in thread
From: Elso Andras @ 2013-06-17 13:16 UTC (permalink / raw)
  To: ceph-devel

Hi,

I tested your patches on an Ubuntu Lucid system running the Ubuntu Raring
kernel (3.8), together with the for-linus branch from ceph-client and your
fscache branch. There were no problems under heavy load.
But I don't see any difference with/without fscache in our "test" case
(mp4 video streaming, ~5500 connections):
with fscache: http://imageshack.us/photo/my-images/109/xg5a.png/
without fscache: http://imageshack.us/photo/my-images/5/xak.png/

Elbandi

2013/5/29 Milosz Tanski <milosz@adfin.com>:
> Sage,
>
> Thanks for taking a look at this. No worries about the timing.
>
> I added two extra changes into my branch located here:
> https://bitbucket.org/adfin/linux-fs/commits/branch/forceph. The first
> one is a fix for kernel deadlock. The second one makes fsc cache a
> non-default mount option (akin to NFS).
>
> Finally, I observed an occasional oops in the fscache that's fixed in
> David's branch that's waiting to get into mainline. The fix for the
> issue is here: http://git.kernel.org/cgit/linux/kernel/git/dhowells/linux-fs.git/commit/?h=fscache&id=82958c45e35963c93fc6cbe6a27752e2d97e9f9a.
> I can only cause that issue by forcing the kernel to drop its caches
> in some cases.
>
> Let me know if you have any other feedback, or if I can help in any way.
>
> Thanks,
> - Milosz
>
> On Tue, May 28, 2013 at 1:11 PM, Sage Weil <sage@inktank.com> wrote:
>> Hi Milosz,
>>
>> Just a heads up that I hope to take a closer look at the patch this
>> afternoon or tomorrow.  Just catching up after the long weekend.
>>
>> Thanks!
>> sage
>>
>>
>> On Thu, 23 May 2013, Milosz Tanski wrote:
>>
>>> Enable fscache as an optional feature of ceph.
>>>
>>> Adding support for fscache to the Ceph filesystem. This would bring it to on
>>> par with some of the other network filesystems in Linux (like NFS, AFS, etc...)
>>>
>>> This exploits the existing Ceph cache & lazyio capabilities.
>>>
>>> Signed-off-by: Milosz Tanski <milosz@adfin.com>
>>> ---
>>>  fs/ceph/Kconfig  |    9 ++++++
>>>  fs/ceph/Makefile |    2 ++
>>>  fs/ceph/addr.c   |   85 ++++++++++++++++++++++++++++++++++++++++--------------
>>>  fs/ceph/caps.c   |   21 +++++++++++++-
>>>  fs/ceph/file.c   |    9 ++++++
>>>  fs/ceph/inode.c  |   25 ++++++++++++++--
>>>  fs/ceph/super.c  |   25 ++++++++++++++--
>>>  fs/ceph/super.h  |   12 ++++++++
>>>  8 files changed, 162 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
>>> index 49bc782..ac9a2ef 100644
>>> --- a/fs/ceph/Kconfig
>>> +++ b/fs/ceph/Kconfig
>>> @@ -16,3 +16,12 @@ config CEPH_FS
>>>
>>>    If unsure, say N.
>>>
>>> +if CEPH_FS
>>> +config CEPH_FSCACHE
>>> + bool "Enable Ceph client caching support"
>>> + depends on CEPH_FS=m && FSCACHE || CEPH_FS=y && FSCACHE=y
>>> + help
>>> +  Choose Y here to enable persistent, read-only local
>>> +  caching support for Ceph clients using FS-Cache
>>> +
>>> +endif
>>> diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
>>> index bd35212..0af0678 100644
>>> --- a/fs/ceph/Makefile
>>> +++ b/fs/ceph/Makefile
>>> @@ -9,3 +9,5 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
>>>   mds_client.o mdsmap.o strings.o ceph_frag.o \
>>>   debugfs.o
>>>
>>> +ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
>>> +
>>> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
>>> index 3e68ac1..fd3a1cc 100644
>>> --- a/fs/ceph/addr.c
>>> +++ b/fs/ceph/addr.c
>>> @@ -11,6 +11,7 @@
>>>
>>>  #include "super.h"
>>>  #include "mds_client.h"
>>> +#include "cache.h"
>>>  #include <linux/ceph/osd_client.h>
>>>
>>>  /*
>>> @@ -149,11 +150,26 @@ static void ceph_invalidatepage(struct page
>>> *page, unsigned long offset)
>>>   struct ceph_inode_info *ci;
>>>   struct ceph_snap_context *snapc = page_snap_context(page);
>>>
>>> - BUG_ON(!PageLocked(page));
>>> - BUG_ON(!PagePrivate(page));
>>>   BUG_ON(!page->mapping);
>>>
>>>   inode = page->mapping->host;
>>> + ci = ceph_inode(inode);
>>> +
>>> + if (offset != 0) {
>>> + dout("%p invalidatepage %p idx %lu partial dirty page\n",
>>> +     inode, page, page->index);
>>> + return;
>>> + }
>>> +
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + if (PageFsCache(page))
>>> + ceph_invalidate_fscache_page(inode, page);
>>> +#endif
>>> +
>>> + if (!PagePrivate(page))
>>> + return;
>>> +
>>> + BUG_ON(!PageLocked(page));
>>>
>>>   /*
>>>   * We can get non-dirty pages here due to races between
>>> @@ -163,31 +179,32 @@ static void ceph_invalidatepage(struct page
>>> *page, unsigned long offset)
>>>   if (!PageDirty(page))
>>>   pr_err("%p invalidatepage %p page not dirty\n", inode, page);
>>>
>>> - if (offset == 0)
>>> - ClearPageChecked(page);
>>> + ClearPageChecked(page);
>>>
>>> - ci = ceph_inode(inode);
>>> - if (offset == 0) {
>>> - dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
>>> -     inode, page, page->index, offset);
>>> - ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
>>> - ceph_put_snap_context(snapc);
>>> - page->private = 0;
>>> - ClearPagePrivate(page);
>>> - } else {
>>> - dout("%p invalidatepage %p idx %lu partial dirty page\n",
>>> -     inode, page, page->index);
>>> - }
>>> + dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
>>> +     inode, page, page->index, offset);
>>> +
>>> + ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
>>> + ceph_put_snap_context(snapc);
>>> + page->private = 0;
>>> + ClearPagePrivate(page);
>>>  }
>>>
>>> -/* just a sanity check */
>>>  static int ceph_releasepage(struct page *page, gfp_t g)
>>>  {
>>>   struct inode *inode = page->mapping ? page->mapping->host : NULL;
>>>   dout("%p releasepage %p idx %lu\n", inode, page, page->index);
>>>   WARN_ON(PageDirty(page));
>>> - WARN_ON(PagePrivate(page));
>>> - return 0;
>>> +
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + /* Can we release the page from the cache? */
>>> + if (PageFsCache(page) && ceph_release_fscache_page(page, g) == 0)
>>> + return 0;
>>> +#endif
>>> + if (PagePrivate(page))
>>> + return 0;
>>> +
>>> + return 1;
>>>  }
>>>
>>>  /*
>>> @@ -197,11 +214,18 @@ static int readpage_nounlock(struct file *filp,
>>> struct page *page)
>>>  {
>>>   struct inode *inode = file_inode(filp);
>>>   struct ceph_inode_info *ci = ceph_inode(inode);
>>> - struct ceph_osd_client *osdc =
>>> + struct ceph_osd_client *osdc =
>>>   &ceph_inode_to_client(inode)->client->osdc;
>>>   int err = 0;
>>>   u64 len = PAGE_CACHE_SIZE;
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + err = ceph_readpage_from_fscache(inode, page);
>>> +
>>> + if (err == 0)
>>> + goto out;
>>> +#endif
>>> +
>>>   dout("readpage inode %p file %p page %p index %lu\n",
>>>       inode, filp, page, page->index);
>>>   err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
>>> @@ -219,6 +243,10 @@ static int readpage_nounlock(struct file *filp,
>>> struct page *page)
>>>   }
>>>   SetPageUptodate(page);
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + ceph_readpage_to_fscache(inode, page);
>>> +#endif
>>> +
>>>  out:
>>>   return err < 0 ? err : 0;
>>>  }
>>> @@ -262,6 +290,9 @@ static void finish_read(struct ceph_osd_request
>>> *req, struct ceph_msg *msg)
>>>   flush_dcache_page(page);
>>>   SetPageUptodate(page);
>>>   unlock_page(page);
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + ceph_readpage_to_fscache(inode, page);
>>> +#endif
>>>   page_cache_release(page);
>>>   bytes -= PAGE_CACHE_SIZE;
>>>   }
>>> @@ -330,7 +361,7 @@ static int start_read(struct inode *inode, struct
>>> list_head *page_list, int max)
>>>   page = list_entry(page_list->prev, struct page, lru);
>>>   BUG_ON(PageLocked(page));
>>>   list_del(&page->lru);
>>> -
>>> +
>>>   dout("start_read %p adding %p idx %lu\n", inode, page,
>>>       page->index);
>>>   if (add_to_page_cache_lru(page, &inode->i_data, page->index,
>>> @@ -377,6 +408,14 @@ static int ceph_readpages(struct file *file,
>>> struct address_space *mapping,
>>>   int rc = 0;
>>>   int max = 0;
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
>>> + &nr_pages);
>>> +
>>> + if (rc == 0)
>>> + goto out;
>>> +#endif
>>> +
>>>   if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
>>>   max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
>>>   >> PAGE_SHIFT;
>>> @@ -490,6 +529,10 @@ static int writepage_nounlock(struct page *page,
>>> struct writeback_control *wbc)
>>>      CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
>>>   set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + ceph_readpage_to_fscache(inode, page);
>>> +#endif
>>> +
>>>   set_page_writeback(page);
>>>   err = ceph_osdc_writepages(osdc, ceph_vino(inode),
>>>     &ci->i_layout, snapc,
>>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>>> index da0f9b8..7e8d8d3 100644
>>> --- a/fs/ceph/caps.c
>>> +++ b/fs/ceph/caps.c
>>> @@ -10,6 +10,7 @@
>>>
>>>  #include "super.h"
>>>  #include "mds_client.h"
>>> +#include "cache.h"
>>>  #include <linux/ceph/decode.h>
>>>  #include <linux/ceph/messenger.h>
>>>
>>> @@ -486,8 +487,14 @@ static void __check_cap_issue(struct
>>> ceph_inode_info *ci, struct ceph_cap *cap,
>>>   * i_rdcache_gen.
>>>   */
>>>   if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
>>> -    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
>>> +    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
>>>   ci->i_rdcache_gen++;
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + /* Invalidate the cache for the whole file. */
>>> + dout("Invalidating inode data cache: %p", &ci->vfs_inode);
>>> + fscache_invalidate(ci->fscache);
>>> +#endif
>>> + }
>>>
>>>   /*
>>>   * if we are newly issued FILE_SHARED, mark dir not complete; we
>>> @@ -2356,6 +2363,12 @@ static void handle_cap_grant(struct inode
>>> *inode, struct ceph_mds_caps *grant,
>>>   if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
>>>      (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
>>>      !ci->i_wrbuffer_ref) {
>>> +
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + /* Close the fscache on inode */
>>> + ceph_fscache_unregister_inode_cookie(ci);
>>> +#endif
>>> +
>>>   if (try_nonblocking_invalidate(inode) == 0) {
>>>   revoked_rdcache = 1;
>>>   } else {
>>> @@ -2425,6 +2438,12 @@ static void handle_cap_grant(struct inode
>>> *inode, struct ceph_mds_caps *grant,
>>>   wake = 1;
>>>   }
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + /* Register cache (if needed); perform this after amny size change. */
>>> + if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
>>> + ceph_fscache_register_inode_cookie(session->s_mdsc->fsc, ci);
>>> +#endif
>>> +
>>>   /* check cap bits */
>>>   wanted = __ceph_caps_wanted(ci);
>>>   used = __ceph_caps_used(ci);
>>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>>> index 656e169..e7ecc04 100644
>>> --- a/fs/ceph/file.c
>>> +++ b/fs/ceph/file.c
>>> @@ -11,6 +11,7 @@
>>>
>>>  #include "super.h"
>>>  #include "mds_client.h"
>>> +#include "cache.h"
>>>
>>>  /*
>>>   * Ceph file operations
>>> @@ -67,10 +68,17 @@ out:
>>>  static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
>>>  {
>>>   struct ceph_file_info *cf;
>>> + struct ceph_inode_info *ci = ceph_inode(inode);
>>> + struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
>>>   int ret = 0;
>>>
>>>   switch (inode->i_mode & S_IFMT) {
>>>   case S_IFREG:
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + spin_lock(&ci->i_ceph_lock);
>>> + ceph_fscache_register_inode_cookie(fsc, ci);
>>> + spin_lock(&ci->i_ceph_lock);
>>> +#endif
>>>   case S_IFDIR:
>>>   dout("init_file %p %p 0%o (regular)\n", inode, file,
>>>       inode->i_mode);
>>> @@ -181,6 +189,7 @@ int ceph_open(struct inode *inode, struct file *file)
>>>   spin_unlock(&ci->i_ceph_lock);
>>>   return ceph_init_file(inode, file, fmode);
>>>   }
>>> +
>>>   spin_unlock(&ci->i_ceph_lock);
>>>
>>>   dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
>>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>>> index be0f7e2..620b84c 100644
>>> --- a/fs/ceph/inode.c
>>> +++ b/fs/ceph/inode.c
>>> @@ -12,6 +12,7 @@
>>>
>>>  #include "super.h"
>>>  #include "mds_client.h"
>>> +#include "cache.h"
>>>  #include <linux/ceph/decode.h>
>>>
>>>  /*
>>> @@ -377,6 +378,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>>>
>>>   INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + ci->fscache = NULL;
>>> +#endif
>>> +
>>>   return &ci->vfs_inode;
>>>  }
>>>
>>> @@ -396,6 +401,10 @@ void ceph_destroy_inode(struct inode *inode)
>>>
>>>   dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + ceph_fscache_unregister_inode_cookie(ci);
>>> +#endif
>>> +
>>>   ceph_queue_caps_release(inode);
>>>
>>>   /*
>>> @@ -430,7 +439,6 @@ void ceph_destroy_inode(struct inode *inode)
>>>   call_rcu(&inode->i_rcu, ceph_i_callback);
>>>  }
>>>
>>> -
>>>  /*
>>>   * Helpers to fill in size, ctime, mtime, and atime.  We have to be
>>>   * careful because either the client or MDS may have more up to date
>>> @@ -633,6 +641,14 @@ static int fill_inode(struct inode *inode,
>>>      le32_to_cpu(info->time_warp_seq),
>>>      &ctime, &mtime, &atime);
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + /* Notify the cache that size has changed */
>>> + if (queue_trunc && ci->fscache) {
>>> + pr_info("size changed inode: %p cap flags\n", &ci->vfs_inode);
>>> + fscache_attr_changed(ci->fscache);
>>> + }
>>> +#endif
>>> +
>>>   /* only update max_size on auth cap */
>>>   if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
>>>      ci->i_max_size != le64_to_cpu(info->max_size)) {
>>> @@ -1066,7 +1082,7 @@ int ceph_fill_trace(struct super_block *sb,
>>> struct ceph_mds_request *req,
>>>   * complete.
>>>   */
>>>   ceph_set_dentry_offset(req->r_old_dentry);
>>> - dout("dn %p gets new offset %lld\n", req->r_old_dentry,
>>> + dout("dn %p gets new offset %lld\n", req->r_old_dentry,
>>>       ceph_dentry(req->r_old_dentry)->offset);
>>>
>>>   dn = req->r_old_dentry;  /* use old_dentry */
>>> @@ -1430,6 +1446,11 @@ static void ceph_invalidate_work(struct
>>> work_struct *work)
>>>   orig_gen = ci->i_rdcache_gen;
>>>   spin_unlock(&ci->i_ceph_lock);
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + pr_info("cache invalidating inode: %p cap flags\n", &ci->vfs_inode);
>>> + fscache_invalidate(ci->fscache);
>>> +#endif
>>> +
>>>   truncate_inode_pages(&inode->i_data, 0);
>>>
>>>   spin_lock(&ci->i_ceph_lock);
>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>>> index 7d377c9..7847ef7 100644
>>> --- a/fs/ceph/super.c
>>> +++ b/fs/ceph/super.c
>>> @@ -17,6 +17,7 @@
>>>
>>>  #include "super.h"
>>>  #include "mds_client.h"
>>> +#include "cache.h"
>>>
>>>  #include <linux/ceph/ceph_features.h>
>>>  #include <linux/ceph/decode.h>
>>> @@ -530,6 +531,11 @@ static struct ceph_fs_client
>>> *create_fs_client(struct ceph_mount_options *fsopt,
>>>   if (!fsc->wb_pagevec_pool)
>>>   goto fail_trunc_wq;
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + /* fscache */
>>> + ceph_fscache_register_fsid_cookie(fsc);
>>> +#endif
>>> +
>>>   /* caps */
>>>   fsc->min_caps = fsopt->max_readdir;
>>>
>>> @@ -554,6 +560,10 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
>>>  {
>>>   dout("destroy_fs_client %p\n", fsc);
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + ceph_fscache_unregister_fsid_cookie(fsc);
>>> +#endif
>>> +
>>>   destroy_workqueue(fsc->wb_wq);
>>>   destroy_workqueue(fsc->pg_inv_wq);
>>>   destroy_workqueue(fsc->trunc_wq);
>>> @@ -588,6 +598,8 @@ static void ceph_inode_init_once(void *foo)
>>>
>>>  static int __init init_caches(void)
>>>  {
>>> + int error = -ENOMEM;
>>> +
>>>   ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
>>>        sizeof(struct ceph_inode_info),
>>>        __alignof__(struct ceph_inode_info),
>>> @@ -611,15 +623,19 @@ static int __init init_caches(void)
>>>   if (ceph_file_cachep == NULL)
>>>   goto bad_file;
>>>
>>> - return 0;
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + if ((error = fscache_register_netfs(&ceph_cache_netfs)))
>>> + goto bad_file;
>>> +#endif
>>>
>>> + return 0;
>>>  bad_file:
>>>   kmem_cache_destroy(ceph_dentry_cachep);
>>>  bad_dentry:
>>>   kmem_cache_destroy(ceph_cap_cachep);
>>>  bad_cap:
>>>   kmem_cache_destroy(ceph_inode_cachep);
>>> - return -ENOMEM;
>>> + return error;
>>>  }
>>>
>>>  static void destroy_caches(void)
>>> @@ -629,10 +645,15 @@ static void destroy_caches(void)
>>>   * destroy cache.
>>>   */
>>>   rcu_barrier();
>>> +
>>>   kmem_cache_destroy(ceph_inode_cachep);
>>>   kmem_cache_destroy(ceph_cap_cachep);
>>>   kmem_cache_destroy(ceph_dentry_cachep);
>>>   kmem_cache_destroy(ceph_file_cachep);
>>> +
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + fscache_unregister_netfs(&ceph_cache_netfs);
>>> +#endif
>>>  }
>>>
>>>
>>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>>> index 8696be2..2980337 100644
>>> --- a/fs/ceph/super.h
>>> +++ b/fs/ceph/super.h
>>> @@ -16,6 +16,10 @@
>>>
>>>  #include <linux/ceph/libceph.h>
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> +#include <linux/fscache.h>
>>> +#endif
>>> +
>>>  /* f_type in struct statfs */
>>>  #define CEPH_SUPER_MAGIC 0x00c36400
>>>
>>> @@ -90,6 +94,10 @@ struct ceph_fs_client {
>>>   struct dentry *debugfs_bdi;
>>>   struct dentry *debugfs_mdsc, *debugfs_mdsmap;
>>>  #endif
>>> +
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + struct fscache_cookie *fscache;
>>> +#endif
>>>  };
>>>
>>>
>>> @@ -319,6 +327,10 @@ struct ceph_inode_info {
>>>
>>>   struct work_struct i_vmtruncate_work;
>>>
>>> +#ifdef CONFIG_CEPH_FSCACHE
>>> + struct fscache_cookie *fscache;
>>> +#endif
>>> +
>>>   struct inode vfs_inode; /* at end */
>>>  };
>>>
>>> --
>>> 1.7.9.5
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
>>>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Fwd: [PATCH 2/2] Enable fscache as an optional feature of ceph.
       [not found]       ` <CANP1eJHZskoMVa3KBGMHvxEfNcAJQdDK4ou47meaBDYzPVa=xg@mail.gmail.com>
@ 2013-06-17 14:31         ` Milosz Tanski
  2013-06-17 15:47         ` Elso Andras
  1 sibling, 0 replies; 13+ messages in thread
From: Milosz Tanski @ 2013-06-17 14:31 UTC (permalink / raw)
  To: Elso Andras; +Cc: ceph-devel

Elbandi,

Can you give me some info about your test case so I can figure out
what's going on.

1) In the graphs you attached what am I looking at? My best guess is
that it's traffic on a 10gigE card, but I can't tell from the graph
since there's no labels.
2) Can you give me more info about your serving case. What application
are you using to serve the video (http server)? Are you serving static
mp4 files from Ceph filesystem?
3) What's the hardware, most importantly how big is your partition
that cachefilesd is on and what kind of disk are you hosting it on
(rotating, SSD)?
4) Statistics from fscache. Can you paste the output of
/proc/fs/fscache/stats and /proc/fs/fscache/histogram?
5) dmesg lines for ceph/fscache/cachefiles like:

[2049099.198234] CacheFiles: Loaded
[2049099.541721] FS-Cache: Cache "mycache" added (type cachefiles)
[2049099.541727] CacheFiles: File cache on md0 registered
[2049120.650897] Key type ceph registered
[2049120.651015] libceph: loaded (mon/osd proto 15/24)
[2049120.673202] FS-Cache: Netfs 'ceph' registered for caching
[2049120.673207] ceph: loaded (mds proto 32)
[2049120.680919] libceph: client6473 fsid e23a1bfc-8328-46bf-bc59-1209df3f5434
[2049120.683397] libceph: mon0 10.0.5.226:6789 session established

I think with these answers I'll be better able to diagnose what's
going on for you.

- Milosz

On Mon, Jun 17, 2013 at 9:16 AM, Elso Andras <elso.andras@gmail.com> wrote:
>
> Hi,
>
> I tested your patches on a ubuntu lucid system, but ubuntu raring
> kernel (3.8), but with for-linus branch from ceph-client and your
> fscache. There was no probs in heavy load.
> But i dont see any difference with/without fscache on our "test" case
> (mp4 video streaming, ~5500 connections):
> with fscache: http://imageshack.us/photo/my-images/109/xg5a.png/
> without fscache: http://imageshack.us/photo/my-images/5/xak.png/
>
> Elbandi
>
> 2013/5/29 Milosz Tanski <milosz@adfin.com>:
> > Sage,
> >
> > Thanks for taking a look at this. No worries about the timing.
> >
> > I added two extra changes into my branch located here:
> > https://bitbucket.org/adfin/linux-fs/commits/branch/forceph. The first
> > one is a fix for kernel deadlock. The second one makes fsc cache a
> > non-default mount option (akin to NFS).
> >
> > Finally, I observed an occasional oops in the fscache that's fixed in
> > David's branch that's waiting to get into mainline. The fix for the
> > issue is here: http://git.kernel.org/cgit/linux/kernel/git/dhowells/linux-fs.git/commit/?h=fscache&id=82958c45e35963c93fc6cbe6a27752e2d97e9f9a.
> > I can only cause that issue by forcing the kernel to drop it's caches
> > in some cases.
> >
> > Let me know if you any other feedback, or if I can help in anyway.
> >
> > Thanks,
> > - Milosz
> >
> > On Tue, May 28, 2013 at 1:11 PM, Sage Weil <sage@inktank.com> wrote:
> >> Hi Milosz,
> >>
> >> Just a heads up that I hope to take a closer look at the patch this
> >> afternoon or tomorrow.  Just catching up after the long weekend.
> >>
> >> Thanks!
> >> sage
> >>
> >>
> >> On Thu, 23 May 2013, Milosz Tanski wrote:
> >>
> >>> Enable fscache as an optional feature of ceph.
> >>>
> >>> Adding support for fscache to the Ceph filesystem. This would bring it to on
> >>> par with some of the other network filesystems in Linux (like NFS, AFS, etc...)
> >>>
> >>> This exploits the existing Ceph cache & lazyio capabilities.
> >>>
> >>> Signed-off-by: Milosz Tanski <milosz@adfin.com>
> >>> ---
> >>>  fs/ceph/Kconfig  |    9 ++++++
> >>>  fs/ceph/Makefile |    2 ++
> >>>  fs/ceph/addr.c   |   85 ++++++++++++++++++++++++++++++++++++++++--------------
> >>>  fs/ceph/caps.c   |   21 +++++++++++++-
> >>>  fs/ceph/file.c   |    9 ++++++
> >>>  fs/ceph/inode.c  |   25 ++++++++++++++--
> >>>  fs/ceph/super.c  |   25 ++++++++++++++--
> >>>  fs/ceph/super.h  |   12 ++++++++
> >>>  8 files changed, 162 insertions(+), 26 deletions(-)
> >>>
> >>> diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
> >>> index 49bc782..ac9a2ef 100644
> >>> --- a/fs/ceph/Kconfig
> >>> +++ b/fs/ceph/Kconfig
> >>> @@ -16,3 +16,12 @@ config CEPH_FS
> >>>
> >>>    If unsure, say N.
> >>>
> >>> +if CEPH_FS
> >>> +config CEPH_FSCACHE
> >>> + bool "Enable Ceph client caching support"
> >>> + depends on CEPH_FS=m && FSCACHE || CEPH_FS=y && FSCACHE=y
> >>> + help
> >>> +  Choose Y here to enable persistent, read-only local
> >>> +  caching support for Ceph clients using FS-Cache
> >>> +
> >>> +endif
> >>> diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
> >>> index bd35212..0af0678 100644
> >>> --- a/fs/ceph/Makefile
> >>> +++ b/fs/ceph/Makefile
> >>> @@ -9,3 +9,5 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
> >>>   mds_client.o mdsmap.o strings.o ceph_frag.o \
> >>>   debugfs.o
> >>>
> >>> +ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
> >>> +
> >>> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> >>> index 3e68ac1..fd3a1cc 100644
> >>> --- a/fs/ceph/addr.c
> >>> +++ b/fs/ceph/addr.c
> >>> @@ -11,6 +11,7 @@
> >>>
> >>>  #include "super.h"
> >>>  #include "mds_client.h"
> >>> +#include "cache.h"
> >>>  #include <linux/ceph/osd_client.h>
> >>>
> >>>  /*
> >>> @@ -149,11 +150,26 @@ static void ceph_invalidatepage(struct page
> >>> *page, unsigned long offset)
> >>>   struct ceph_inode_info *ci;
> >>>   struct ceph_snap_context *snapc = page_snap_context(page);
> >>>
> >>> - BUG_ON(!PageLocked(page));
> >>> - BUG_ON(!PagePrivate(page));
> >>>   BUG_ON(!page->mapping);
> >>>
> >>>   inode = page->mapping->host;
> >>> + ci = ceph_inode(inode);
> >>> +
> >>> + if (offset != 0) {
> >>> + dout("%p invalidatepage %p idx %lu partial dirty page\n",
> >>> +     inode, page, page->index);
> >>> + return;
> >>> + }
> >>> +
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + if (PageFsCache(page))
> >>> + ceph_invalidate_fscache_page(inode, page);
> >>> +#endif
> >>> +
> >>> + if (!PagePrivate(page))
> >>> + return;
> >>> +
> >>> + BUG_ON(!PageLocked(page));
> >>>
> >>>   /*
> >>>   * We can get non-dirty pages here due to races between
> >>> @@ -163,31 +179,32 @@ static void ceph_invalidatepage(struct page
> >>> *page, unsigned long offset)
> >>>   if (!PageDirty(page))
> >>>   pr_err("%p invalidatepage %p page not dirty\n", inode, page);
> >>>
> >>> - if (offset == 0)
> >>> - ClearPageChecked(page);
> >>> + ClearPageChecked(page);
> >>>
> >>> - ci = ceph_inode(inode);
> >>> - if (offset == 0) {
> >>> - dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
> >>> -     inode, page, page->index, offset);
> >>> - ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
> >>> - ceph_put_snap_context(snapc);
> >>> - page->private = 0;
> >>> - ClearPagePrivate(page);
> >>> - } else {
> >>> - dout("%p invalidatepage %p idx %lu partial dirty page\n",
> >>> -     inode, page, page->index);
> >>> - }
> >>> + dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
> >>> +     inode, page, page->index, offset);
> >>> +
> >>> + ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
> >>> + ceph_put_snap_context(snapc);
> >>> + page->private = 0;
> >>> + ClearPagePrivate(page);
> >>>  }
> >>>
> >>> -/* just a sanity check */
> >>>  static int ceph_releasepage(struct page *page, gfp_t g)
> >>>  {
> >>>   struct inode *inode = page->mapping ? page->mapping->host : NULL;
> >>>   dout("%p releasepage %p idx %lu\n", inode, page, page->index);
> >>>   WARN_ON(PageDirty(page));
> >>> - WARN_ON(PagePrivate(page));
> >>> - return 0;
> >>> +
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + /* Can we release the page from the cache? */
> >>> + if (PageFsCache(page) && ceph_release_fscache_page(page, g) == 0)
> >>> + return 0;
> >>> +#endif
> >>> + if (PagePrivate(page))
> >>> + return 0;
> >>> +
> >>> + return 1;
> >>>  }
> >>>
> >>>  /*
> >>> @@ -197,11 +214,18 @@ static int readpage_nounlock(struct file *filp,
> >>> struct page *page)
> >>>  {
> >>>   struct inode *inode = file_inode(filp);
> >>>   struct ceph_inode_info *ci = ceph_inode(inode);
> >>> - struct ceph_osd_client *osdc =
> >>> + struct ceph_osd_client *osdc =
> >>>   &ceph_inode_to_client(inode)->client->osdc;
> >>>   int err = 0;
> >>>   u64 len = PAGE_CACHE_SIZE;
> >>>
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + err = ceph_readpage_from_fscache(inode, page);
> >>> +
> >>> + if (err == 0)
> >>> + goto out;
> >>> +#endif
> >>> +
> >>>   dout("readpage inode %p file %p page %p index %lu\n",
> >>>       inode, filp, page, page->index);
> >>>   err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
> >>> @@ -219,6 +243,10 @@ static int readpage_nounlock(struct file *filp,
> >>> struct page *page)
> >>>   }
> >>>   SetPageUptodate(page);
> >>>
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + ceph_readpage_to_fscache(inode, page);
> >>> +#endif
> >>> +
> >>>  out:
> >>>   return err < 0 ? err : 0;
> >>>  }
> >>> @@ -262,6 +290,9 @@ static void finish_read(struct ceph_osd_request
> >>> *req, struct ceph_msg *msg)
> >>>   flush_dcache_page(page);
> >>>   SetPageUptodate(page);
> >>>   unlock_page(page);
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + ceph_readpage_to_fscache(inode, page);
> >>> +#endif
> >>>   page_cache_release(page);
> >>>   bytes -= PAGE_CACHE_SIZE;
> >>>   }
> >>> @@ -330,7 +361,7 @@ static int start_read(struct inode *inode, struct
> >>> list_head *page_list, int max)
> >>>   page = list_entry(page_list->prev, struct page, lru);
> >>>   BUG_ON(PageLocked(page));
> >>>   list_del(&page->lru);
> >>> -
> >>> +
> >>>   dout("start_read %p adding %p idx %lu\n", inode, page,
> >>>       page->index);
> >>>   if (add_to_page_cache_lru(page, &inode->i_data, page->index,
> >>> @@ -377,6 +408,14 @@ static int ceph_readpages(struct file *file,
> >>> struct address_space *mapping,
> >>>   int rc = 0;
> >>>   int max = 0;
> >>>
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
> >>> + &nr_pages);
> >>> +
> >>> + if (rc == 0)
> >>> + goto out;
> >>> +#endif
> >>> +
> >>>   if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
> >>>   max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
> >>>   >> PAGE_SHIFT;
> >>> @@ -490,6 +529,10 @@ static int writepage_nounlock(struct page *page,
> >>> struct writeback_control *wbc)
> >>>      CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
> >>>   set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
> >>>
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + ceph_readpage_to_fscache(inode, page);
> >>> +#endif
> >>> +
> >>>   set_page_writeback(page);
> >>>   err = ceph_osdc_writepages(osdc, ceph_vino(inode),
> >>>     &ci->i_layout, snapc,
> >>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> >>> index da0f9b8..7e8d8d3 100644
> >>> --- a/fs/ceph/caps.c
> >>> +++ b/fs/ceph/caps.c
> >>> @@ -10,6 +10,7 @@
> >>>
> >>>  #include "super.h"
> >>>  #include "mds_client.h"
> >>> +#include "cache.h"
> >>>  #include <linux/ceph/decode.h>
> >>>  #include <linux/ceph/messenger.h>
> >>>
> >>> @@ -486,8 +487,14 @@ static void __check_cap_issue(struct
> >>> ceph_inode_info *ci, struct ceph_cap *cap,
> >>>   * i_rdcache_gen.
> >>>   */
> >>>   if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
> >>> -    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
> >>> +    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
> >>>   ci->i_rdcache_gen++;
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + /* Invalidate the cache for the whole file. */
> >>> + dout("Invalidating inode data cache: %p", &ci->vfs_inode);
> >>> + fscache_invalidate(ci->fscache);
> >>> +#endif
> >>> + }
> >>>
> >>>   /*
> >>>   * if we are newly issued FILE_SHARED, mark dir not complete; we
> >>> @@ -2356,6 +2363,12 @@ static void handle_cap_grant(struct inode
> >>> *inode, struct ceph_mds_caps *grant,
> >>>   if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
> >>>      (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
> >>>      !ci->i_wrbuffer_ref) {
> >>> +
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + /* Close the fscache on inode */
> >>> + ceph_fscache_unregister_inode_cookie(ci);
> >>> +#endif
> >>> +
> >>>   if (try_nonblocking_invalidate(inode) == 0) {
> >>>   revoked_rdcache = 1;
> >>>   } else {
> >>> @@ -2425,6 +2438,12 @@ static void handle_cap_grant(struct inode
> >>> *inode, struct ceph_mds_caps *grant,
> >>>   wake = 1;
> >>>   }
> >>>
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + /* Register cache (if needed); perform this after amny size change. */
> >>> + if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
> >>> + ceph_fscache_register_inode_cookie(session->s_mdsc->fsc, ci);
> >>> +#endif
> >>> +
> >>>   /* check cap bits */
> >>>   wanted = __ceph_caps_wanted(ci);
> >>>   used = __ceph_caps_used(ci);
> >>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> >>> index 656e169..e7ecc04 100644
> >>> --- a/fs/ceph/file.c
> >>> +++ b/fs/ceph/file.c
> >>> @@ -11,6 +11,7 @@
> >>>
> >>>  #include "super.h"
> >>>  #include "mds_client.h"
> >>> +#include "cache.h"
> >>>
> >>>  /*
> >>>   * Ceph file operations
> >>> @@ -67,10 +68,17 @@ out:
> >>>  static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
> >>>  {
> >>>   struct ceph_file_info *cf;
> >>> + struct ceph_inode_info *ci = ceph_inode(inode);
> >>> + struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
> >>>   int ret = 0;
> >>>
> >>>   switch (inode->i_mode & S_IFMT) {
> >>>   case S_IFREG:
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + spin_lock(&ci->i_ceph_lock);
> >>> + ceph_fscache_register_inode_cookie(fsc, ci);
> >>> + spin_lock(&ci->i_ceph_lock);
> >>> +#endif
> >>>   case S_IFDIR:
> >>>   dout("init_file %p %p 0%o (regular)\n", inode, file,
> >>>       inode->i_mode);
> >>> @@ -181,6 +189,7 @@ int ceph_open(struct inode *inode, struct file *file)
> >>>   spin_unlock(&ci->i_ceph_lock);
> >>>   return ceph_init_file(inode, file, fmode);
> >>>   }
> >>> +
> >>>   spin_unlock(&ci->i_ceph_lock);
> >>>
> >>>   dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
> >>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> >>> index be0f7e2..620b84c 100644
> >>> --- a/fs/ceph/inode.c
> >>> +++ b/fs/ceph/inode.c
> >>> @@ -12,6 +12,7 @@
> >>>
> >>>  #include "super.h"
> >>>  #include "mds_client.h"
> >>> +#include "cache.h"
> >>>  #include <linux/ceph/decode.h>
> >>>
> >>>  /*
> >>> @@ -377,6 +378,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
> >>>
> >>>   INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
> >>>
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + ci->fscache = NULL;
> >>> +#endif
> >>> +
> >>>   return &ci->vfs_inode;
> >>>  }
> >>>
> >>> @@ -396,6 +401,10 @@ void ceph_destroy_inode(struct inode *inode)
> >>>
> >>>   dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));
> >>>
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + ceph_fscache_unregister_inode_cookie(ci);
> >>> +#endif
> >>> +
> >>>   ceph_queue_caps_release(inode);
> >>>
> >>>   /*
> >>> @@ -430,7 +439,6 @@ void ceph_destroy_inode(struct inode *inode)
> >>>   call_rcu(&inode->i_rcu, ceph_i_callback);
> >>>  }
> >>>
> >>> -
> >>>  /*
> >>>   * Helpers to fill in size, ctime, mtime, and atime.  We have to be
> >>>   * careful because either the client or MDS may have more up to date
> >>> @@ -633,6 +641,14 @@ static int fill_inode(struct inode *inode,
> >>>      le32_to_cpu(info->time_warp_seq),
> >>>      &ctime, &mtime, &atime);
> >>>
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + /* Notify the cache that size has changed */
> >>> + if (queue_trunc && ci->fscache) {
> >>> + pr_info("size changed inode: %p cap flags\n", &ci->vfs_inode);
> >>> + fscache_attr_changed(ci->fscache);
> >>> + }
> >>> +#endif
> >>> +
> >>>   /* only update max_size on auth cap */
> >>>   if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
> >>>      ci->i_max_size != le64_to_cpu(info->max_size)) {
> >>> @@ -1066,7 +1082,7 @@ int ceph_fill_trace(struct super_block *sb,
> >>> struct ceph_mds_request *req,
> >>>   * complete.
> >>>   */
> >>>   ceph_set_dentry_offset(req->r_old_dentry);
> >>> - dout("dn %p gets new offset %lld\n", req->r_old_dentry,
> >>> + dout("dn %p gets new offset %lld\n", req->r_old_dentry,
> >>>       ceph_dentry(req->r_old_dentry)->offset);
> >>>
> >>>   dn = req->r_old_dentry;  /* use old_dentry */
> >>> @@ -1430,6 +1446,11 @@ static void ceph_invalidate_work(struct
> >>> work_struct *work)
> >>>   orig_gen = ci->i_rdcache_gen;
> >>>   spin_unlock(&ci->i_ceph_lock);
> >>>
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + pr_info("cache invalidating inode: %p cap flags\n", &ci->vfs_inode);
> >>> + fscache_invalidate(ci->fscache);
> >>> +#endif
> >>> +
> >>>   truncate_inode_pages(&inode->i_data, 0);
> >>>
> >>>   spin_lock(&ci->i_ceph_lock);
> >>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> >>> index 7d377c9..7847ef7 100644
> >>> --- a/fs/ceph/super.c
> >>> +++ b/fs/ceph/super.c
> >>> @@ -17,6 +17,7 @@
> >>>
> >>>  #include "super.h"
> >>>  #include "mds_client.h"
> >>> +#include "cache.h"
> >>>
> >>>  #include <linux/ceph/ceph_features.h>
> >>>  #include <linux/ceph/decode.h>
> >>> @@ -530,6 +531,11 @@ static struct ceph_fs_client
> >>> *create_fs_client(struct ceph_mount_options *fsopt,
> >>>   if (!fsc->wb_pagevec_pool)
> >>>   goto fail_trunc_wq;
> >>>
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + /* fscache */
> >>> + ceph_fscache_register_fsid_cookie(fsc);
> >>> +#endif
> >>> +
> >>>   /* caps */
> >>>   fsc->min_caps = fsopt->max_readdir;
> >>>
> >>> @@ -554,6 +560,10 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
> >>>  {
> >>>   dout("destroy_fs_client %p\n", fsc);
> >>>
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + ceph_fscache_unregister_fsid_cookie(fsc);
> >>> +#endif
> >>> +
> >>>   destroy_workqueue(fsc->wb_wq);
> >>>   destroy_workqueue(fsc->pg_inv_wq);
> >>>   destroy_workqueue(fsc->trunc_wq);
> >>> @@ -588,6 +598,8 @@ static void ceph_inode_init_once(void *foo)
> >>>
> >>>  static int __init init_caches(void)
> >>>  {
> >>> + int error = -ENOMEM;
> >>> +
> >>>   ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
> >>>        sizeof(struct ceph_inode_info),
> >>>        __alignof__(struct ceph_inode_info),
> >>> @@ -611,15 +623,19 @@ static int __init init_caches(void)
> >>>   if (ceph_file_cachep == NULL)
> >>>   goto bad_file;
> >>>
> >>> - return 0;
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + if ((error = fscache_register_netfs(&ceph_cache_netfs)))
> >>> + goto bad_file;
> >>> +#endif
> >>>
> >>> + return 0;
> >>>  bad_file:
> >>>   kmem_cache_destroy(ceph_dentry_cachep);
> >>>  bad_dentry:
> >>>   kmem_cache_destroy(ceph_cap_cachep);
> >>>  bad_cap:
> >>>   kmem_cache_destroy(ceph_inode_cachep);
> >>> - return -ENOMEM;
> >>> + return error;
> >>>  }
> >>>
> >>>  static void destroy_caches(void)
> >>> @@ -629,10 +645,15 @@ static void destroy_caches(void)
> >>>   * destroy cache.
> >>>   */
> >>>   rcu_barrier();
> >>> +
> >>>   kmem_cache_destroy(ceph_inode_cachep);
> >>>   kmem_cache_destroy(ceph_cap_cachep);
> >>>   kmem_cache_destroy(ceph_dentry_cachep);
> >>>   kmem_cache_destroy(ceph_file_cachep);
> >>> +
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + fscache_unregister_netfs(&ceph_cache_netfs);
> >>> +#endif
> >>>  }
> >>>
> >>>
> >>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> >>> index 8696be2..2980337 100644
> >>> --- a/fs/ceph/super.h
> >>> +++ b/fs/ceph/super.h
> >>> @@ -16,6 +16,10 @@
> >>>
> >>>  #include <linux/ceph/libceph.h>
> >>>
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> +#include <linux/fscache.h>
> >>> +#endif
> >>> +
> >>>  /* f_type in struct statfs */
> >>>  #define CEPH_SUPER_MAGIC 0x00c36400
> >>>
> >>> @@ -90,6 +94,10 @@ struct ceph_fs_client {
> >>>   struct dentry *debugfs_bdi;
> >>>   struct dentry *debugfs_mdsc, *debugfs_mdsmap;
> >>>  #endif
> >>> +
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + struct fscache_cookie *fscache;
> >>> +#endif
> >>>  };
> >>>
> >>>
> >>> @@ -319,6 +327,10 @@ struct ceph_inode_info {
> >>>
> >>>   struct work_struct i_vmtruncate_work;
> >>>
> >>> +#ifdef CONFIG_CEPH_FSCACHE
> >>> + struct fscache_cookie *fscache;
> >>> +#endif
> >>> +
> >>>   struct inode vfs_inode; /* at end */
> >>>  };
> >>>
> >>> --
> >>> 1.7.9.5
> >>> --
> >>> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> >>> the body of a message to majordomo@vger.kernel.org
> >>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >>>
> >>>
> > --
> > To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/2] Enable fscache as an optional feature of ceph.
       [not found]       ` <CANP1eJHZskoMVa3KBGMHvxEfNcAJQdDK4ou47meaBDYzPVa=xg@mail.gmail.com>
  2013-06-17 14:31         ` Fwd: " Milosz Tanski
@ 2013-06-17 15:47         ` Elso Andras
  2013-06-17 16:00           ` Milosz Tanski
  1 sibling, 1 reply; 13+ messages in thread
From: Elso Andras @ 2013-06-17 15:47 UTC (permalink / raw)
  To: Milosz Tanski; +Cc: ceph-devel

Hi,


> 1) In the graphs you attached what am I looking at? My best guess is that
> it's traffic on a 10gigE card, but I can't tell from the graph since there's
> no labels.
Yes, 10G traffic on switch port. So "incoming" means server-to-switch,
"outgoing" means switch-to-server. No separated card for ceph traffic
:(

> 2) Can you give me more info about your serving case. What application are
> you using to serve the video (http server)? Are you serving static mp4 files
> from Ceph filesystem?
lighttpd server with mp4 streaming mod
(http://h264.code-shop.com/trac/wiki/Mod-H264-Streaming-Lighttpd-Version2),
the files lives on cephfs.
there is a speed limit, controlled by mp4 mod. the bandwidth is the
video bitrate value.

mount options:
name=test,rsize=0,rasize=131072,noshare,fsc,key=client.test

rsize=0 and rasize=131072 are tested values; with other values there was
4x as much incoming (from osd) traffic as outgoing (to internet) traffic.

> 3) What's the hardware, most importantly how big is your partition that
> cachefilesd is on and what kind of disk are you hosting it on (rotating,
> SSD)?
there are 5 osd servers: HP DL380 G6, 32G ram, 16 X HP sas disks (10k
rpm) with raid0, bonding two 1G interfaces together.
(In a previous life, this hw could serve ~2.3G of traffic with raid5
and three bonded interfaces)

> 4) Statistics from fscache. Can you paste the output /proc/fs/fscache/stats
> and /proc/fs/fscache/histogram.

FS-Cache statistics
Cookies: idx=1 dat=8001 spc=0
Objects: alc=0 nal=0 avl=0 ded=0
ChkAux : non=0 ok=0 upd=0 obs=0
Pages  : mrk=0 unc=0
Acquire: n=8002 nul=0 noc=0 ok=8002 nbf=0 oom=0
Lookups: n=0 neg=0 pos=0 crt=0 tmo=0
Invals : n=0 run=0
Updates: n=0 nul=0 run=0
Relinqs: n=2265 nul=0 wcr=0 rtr=0
AttrChg: n=0 ok=0 nbf=0 oom=0 run=0
Allocs : n=0 ok=0 wt=0 nbf=0 int=0
Allocs : ops=0 owt=0 abt=0
Retrvls: n=2983745 ok=0 wt=0 nod=0 nbf=2983745 int=0 oom=0
Retrvls: ops=0 owt=0 abt=0
Stores : n=0 ok=0 agn=0 nbf=0 oom=0
Stores : ops=0 run=0 pgs=0 rxd=0 olm=0
VmScan : nos=0 gon=0 bsy=0 can=0 wt=0
Ops    : pend=0 run=0 enq=0 can=0 rej=0
Ops    : dfr=0 rel=0 gc=0
CacheOp: alo=0 luo=0 luc=0 gro=0
CacheOp: inv=0 upo=0 dro=0 pto=0 atc=0 syn=0
CacheOp: rap=0 ras=0 alp=0 als=0 wrp=0 ucp=0 dsp=0

No histogram, i try to build to enable this.

> 5) dmesg lines for ceph/fscache/cachefiles like:
[  264.186887] FS-Cache: Loaded
[  264.223851] Key type ceph registered
[  264.223902] libceph: loaded (mon/osd proto 15/24)
[  264.246334] FS-Cache: Netfs 'ceph' registered for caching
[  264.246341] ceph: loaded (mds proto 32)
[  264.249497] libceph: client31274 fsid 1d78ebe5-f254-44ff-81c1-f641bb2036b6


Elbandi

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/2] Enable fscache as an optional feature of ceph.
  2013-06-17 15:47         ` Elso Andras
@ 2013-06-17 16:00           ` Milosz Tanski
  2013-06-17 17:09             ` Elso Andras
  0 siblings, 1 reply; 13+ messages in thread
From: Milosz Tanski @ 2013-06-17 16:00 UTC (permalink / raw)
  To: Elso Andras; +Cc: ceph-devel

Elbandi,

It looks like it's trying to use fscache (from the stats) but there's
no data. Did you install, configure and enable the cachefilesd daemon?
It's the user-space component of fscache, and the only officially
supported fscache backend on Ubuntu, RHEL & SUSE. I'm guessing that's
your problem, since I don't see any of the below lines in your dmesg
snippet.

[2049099.198234] CacheFiles: Loaded
[2049099.541721] FS-Cache: Cache "mycache" added (type cachefiles)
[2049099.541727] CacheFiles: File cache on md0 registered

- Milosz

On Mon, Jun 17, 2013 at 11:47 AM, Elso Andras <elso.andras@gmail.com> wrote:
> Hi,
>
>
>> 1) In the graphs you attached what am I looking at? My best guess is that
>> it's traffic on a 10gigE card, but I can't tell from the graph since there's
>> no labels.
> Yes, 10G traffic on switch port. So "incoming" means server-to-switch,
> "outgoing" means switch-to-server. No separated card for ceph traffic
> :(
>
>> 2) Can you give me more info about your serving case. What application are
>> you using to serve the video (http server)? Are you serving static mp4 files
>> from Ceph filesystem?
> lighttpd server with mp4 streaming mod
> (http://h264.code-shop.com/trac/wiki/Mod-H264-Streaming-Lighttpd-Version2),
> the files lives on cephfs.
> there is a speed limit, controlled by mp4 mod. the bandwidth is the
> video bitrate value.
>
> mount options:
> name=test,rsize=0,rasize=131072,noshare,fsc,key=client.test
>
> rsize=0 and rasize=131072 is a tested, with other values there was 4x
> incoming (from osd) traffic than outgoing (to internet) traffic.
>
>> 3) What's the hardware, most importantly how big is your partition that
>> cachefilesd is on and what kind of disk are you hosting it on (rotating,
>> SSD)?
> there are 5 osd servers: HP DL380 G6, 32G ram, 16 X HP sas disk (10k
> rpm) with raid0. bonding two 1G interface together.
> (In previous life, this hw could serve the ~2.3G traffic with raid5
> and three bonding interface)
>
>> 4) Statistics from fscache. Can you paste the output /proc/fs/fscache/stats
>> and /proc/fs/fscache/histogram.
>
> FS-Cache statistics
> Cookies: idx=1 dat=8001 spc=0
> Objects: alc=0 nal=0 avl=0 ded=0
> ChkAux : non=0 ok=0 upd=0 obs=0
> Pages  : mrk=0 unc=0
> Acquire: n=8002 nul=0 noc=0 ok=8002 nbf=0 oom=0
> Lookups: n=0 neg=0 pos=0 crt=0 tmo=0
> Invals : n=0 run=0
> Updates: n=0 nul=0 run=0
> Relinqs: n=2265 nul=0 wcr=0 rtr=0
> AttrChg: n=0 ok=0 nbf=0 oom=0 run=0
> Allocs : n=0 ok=0 wt=0 nbf=0 int=0
> Allocs : ops=0 owt=0 abt=0
> Retrvls: n=2983745 ok=0 wt=0 nod=0 nbf=2983745 int=0 oom=0
> Retrvls: ops=0 owt=0 abt=0
> Stores : n=0 ok=0 agn=0 nbf=0 oom=0
> Stores : ops=0 run=0 pgs=0 rxd=0 olm=0
> VmScan : nos=0 gon=0 bsy=0 can=0 wt=0
> Ops    : pend=0 run=0 enq=0 can=0 rej=0
> Ops    : dfr=0 rel=0 gc=0
> CacheOp: alo=0 luo=0 luc=0 gro=0
> CacheOp: inv=0 upo=0 dro=0 pto=0 atc=0 syn=0
> CacheOp: rap=0 ras=0 alp=0 als=0 wrp=0 ucp=0 dsp=0
>
> No histogram, i try to build to enable this.
>
>> 5) dmesg lines for ceph/fscache/cachefiles like:
> [  264.186887] FS-Cache: Loaded
> [  264.223851] Key type ceph registered
> [  264.223902] libceph: loaded (mon/osd proto 15/24)
> [  264.246334] FS-Cache: Netfs 'ceph' registered for caching
> [  264.246341] ceph: loaded (mds proto 32)
> [  264.249497] libceph: client31274 fsid 1d78ebe5-f254-44ff-81c1-f641bb2036b6
>
>
> Elbandi

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/2] Enable fscache as an optional feature of ceph.
  2013-06-17 16:00           ` Milosz Tanski
@ 2013-06-17 17:09             ` Elso Andras
  2013-06-17 17:43               ` Milosz Tanski
  2013-06-17 17:45               ` Matt W. Benjamin
  0 siblings, 2 replies; 13+ messages in thread
From: Elso Andras @ 2013-06-17 17:09 UTC (permalink / raw)
  To: Milosz Tanski; +Cc: ceph-devel

Hi,

Oh, I forgot about this daemon... but this daemon caches the data to
a file. Thus it's useless for us: caching to disk is slower than
reading from the whole set of osds.

Elbandi

2013/6/17 Milosz Tanski <milosz@adfin.com>:
> Elbandi,
>
> It looks like it's trying to use fscache (from the stats) but there's
> no data. Did you install, configure and enable the cachefilesd daemon?
> It's the user-space component of fscache. It's the only officially
> supported fsache backed by Ubuntu, RHEL & SUSE. I'm guessing that's
> your problem since I don't see any of the bellow lines in your dmesg
> snippet.
>
> [2049099.198234] CacheFiles: Loaded
> [2049099.541721] FS-Cache: Cache "mycache" added (type cachefiles)
> [2049099.541727] CacheFiles: File cache on md0 registered
>
> - Milosz
>
> On Mon, Jun 17, 2013 at 11:47 AM, Elso Andras <elso.andras@gmail.com> wrote:
>> Hi,
>>
>>
>>> 1) In the graphs you attached what am I looking at? My best guess is that
>>> it's traffic on a 10gigE card, but I can't tell from the graph since there's
>>> no labels.
>> Yes, 10G traffic on switch port. So "incoming" means server-to-switch,
>> "outgoing" means switch-to-server. No separated card for ceph traffic
>> :(
>>
>>> 2) Can you give me more info about your serving case. What application are
>>> you using to serve the video (http server)? Are you serving static mp4 files
>>> from Ceph filesystem?
>> lighttpd server with mp4 streaming mod
>> (http://h264.code-shop.com/trac/wiki/Mod-H264-Streaming-Lighttpd-Version2),
>> the files lives on cephfs.
>> there is a speed limit, controlled by mp4 mod. the bandwidth is the
>> video bitrate value.
>>
>> mount options:
>> name=test,rsize=0,rasize=131072,noshare,fsc,key=client.test
>>
>> rsize=0 and rasize=131072 is a tested, with other values there was 4x
>> incoming (from osd) traffic than outgoing (to internet) traffic.
>>
>>> 3) What's the hardware, most importantly how big is your partition that
>>> cachefilesd is on and what kind of disk are you hosting it on (rotating,
>>> SSD)?
>> there are 5 osd servers: HP DL380 G6, 32G ram, 16 X HP sas disk (10k
>> rpm) with raid0. bonding two 1G interface together.
>> (In previous life, this hw could serve the ~2.3G traffic with raid5
>> and three bonding interface)
>>
>>> 4) Statistics from fscache. Can you paste the output /proc/fs/fscache/stats
>>> and /proc/fs/fscache/histogram.
>>
>> FS-Cache statistics
>> Cookies: idx=1 dat=8001 spc=0
>> Objects: alc=0 nal=0 avl=0 ded=0
>> ChkAux : non=0 ok=0 upd=0 obs=0
>> Pages  : mrk=0 unc=0
>> Acquire: n=8002 nul=0 noc=0 ok=8002 nbf=0 oom=0
>> Lookups: n=0 neg=0 pos=0 crt=0 tmo=0
>> Invals : n=0 run=0
>> Updates: n=0 nul=0 run=0
>> Relinqs: n=2265 nul=0 wcr=0 rtr=0
>> AttrChg: n=0 ok=0 nbf=0 oom=0 run=0
>> Allocs : n=0 ok=0 wt=0 nbf=0 int=0
>> Allocs : ops=0 owt=0 abt=0
>> Retrvls: n=2983745 ok=0 wt=0 nod=0 nbf=2983745 int=0 oom=0
>> Retrvls: ops=0 owt=0 abt=0
>> Stores : n=0 ok=0 agn=0 nbf=0 oom=0
>> Stores : ops=0 run=0 pgs=0 rxd=0 olm=0
>> VmScan : nos=0 gon=0 bsy=0 can=0 wt=0
>> Ops    : pend=0 run=0 enq=0 can=0 rej=0
>> Ops    : dfr=0 rel=0 gc=0
>> CacheOp: alo=0 luo=0 luc=0 gro=0
>> CacheOp: inv=0 upo=0 dro=0 pto=0 atc=0 syn=0
>> CacheOp: rap=0 ras=0 alp=0 als=0 wrp=0 ucp=0 dsp=0
>>
>> No histogram, i try to build to enable this.
>>
>>> 5) dmesg lines for ceph/fscache/cachefiles like:
>> [  264.186887] FS-Cache: Loaded
>> [  264.223851] Key type ceph registered
>> [  264.223902] libceph: loaded (mon/osd proto 15/24)
>> [  264.246334] FS-Cache: Netfs 'ceph' registered for caching
>> [  264.246341] ceph: loaded (mds proto 32)
>> [  264.249497] libceph: client31274 fsid 1d78ebe5-f254-44ff-81c1-f641bb2036b6
>>
>>
>> Elbandi

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/2] Enable fscache as an optional feature of ceph.
  2013-06-17 17:09             ` Elso Andras
@ 2013-06-17 17:43               ` Milosz Tanski
  2013-06-17 18:08                 ` Milosz Tanski
  2013-06-17 17:45               ` Matt W. Benjamin
  1 sibling, 1 reply; 13+ messages in thread
From: Milosz Tanski @ 2013-06-17 17:43 UTC (permalink / raw)
  To: Elso Andras; +Cc: ceph-devel

Elso,

It does cache the data to a file, so it may not be useful for your
situation. By default the ceph filesystem already uses the (in-memory)
page cache provided by the Linux kernel. So if that's all you want, then
you're good with the current implementation.

Generally large sequential data transfers will not be improved
(although there's cases where we observed improvements). The
motivation for us to implement fscache has been the following
use-case.

We have a large distributed analytics databases (built in house) and
we have a few different access patterns present. First, there's
seemingly random access on the compressed indexes. Second, there's
also random access in the column data files for extent indexes.
Finally, there's either sequential or random access over the actual
data (depending on the query).

In our case the machines that run the database have multiple large
SSD drives in a raid0 configuration. We're using the SSD drives for
scratch storage (housekeeping background jobs) and the ceph fscache.
In some conditions we can get up to 1GB/s reads from these SSD drives.
We're currently in the last stages of deploying this to production.
And for most workloads our query performance for data stored locally
versus on ceph backed by fscache is pretty much the same. Our biggest
gain probably comes from the much lower latency of fetching metadata
and indexes for a query, due to the large number of random iops the
SSD drives afford us. I'm going to publish some updated numbers
compared to the previous quick-and-dirty prototype.

I realize that's not going to be the case for everybody. However, if
you have a data access pattern that follows the 80/20 rule or the
zipfan distribution and fast local disks for caching this is a great.

Thanks,
- Milosz


On Mon, Jun 17, 2013 at 1:09 PM, Elso Andras <elso.andras@gmail.com> wrote:
> Hi,
>
> Oh, i forgot about this daemon... but this daemon cache the data to
> file. Thus it's useless, the caching to disk is more slow than the
> whole osds.
>
> Elbandi
>
> 2013/6/17 Milosz Tanski <milosz@adfin.com>:
>> Elbandi,
>>
>> It looks like it's trying to use fscache (from the stats) but there's
>> no data. Did you install, configure and enable the cachefilesd daemon?
>> It's the user-space component of fscache. It's the only officially
>> supported fsache backed by Ubuntu, RHEL & SUSE. I'm guessing that's
>> your problem since I don't see any of the bellow lines in your dmesg
>> snippet.
>>
>> [2049099.198234] CacheFiles: Loaded
>> [2049099.541721] FS-Cache: Cache "mycache" added (type cachefiles)
>> [2049099.541727] CacheFiles: File cache on md0 registered
>>
>> - Milosz
>>
>> On Mon, Jun 17, 2013 at 11:47 AM, Elso Andras <elso.andras@gmail.com> wrote:
>>> Hi,
>>>
>>>
>>>> 1) In the graphs you attached what am I looking at? My best guess is that
>>>> it's traffic on a 10gigE card, but I can't tell from the graph since there's
>>>> no labels.
>>> Yes, 10G traffic on switch port. So "incoming" means server-to-switch,
>>> "outgoing" means switch-to-server. No separated card for ceph traffic
>>> :(
>>>
>>>> 2) Can you give me more info about your serving case. What application are
>>>> you using to serve the video (http server)? Are you serving static mp4 files
>>>> from Ceph filesystem?
>>> lighttpd server with mp4 streaming mod
>>> (http://h264.code-shop.com/trac/wiki/Mod-H264-Streaming-Lighttpd-Version2),
>>> the files lives on cephfs.
>>> there is a speed limit, controlled by mp4 mod. the bandwidth is the
>>> video bitrate value.
>>>
>>> mount options:
>>> name=test,rsize=0,rasize=131072,noshare,fsc,key=client.test
>>>
>>> rsize=0 and rasize=131072 is a tested, with other values there was 4x
>>> incoming (from osd) traffic than outgoing (to internet) traffic.
>>>
>>>> 3) What's the hardware, most importantly how big is your partition that
>>>> cachefilesd is on and what kind of disk are you hosting it on (rotating,
>>>> SSD)?
>>> there are 5 osd servers: HP DL380 G6, 32G ram, 16 X HP sas disk (10k
>>> rpm) with raid0. bonding two 1G interface together.
>>> (In previous life, this hw could serve the ~2.3G traffic with raid5
>>> and three bonding interface)
>>>
>>>> 4) Statistics from fscache. Can you paste the output /proc/fs/fscache/stats
>>>> and /proc/fs/fscache/histogram.
>>>
>>> FS-Cache statistics
>>> Cookies: idx=1 dat=8001 spc=0
>>> Objects: alc=0 nal=0 avl=0 ded=0
>>> ChkAux : non=0 ok=0 upd=0 obs=0
>>> Pages  : mrk=0 unc=0
>>> Acquire: n=8002 nul=0 noc=0 ok=8002 nbf=0 oom=0
>>> Lookups: n=0 neg=0 pos=0 crt=0 tmo=0
>>> Invals : n=0 run=0
>>> Updates: n=0 nul=0 run=0
>>> Relinqs: n=2265 nul=0 wcr=0 rtr=0
>>> AttrChg: n=0 ok=0 nbf=0 oom=0 run=0
>>> Allocs : n=0 ok=0 wt=0 nbf=0 int=0
>>> Allocs : ops=0 owt=0 abt=0
>>> Retrvls: n=2983745 ok=0 wt=0 nod=0 nbf=2983745 int=0 oom=0
>>> Retrvls: ops=0 owt=0 abt=0
>>> Stores : n=0 ok=0 agn=0 nbf=0 oom=0
>>> Stores : ops=0 run=0 pgs=0 rxd=0 olm=0
>>> VmScan : nos=0 gon=0 bsy=0 can=0 wt=0
>>> Ops    : pend=0 run=0 enq=0 can=0 rej=0
>>> Ops    : dfr=0 rel=0 gc=0
>>> CacheOp: alo=0 luo=0 luc=0 gro=0
>>> CacheOp: inv=0 upo=0 dro=0 pto=0 atc=0 syn=0
>>> CacheOp: rap=0 ras=0 alp=0 als=0 wrp=0 ucp=0 dsp=0
>>>
>>> No histogram, i try to build to enable this.
>>>
>>>> 5) dmesg lines for ceph/fscache/cachefiles like:
>>> [  264.186887] FS-Cache: Loaded
>>> [  264.223851] Key type ceph registered
>>> [  264.223902] libceph: loaded (mon/osd proto 15/24)
>>> [  264.246334] FS-Cache: Netfs 'ceph' registered for caching
>>> [  264.246341] ceph: loaded (mds proto 32)
>>> [  264.249497] libceph: client31274 fsid 1d78ebe5-f254-44ff-81c1-f641bb2036b6
>>>
>>>
>>> Elbandi

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/2] Enable fscache as an optional feature of ceph.
  2013-06-17 17:09             ` Elso Andras
  2013-06-17 17:43               ` Milosz Tanski
@ 2013-06-17 17:45               ` Matt W. Benjamin
  1 sibling, 0 replies; 13+ messages in thread
From: Matt W. Benjamin @ 2013-06-17 17:45 UTC (permalink / raw)
  To: Elso Andras; +Cc: ceph-devel, Milosz Tanski

Hi,

1. in the cases where client caching is useful, AFS disk caching is still common--though yes, giant memory caches became more common over time, and

2. a memory fs-cache backend is probably out there (I wonder if you can write one in kernel mode); at worst, it looks like you can use cachefilesd on tmpfs?

Matt
----- "Elso Andras" <elso.andras@gmail.com> wrote:

> Hi,
> 
> Oh, i forgot about this daemon... but this daemon cache the data to
> file. Thus it's useless, the caching to disk is more slow than the
> whole osds.
> 
> Elbandi
> 
> 2013/6/17 Milosz Tanski <milosz@adfin.com>:
> > Elbandi,
> >
> > It looks like it's trying to use fscache (from the stats) but
> there's
> > no data. Did you install, configure and enable the cachefilesd
> daemon?
> > It's the user-space component of fscache. It's the only officially
> > supported fsache backed by Ubuntu, RHEL & SUSE. I'm guessing that's
> > your problem since I don't see any of the bellow lines in your
> dmesg
> > snippet.
> >
> > [2049099.198234] CacheFiles: Loaded
> > [2049099.541721] FS-Cache: Cache "mycache" added (type cachefiles)
> > [2049099.541727] CacheFiles: File cache on md0 registered
> >
> > - Milosz
> >
> > On Mon, Jun 17, 2013 at 11:47 AM, Elso Andras
> <elso.andras@gmail.com> wrote:
> >> Hi,
> >>
> >>
> >>> 1) In the graphs you attached what am I looking at? My best guess
> is that
> >>> it's traffic on a 10gigE card, but I can't tell from the graph
> since there's
> >>> no labels.
> >> Yes, 10G traffic on switch port. So "incoming" means
> server-to-switch,
> >> "outgoing" means switch-to-server. No separated card for ceph
> traffic
> >> :(
> >>
> >>> 2) Can you give me more info about your serving case. What
> application are
> >>> you using to serve the video (http server)? Are you serving static
> mp4 files
> >>> from Ceph filesystem?
> >> lighttpd server with mp4 streaming mod
> >>
> (http://h264.code-shop.com/trac/wiki/Mod-H264-Streaming-Lighttpd-Version2),
> >> the files lives on cephfs.
> >> there is a speed limit, controlled by mp4 mod. the bandwidth is
> the
> >> video bitrate value.
> >>
> >> mount options:
> >> name=test,rsize=0,rasize=131072,noshare,fsc,key=client.test
> >>
> >> rsize=0 and rasize=131072 is a tested, with other values there was
> 4x
> >> incoming (from osd) traffic than outgoing (to internet) traffic.
> >>
> >>> 3) What's the hardware, most importantly how big is your partition
> that
> >>> cachefilesd is on and what kind of disk are you hosting it on
> (rotating,
> >>> SSD)?
> >> there are 5 osd servers: HP DL380 G6, 32G ram, 16 X HP sas disk
> (10k
> >> rpm) with raid0. bonding two 1G interface together.
> >> (In previous life, this hw could serve the ~2.3G traffic with
> raid5
> >> and three bonding interface)
> >>
> >>> 4) Statistics from fscache. Can you paste the output
> /proc/fs/fscache/stats
> >>> and /proc/fs/fscache/histogram.
> >>
> >> FS-Cache statistics
> >> Cookies: idx=1 dat=8001 spc=0
> >> Objects: alc=0 nal=0 avl=0 ded=0
> >> ChkAux : non=0 ok=0 upd=0 obs=0
> >> Pages  : mrk=0 unc=0
> >> Acquire: n=8002 nul=0 noc=0 ok=8002 nbf=0 oom=0
> >> Lookups: n=0 neg=0 pos=0 crt=0 tmo=0
> >> Invals : n=0 run=0
> >> Updates: n=0 nul=0 run=0
> >> Relinqs: n=2265 nul=0 wcr=0 rtr=0
> >> AttrChg: n=0 ok=0 nbf=0 oom=0 run=0
> >> Allocs : n=0 ok=0 wt=0 nbf=0 int=0
> >> Allocs : ops=0 owt=0 abt=0
> >> Retrvls: n=2983745 ok=0 wt=0 nod=0 nbf=2983745 int=0 oom=0
> >> Retrvls: ops=0 owt=0 abt=0
> >> Stores : n=0 ok=0 agn=0 nbf=0 oom=0
> >> Stores : ops=0 run=0 pgs=0 rxd=0 olm=0
> >> VmScan : nos=0 gon=0 bsy=0 can=0 wt=0
> >> Ops    : pend=0 run=0 enq=0 can=0 rej=0
> >> Ops    : dfr=0 rel=0 gc=0
> >> CacheOp: alo=0 luo=0 luc=0 gro=0
> >> CacheOp: inv=0 upo=0 dro=0 pto=0 atc=0 syn=0
> >> CacheOp: rap=0 ras=0 alp=0 als=0 wrp=0 ucp=0 dsp=0
> >>
> >> No histogram, i try to build to enable this.
> >>
> >>> 5) dmesg lines for ceph/fscache/cachefiles like:
> >> [  264.186887] FS-Cache: Loaded
> >> [  264.223851] Key type ceph registered
> >> [  264.223902] libceph: loaded (mon/osd proto 15/24)
> >> [  264.246334] FS-Cache: Netfs 'ceph' registered for caching
> >> [  264.246341] ceph: loaded (mds proto 32)
> >> [  264.249497] libceph: client31274 fsid
> 1d78ebe5-f254-44ff-81c1-f641bb2036b6
> >>
> >>
> >> Elbandi
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel"
> in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Matt Benjamin
The Linux Box
206 South Fifth Ave. Suite 150
Ann Arbor, MI  48104

http://linuxbox.com

tel.  734-761-4689 
fax.  734-769-8938 
cel.  734-216-5309 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/2] Enable fscache as an optional feature of ceph.
  2013-06-17 17:43               ` Milosz Tanski
@ 2013-06-17 18:08                 ` Milosz Tanski
  0 siblings, 0 replies; 13+ messages in thread
From: Milosz Tanski @ 2013-06-17 18:08 UTC (permalink / raw)
  To: Elso Andras; +Cc: ceph-devel

I forgot to mention that when it comes to our distributed databases
there's locality of caching too. The "frontend" database machines have
certain segments they are responsible for and when rebalancing we have
a high affinity to not "move" segments from node to node.

- Milosz

On Mon, Jun 17, 2013 at 1:43 PM, Milosz Tanski <milosz@adfin.com> wrote:
> Elso,
>
> It does cache the data to the file, thus it may not be useful for your
> situation. By default the ceph filesystem already uses the (in memory)
> page cache provided by the Linux kernel. So if that's all you want, then
> you're good with the current implementation.
>
> Generally large sequential data transfers will not be improved
> (although there's cases where we observed improvements). The
> motivation for us to implement fscache has been the following
> use-case.
>
> We have a large distributed analytics database (built in house) and
> we have a few different access patterns present. First, there's
> seemingly random access on the compressed indexes. Second, there's
> also random access in the column data files for extent indexes.
> Finally, there's either sequential or random access over the actual
> data (depending on the query).
>
> In our cases the machines that run the database have multiple large
> SSD drives in a raid0 configuration. We're using the SSD drives for
> scratch storage (housekeeping background jobs) and the ceph fscache.
> In some conditions we can get up to 1GB/s reads from these SSD drives.
>
> We're currently in our last stages of deploying this to production.
> And for most workloads our query performance for data stored locally
> versus on ceph backed by fscache is pretty much the same. Our biggest
> gain probably comes from much lower latency to get metadata and
> indexes to make the query due the large number random iops the SSD
> drives afford us. I'm going to publish some updated numbers compared
> to the previous quick and dirty prototype.
>
> I realize that's not going to be the case for everybody. However, if
> you have a data access pattern that follows the 80/20 rule or the
> Zipfian distribution and fast local disks for caching, this is great.
>
> Thanks,
> - Milosz
>
>
> On Mon, Jun 17, 2013 at 1:09 PM, Elso Andras <elso.andras@gmail.com> wrote:
>> Hi,
>>
>> Oh, i forgot about this daemon... but this daemon cache the data to
>> file. Thus it's useless, the caching to disk is slower than the
>> whole osds.
>>
>> Elbandi
>>
>> 2013/6/17 Milosz Tanski <milosz@adfin.com>:
>>> Elbandi,
>>>
>>> It looks like it's trying to use fscache (from the stats) but there's
>>> no data. Did you install, configure and enable the cachefilesd daemon?
>>> It's the user-space component of fscache. It's the only officially
>>> supported fscache backed by Ubuntu, RHEL & SUSE. I'm guessing that's
>>> your problem since I don't see any of the below lines in your dmesg
>>> snippet.
>>>
>>> [2049099.198234] CacheFiles: Loaded
>>> [2049099.541721] FS-Cache: Cache "mycache" added (type cachefiles)
>>> [2049099.541727] CacheFiles: File cache on md0 registered
>>>
>>> - Milosz
>>>
>>> On Mon, Jun 17, 2013 at 11:47 AM, Elso Andras <elso.andras@gmail.com> wrote:
>>>> Hi,
>>>>
>>>>
>>>>> 1) In the graphs you attached what am I looking at? My best guess is that
>>>>> it's traffic on a 10gigE card, but I can't tell from the graph since there's
>>>>> no labels.
>>>> Yes, 10G traffic on switch port. So "incoming" means server-to-switch,
>>>> "outgoing" means switch-to-server. No separated card for ceph traffic
>>>> :(
>>>>
>>>>> 2) Can you give me more info about your serving case. What application are
>>>>> you using to serve the video (http server)? Are you serving static mp4 files
>>>>> from Ceph filesystem?
>>>> lighttpd server with mp4 streaming mod
>>>> (http://h264.code-shop.com/trac/wiki/Mod-H264-Streaming-Lighttpd-Version2),
>>>> the files lives on cephfs.
>>>> there is a speed limit, controlled by mp4 mod. the bandwidth is the
>>>> video bitrate value.
>>>>
>>>> mount options:
>>>> name=test,rsize=0,rasize=131072,noshare,fsc,key=client.test
>>>>
>>>> rsize=0 and rasize=131072 is a tested, with other values there was 4x
>>>> incoming (from osd) traffic than outgoing (to internet) traffic.
>>>>
>>>>> 3) What's the hardware, most importantly how big is your partition that
>>>>> cachefilesd is on and what kind of disk are you hosting it on (rotating,
>>>>> SSD)?
>>>> there are 5 osd servers: HP DL380 G6, 32G ram, 16 X HP sas disk (10k
>>>> rpm) with raid0. bonding two 1G interface together.
>>>> (In previous life, this hw could serve the ~2.3G traffic with raid5
>>>> and three bonding interface)
>>>>
>>>>> 4) Statistics from fscache. Can you paste the output /proc/fs/fscache/stats
>>>>> and /proc/fs/fscache/histogram.
>>>>
>>>> FS-Cache statistics
>>>> Cookies: idx=1 dat=8001 spc=0
>>>> Objects: alc=0 nal=0 avl=0 ded=0
>>>> ChkAux : non=0 ok=0 upd=0 obs=0
>>>> Pages  : mrk=0 unc=0
>>>> Acquire: n=8002 nul=0 noc=0 ok=8002 nbf=0 oom=0
>>>> Lookups: n=0 neg=0 pos=0 crt=0 tmo=0
>>>> Invals : n=0 run=0
>>>> Updates: n=0 nul=0 run=0
>>>> Relinqs: n=2265 nul=0 wcr=0 rtr=0
>>>> AttrChg: n=0 ok=0 nbf=0 oom=0 run=0
>>>> Allocs : n=0 ok=0 wt=0 nbf=0 int=0
>>>> Allocs : ops=0 owt=0 abt=0
>>>> Retrvls: n=2983745 ok=0 wt=0 nod=0 nbf=2983745 int=0 oom=0
>>>> Retrvls: ops=0 owt=0 abt=0
>>>> Stores : n=0 ok=0 agn=0 nbf=0 oom=0
>>>> Stores : ops=0 run=0 pgs=0 rxd=0 olm=0
>>>> VmScan : nos=0 gon=0 bsy=0 can=0 wt=0
>>>> Ops    : pend=0 run=0 enq=0 can=0 rej=0
>>>> Ops    : dfr=0 rel=0 gc=0
>>>> CacheOp: alo=0 luo=0 luc=0 gro=0
>>>> CacheOp: inv=0 upo=0 dro=0 pto=0 atc=0 syn=0
>>>> CacheOp: rap=0 ras=0 alp=0 als=0 wrp=0 ucp=0 dsp=0
>>>>
>>>> No histogram, i try to build to enable this.
>>>>
>>>>> 5) dmesg lines for ceph/fscache/cachefiles like:
>>>> [  264.186887] FS-Cache: Loaded
>>>> [  264.223851] Key type ceph registered
>>>> [  264.223902] libceph: loaded (mon/osd proto 15/24)
>>>> [  264.246334] FS-Cache: Netfs 'ceph' registered for caching
>>>> [  264.246341] ceph: loaded (mds proto 32)
>>>> [  264.249497] libceph: client31274 fsid 1d78ebe5-f254-44ff-81c1-f641bb2036b6
>>>>
>>>>
>>>> Elbandi

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 2/2] Enable fscache as an optional feature of ceph.
@ 2013-06-20  3:44 Milosz Tanski
  0 siblings, 0 replies; 13+ messages in thread
From: Milosz Tanski @ 2013-06-20  3:44 UTC (permalink / raw)
  To: ceph-devel; +Cc: Sage Weil, linux-cachefs, Yan, Zheng

Adding support for fscache to the Ceph filesystem. This would bring it to on
par with some of the other network filesystems in Linux (like NFS, AFS, etc...)

This exploits the existing Ceph cache & lazyio capabilities.

In order to mount the filesystem with fscache the 'fsc' mount option must be
passed.

Signed-off-by: Milosz Tanski <milosz@adfin.com>
---
 fs/ceph/Kconfig  |    9 ++++++
 fs/ceph/Makefile |    2 ++
 fs/ceph/addr.c   |   86 +++++++++++++++++++++++++++++++++++++++++-------------
 fs/ceph/caps.c   |   21 ++++++++++++-
 fs/ceph/file.c   |    9 ++++++
 fs/ceph/inode.c  |   25 ++++++++++++++--
 fs/ceph/super.c  |   39 +++++++++++++++++++++++--
 fs/ceph/super.h  |   13 +++++++++
 8 files changed, 178 insertions(+), 26 deletions(-)

diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 49bc782..ac9a2ef 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -16,3 +16,12 @@ config CEPH_FS

   If unsure, say N.

+if CEPH_FS
+config CEPH_FSCACHE
+ bool "Enable Ceph client caching support"
+ depends on CEPH_FS=m && FSCACHE || CEPH_FS=y && FSCACHE=y
+ help
+  Choose Y here to enable persistent, read-only local
+  caching support for Ceph clients using FS-Cache
+
+endif
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
index bd35212..0af0678 100644
--- a/fs/ceph/Makefile
+++ b/fs/ceph/Makefile
@@ -9,3 +9,5 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
  mds_client.o mdsmap.o strings.o ceph_frag.o \
  debugfs.o

+ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
+
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 3e68ac1..ab026a1 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -11,6 +11,7 @@

 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"
 #include <linux/ceph/osd_client.h>

 /*
@@ -149,11 +150,26 @@ static void ceph_invalidatepage(struct page
*page, unsigned long offset)
  struct ceph_inode_info *ci;
  struct ceph_snap_context *snapc = page_snap_context(page);

- BUG_ON(!PageLocked(page));
- BUG_ON(!PagePrivate(page));
  BUG_ON(!page->mapping);

  inode = page->mapping->host;
+ ci = ceph_inode(inode);
+
+ if (offset != 0) {
+ dout("%p invalidatepage %p idx %lu partial dirty page\n",
+     inode, page, page->index);
+ return;
+ }
+
+#ifdef CONFIG_CEPH_FSCACHE
+ if (PageFsCache(page))
+ ceph_invalidate_fscache_page(inode, page);
+#endif
+
+ if (!PagePrivate(page))
+ return;
+
+ BUG_ON(!PageLocked(page));

  /*
  * We can get non-dirty pages here due to races between
@@ -163,31 +179,32 @@ static void ceph_invalidatepage(struct page
*page, unsigned long offset)
  if (!PageDirty(page))
  pr_err("%p invalidatepage %p page not dirty\n", inode, page);

- if (offset == 0)
- ClearPageChecked(page);
+ ClearPageChecked(page);

- ci = ceph_inode(inode);
- if (offset == 0) {
- dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
-     inode, page, page->index, offset);
- ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
- ceph_put_snap_context(snapc);
- page->private = 0;
- ClearPagePrivate(page);
- } else {
- dout("%p invalidatepage %p idx %lu partial dirty page\n",
-     inode, page, page->index);
- }
+ dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
+     inode, page, page->index, offset);
+
+ ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
+ ceph_put_snap_context(snapc);
+ page->private = 0;
+ ClearPagePrivate(page);
 }

-/* just a sanity check */
 static int ceph_releasepage(struct page *page, gfp_t g)
 {
  struct inode *inode = page->mapping ? page->mapping->host : NULL;
  dout("%p releasepage %p idx %lu\n", inode, page, page->index);
  WARN_ON(PageDirty(page));
- WARN_ON(PagePrivate(page));
- return 0;
+
+#ifdef CONFIG_CEPH_FSCACHE
+ /* Can we release the page from the cache? */
+ if (PageFsCache(page) && ceph_release_fscache_page(page, g) == 0)
+ return 0;
+#endif
+ if (PagePrivate(page))
+ return 0;
+
+ return 1;
 }

 /*
@@ -197,11 +214,18 @@ static int readpage_nounlock(struct file *filp,
struct page *page)
 {
  struct inode *inode = file_inode(filp);
  struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_osd_client *osdc =
+ struct ceph_osd_client *osdc =
  &ceph_inode_to_client(inode)->client->osdc;
  int err = 0;
  u64 len = PAGE_CACHE_SIZE;

+#ifdef CONFIG_CEPH_FSCACHE
+ err = ceph_readpage_from_fscache(inode, page);
+
+ if (err == 0)
+ goto out;
+#endif
+
  dout("readpage inode %p file %p page %p index %lu\n",
      inode, filp, page, page->index);
  err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
@@ -219,6 +243,11 @@ static int readpage_nounlock(struct file *filp,
struct page *page)
  }
  SetPageUptodate(page);

+#ifdef CONFIG_CEPH_FSCACHE
+ if (err == 0)
+ ceph_readpage_to_fscache(inode, page);
+#endif
+
 out:
  return err < 0 ? err : 0;
 }
@@ -261,6 +290,9 @@ static void finish_read(struct ceph_osd_request
*req, struct ceph_msg *msg)
      page->index);
  flush_dcache_page(page);
  SetPageUptodate(page);
+#ifdef CONFIG_CEPH_FSCACHE
+ ceph_readpage_to_fscache(inode, page);
+#endif
  unlock_page(page);
  page_cache_release(page);
  bytes -= PAGE_CACHE_SIZE;
@@ -330,7 +362,7 @@ static int start_read(struct inode *inode, struct
list_head *page_list, int max)
  page = list_entry(page_list->prev, struct page, lru);
  BUG_ON(PageLocked(page));
  list_del(&page->lru);
-
+
  dout("start_read %p adding %p idx %lu\n", inode, page,
      page->index);
  if (add_to_page_cache_lru(page, &inode->i_data, page->index,
@@ -377,6 +409,14 @@ static int ceph_readpages(struct file *file,
struct address_space *mapping,
  int rc = 0;
  int max = 0;

+#ifdef CONFIG_CEPH_FSCACHE
+ rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
+ &nr_pages);
+
+ if (rc == 0)
+ goto out;
+#endif
+
  if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
  max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
  >> PAGE_SHIFT;
@@ -490,6 +530,10 @@ static int writepage_nounlock(struct page *page,
struct writeback_control *wbc)
     CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
  set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);

+#ifdef CONFIG_CEPH_FSCACHE
+ ceph_readpage_to_fscache(inode, page);
+#endif
+
  set_page_writeback(page);
  err = ceph_osdc_writepages(osdc, ceph_vino(inode),
    &ci->i_layout, snapc,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index da0f9b8..7e8d8d3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -10,6 +10,7 @@

 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"
 #include <linux/ceph/decode.h>
 #include <linux/ceph/messenger.h>

@@ -486,8 +487,14 @@ static void __check_cap_issue(struct
ceph_inode_info *ci, struct ceph_cap *cap,
  * i_rdcache_gen.
  */
  if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
-    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
+    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
  ci->i_rdcache_gen++;
+#ifdef CONFIG_CEPH_FSCACHE
+ /* Invalidate the cache for the whole file. */
+ dout("Invalidating inode data cache: %p", &ci->vfs_inode);
+ fscache_invalidate(ci->fscache);
+#endif
+ }

  /*
  * if we are newly issued FILE_SHARED, mark dir not complete; we
@@ -2356,6 +2363,12 @@ static void handle_cap_grant(struct inode
*inode, struct ceph_mds_caps *grant,
  if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
     (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
     !ci->i_wrbuffer_ref) {
+
+#ifdef CONFIG_CEPH_FSCACHE
+ /* Close the fscache on inode */
+ ceph_fscache_unregister_inode_cookie(ci);
+#endif
+
  if (try_nonblocking_invalidate(inode) == 0) {
  revoked_rdcache = 1;
  } else {
@@ -2425,6 +2438,12 @@ static void handle_cap_grant(struct inode
*inode, struct ceph_mds_caps *grant,
  wake = 1;
  }

+#ifdef CONFIG_CEPH_FSCACHE
+ /* Register cache (if needed); perform this after amny size change. */
+ if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
+ ceph_fscache_register_inode_cookie(session->s_mdsc->fsc, ci);
+#endif
+
  /* check cap bits */
  wanted = __ceph_caps_wanted(ci);
  used = __ceph_caps_used(ci);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 656e169..1ec8df4 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -11,6 +11,7 @@

 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"

 /*
  * Ceph file operations
@@ -67,10 +68,17 @@ out:
 static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 {
  struct ceph_file_info *cf;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
  int ret = 0;

  switch (inode->i_mode & S_IFMT) {
  case S_IFREG:
+#ifdef CONFIG_CEPH_FSCACHE
+ spin_lock(&ci->i_ceph_lock);
+ ceph_fscache_register_inode_cookie(fsc, ci);
+ spin_unlock(&ci->i_ceph_lock);
+#endif
  case S_IFDIR:
  dout("init_file %p %p 0%o (regular)\n", inode, file,
      inode->i_mode);
@@ -181,6 +189,7 @@ int ceph_open(struct inode *inode, struct file *file)
  spin_unlock(&ci->i_ceph_lock);
  return ceph_init_file(inode, file, fmode);
  }
+
  spin_unlock(&ci->i_ceph_lock);

  dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index be0f7e2..620b84c 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -12,6 +12,7 @@

 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"
 #include <linux/ceph/decode.h>

 /*
@@ -377,6 +378,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)

  INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);

+#ifdef CONFIG_CEPH_FSCACHE
+ ci->fscache = NULL;
+#endif
+
  return &ci->vfs_inode;
 }

@@ -396,6 +401,10 @@ void ceph_destroy_inode(struct inode *inode)

  dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));

+#ifdef CONFIG_CEPH_FSCACHE
+ ceph_fscache_unregister_inode_cookie(ci);
+#endif
+
  ceph_queue_caps_release(inode);

  /*
@@ -430,7 +439,6 @@ void ceph_destroy_inode(struct inode *inode)
  call_rcu(&inode->i_rcu, ceph_i_callback);
 }

-
 /*
  * Helpers to fill in size, ctime, mtime, and atime.  We have to be
  * careful because either the client or MDS may have more up to date
@@ -633,6 +641,14 @@ static int fill_inode(struct inode *inode,
     le32_to_cpu(info->time_warp_seq),
     &ctime, &mtime, &atime);

+#ifdef CONFIG_CEPH_FSCACHE
+ /* Notify the cache that size has changed */
+ if (queue_trunc && ci->fscache) {
+ pr_info("size changed inode: %p cap flags\n", &ci->vfs_inode);
+ fscache_attr_changed(ci->fscache);
+ }
+#endif
+
  /* only update max_size on auth cap */
  if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
     ci->i_max_size != le64_to_cpu(info->max_size)) {
@@ -1066,7 +1082,7 @@ int ceph_fill_trace(struct super_block *sb,
struct ceph_mds_request *req,
  * complete.
  */
  ceph_set_dentry_offset(req->r_old_dentry);
- dout("dn %p gets new offset %lld\n", req->r_old_dentry,
+ dout("dn %p gets new offset %lld\n", req->r_old_dentry,
      ceph_dentry(req->r_old_dentry)->offset);

  dn = req->r_old_dentry;  /* use old_dentry */
@@ -1430,6 +1446,11 @@ static void ceph_invalidate_work(struct
work_struct *work)
  orig_gen = ci->i_rdcache_gen;
  spin_unlock(&ci->i_ceph_lock);

+#ifdef CONFIG_CEPH_FSCACHE
+ pr_info("cache invalidating inode: %p cap flags\n", &ci->vfs_inode);
+ fscache_invalidate(ci->fscache);
+#endif
+
  truncate_inode_pages(&inode->i_data, 0);

  spin_lock(&ci->i_ceph_lock);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 7d377c9..850c161 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -17,6 +17,7 @@

 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"

 #include <linux/ceph/ceph_features.h>
 #include <linux/ceph/decode.h>
@@ -142,6 +143,8 @@ enum {
  Opt_nodcache,
  Opt_ino32,
  Opt_noino32,
+ Opt_fscache,
+ Opt_nofscache
 };

 static match_table_t fsopt_tokens = {
@@ -167,6 +170,8 @@ static match_table_t fsopt_tokens = {
  {Opt_nodcache, "nodcache"},
  {Opt_ino32, "ino32"},
  {Opt_noino32, "noino32"},
+ {Opt_fscache, "fsc"},
+ {Opt_nofscache, "nofsc"},
  {-1, NULL}
 };

@@ -260,6 +265,12 @@ static int parse_fsopt_token(char *c, void *private)
  case Opt_noino32:
  fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
  break;
+ case Opt_fscache:
+ fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
+ break;
+ case Opt_nofscache:
+ fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
+ break;
  default:
  BUG_ON(token);
  }
@@ -422,6 +433,10 @@ static int ceph_show_options(struct seq_file *m,
struct dentry *root)
  seq_puts(m, ",dcache");
  else
  seq_puts(m, ",nodcache");
+ if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
+ seq_puts(m, ",fsc");
+ else
+ seq_puts(m, ",nofsc");

  if (fsopt->wsize)
  seq_printf(m, ",wsize=%d", fsopt->wsize);
@@ -530,6 +545,11 @@ static struct ceph_fs_client
*create_fs_client(struct ceph_mount_options *fsopt,
  if (!fsc->wb_pagevec_pool)
  goto fail_trunc_wq;

+#ifdef CONFIG_CEPH_FSCACHE
+ if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE))
+ ceph_fscache_register_fsid_cookie(fsc);
+#endif
+
  /* caps */
  fsc->min_caps = fsopt->max_readdir;

@@ -554,6 +574,10 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
 {
  dout("destroy_fs_client %p\n", fsc);

+#ifdef CONFIG_CEPH_FSCACHE
+ ceph_fscache_unregister_fsid_cookie(fsc);
+#endif
+
  destroy_workqueue(fsc->wb_wq);
  destroy_workqueue(fsc->pg_inv_wq);
  destroy_workqueue(fsc->trunc_wq);
@@ -588,6 +612,8 @@ static void ceph_inode_init_once(void *foo)

 static int __init init_caches(void)
 {
+ int error = -ENOMEM;
+
  ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
       sizeof(struct ceph_inode_info),
       __alignof__(struct ceph_inode_info),
@@ -611,15 +637,19 @@ static int __init init_caches(void)
  if (ceph_file_cachep == NULL)
  goto bad_file;

- return 0;
+#ifdef CONFIG_CEPH_FSCACHE
+ if ((error = fscache_register_netfs(&ceph_cache_netfs)))
+ goto bad_file;
+#endif

+ return 0;
 bad_file:
  kmem_cache_destroy(ceph_dentry_cachep);
 bad_dentry:
  kmem_cache_destroy(ceph_cap_cachep);
 bad_cap:
  kmem_cache_destroy(ceph_inode_cachep);
- return -ENOMEM;
+ return error;
 }

 static void destroy_caches(void)
@@ -629,10 +659,15 @@ static void destroy_caches(void)
  * destroy cache.
  */
  rcu_barrier();
+
  kmem_cache_destroy(ceph_inode_cachep);
  kmem_cache_destroy(ceph_cap_cachep);
  kmem_cache_destroy(ceph_dentry_cachep);
  kmem_cache_destroy(ceph_file_cachep);
+
+#ifdef CONFIG_CEPH_FSCACHE
+ fscache_unregister_netfs(&ceph_cache_netfs);
+#endif
 }


diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 7ccfdb4..5ddaad5 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -16,6 +16,10 @@

 #include <linux/ceph/libceph.h>

+#ifdef CONFIG_CEPH_FSCACHE
+#include <linux/fscache.h>
+#endif
+
 /* f_type in struct statfs */
 #define CEPH_SUPER_MAGIC 0x00c36400

@@ -29,6 +33,7 @@
 #define CEPH_MOUNT_OPT_NOASYNCREADDIR  (1<<7) /* no dcache readdir */
 #define CEPH_MOUNT_OPT_INO32           (1<<8) /* 32 bit inos */
 #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
+#define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */

 #define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES)

@@ -90,6 +95,10 @@ struct ceph_fs_client {
  struct dentry *debugfs_bdi;
  struct dentry *debugfs_mdsc, *debugfs_mdsmap;
 #endif
+
+#ifdef CONFIG_CEPH_FSCACHE
+ struct fscache_cookie *fscache;
+#endif
 };


@@ -319,6 +328,10 @@ struct ceph_inode_info {

  struct work_struct i_vmtruncate_work;

+#ifdef CONFIG_CEPH_FSCACHE
+ struct fscache_cookie *fscache;
+#endif
+
  struct inode vfs_inode; /* at end */
 };

--
1.7.9.5

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2013-06-20  3:44 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-05-23 21:55 [PATCH 2/2] Enable fscache as an optional feature of ceph Milosz Tanski
2013-05-28 17:11 ` Sage Weil
2013-05-29 18:06   ` Milosz Tanski
2013-06-05 16:26     ` Milosz Tanski
2013-06-17 13:16     ` Elso Andras
     [not found]       ` <CANP1eJHZskoMVa3KBGMHvxEfNcAJQdDK4ou47meaBDYzPVa=xg@mail.gmail.com>
2013-06-17 14:31         ` Fwd: " Milosz Tanski
2013-06-17 15:47         ` Elso Andras
2013-06-17 16:00           ` Milosz Tanski
2013-06-17 17:09             ` Elso Andras
2013-06-17 17:43               ` Milosz Tanski
2013-06-17 18:08                 ` Milosz Tanski
2013-06-17 17:45               ` Matt W. Benjamin
2013-06-20  3:44 Milosz Tanski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.