linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Boaz Harrosh <boazh@netapp.com>
To: linux-fsdevel <linux-fsdevel@vger.kernel.org>
Cc: Ric Wheeler <rwheeler@redhat.com>,
	Miklos Szeredi <mszeredi@redhat.com>,
	Steve French <smfrench@gmail.com>,
	Steven Whitehouse <swhiteho@redhat.com>,
	Jefff moyer <jmoyer@redhat.com>, Sage Weil <sweil@redhat.com>,
	Jan Kara <jack@suse.cz>, Amir Goldstein <amir73il@gmail.com>,
	Andy Rudof <andy.rudoff@intel.com>,
	Anna Schumaker <Anna.Schumaker@netapp.com>,
	Amit Golander <Amit.Golander@netapp.com>,
	Sagi Manole <sagim@netapp.com>,
	Shachar Sharon <Shachar.Sharon@netapp.com>
Subject: [RFC 6/7] zuf: Filesystem operations
Date: Tue, 13 Mar 2018 19:39:50 +0200	[thread overview]
Message-ID: <ba7cddd1-9464-1b90-3738-9a5bc3dcb2b3@netapp.com> (raw)
In-Reply-To: <8a4e01bd-0014-1c44-e89f-9d70ccbe0658@netapp.com>


The principle for all operations is the same.

* The few parameters are given on the despatch IOCTL
  buffer (up to 4k of parameters)

* Any application buffers or other big buffers
  like readdir are mapped via zuf-core to the Server VM

* The operation is despatched. The return code and a few out
  parameters are returned in the despatch-return buffer.
  Any data is stored/read at the mapped application buffers.

* zus objects like zus_inode, symlinks and so on are returned
  through a dpp_t (Dual port pointer) - a special kind of zuf
  construct that makes it possible to have a Kernel pointer and
  a server pointer to the same memory. If pmem is used this is
  usually a pointer to pmem.

  The Kernel's zuf part may even write to this returned pointer
  but it will then send a despatch, for the FS to persist the
  change.

TODO:
	This patch is probably too big. How best to split it?

Signed-off-by: Boaz Harrosh <boazh@netapp.com>
---
 fs/zuf/Makefile    |   2 +-
 fs/zuf/_extern.h   |  48 +++++
 fs/zuf/directory.c | 156 ++++++++++++++
 fs/zuf/file.c      | 403 ++++++++++++++++++++++++++++++++++
 fs/zuf/inode.c     | 617 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/zuf/namei.c     | 421 ++++++++++++++++++++++++++++++++++++
 fs/zuf/symlink.c   |  76 +++++++
 fs/zuf/zus_api.h   | 234 ++++++++++++++++++++
 8 files changed, 1953 insertions(+), 4 deletions(-)
 create mode 100644 fs/zuf/directory.c
 create mode 100644 fs/zuf/file.c
 create mode 100644 fs/zuf/namei.c
 create mode 100644 fs/zuf/symlink.c

diff --git a/fs/zuf/Makefile b/fs/zuf/Makefile
index 94ce80b..4c125f7 100644
--- a/fs/zuf/Makefile
+++ b/fs/zuf/Makefile
@@ -17,5 +17,5 @@ zuf-y += md.o t2.o t1.o
 zuf-y += zuf-core.o zuf-root.o
 
 # Main FS
-zuf-y += super.o inode.o
+zuf-y += super.o inode.o directory.o file.o namei.o symlink.o
 zuf-y += module.o
diff --git a/fs/zuf/_extern.h b/fs/zuf/_extern.h
index 0543fd8..cf2e80f 100644
--- a/fs/zuf/_extern.h
+++ b/fs/zuf/_extern.h
@@ -19,16 +19,43 @@
  * extern functions declarations
  */
 
+/* directory.c */
+int zuf_add_dentry(struct inode *dir, struct qstr *str,
+		   struct inode *inode, bool rename);
+int zuf_remove_dentry(struct inode *dir, struct qstr *str);
+
 /* inode.c */
+int zuf_evict_dispatch(struct super_block *sb, struct zus_inode_info *zus_ii,
+		       int operation);
 struct inode *zuf_iget(struct super_block *sb, struct zus_inode_info *zus_ii,
 		       zu_dpp_t _zi, bool *exist);
 void zuf_evict_inode(struct inode *inode);
+struct inode *zuf_new_inode(struct inode *dir, umode_t mode,
+			    const struct qstr *qstr, const char *symname,
+			    ulong rdev_or_isize, bool tmpfile);
 int zuf_write_inode(struct inode *inode, struct writeback_control *wbc);
+int zuf_update_time(struct inode *inode, struct timespec *time, int flags);
+int zuf_setattr(struct dentry *dentry, struct iattr *attr);
+int zuf_getattr(const struct path *path, struct kstat *stat,
+		 u32 request_mask, unsigned int flags);
+void zuf_set_inode_flags(struct inode *inode, struct zus_inode *zi);
+bool zuf_dir_emit(struct super_block *sb, struct dir_context *ctx,
+		  ulong ino, const char *name, int length);
+
+/* symlink.c */
+uint zuf_prepare_symname(struct zufs_ioc_new_inode *ioc_new_inode,
+			const char *symname, ulong len, struct page *pages[2]);
+
+/* file.c */
 int zuf_isync(struct inode *inode, loff_t start, loff_t end, int datasync);
 
 /* super.c */
 int zuf_init_inodecache(void);
 void zuf_destroy_inodecache(void);
+
+void zuf_sync_inc(struct inode *inode);
+void zuf_sync_dec(struct inode *inode, ulong write_unmapped);
+
 struct dentry *zuf_mount(struct file_system_type *fs_type, int flags,
 			 const char *dev_name, void *data);
 
@@ -56,4 +83,25 @@ int zuf_register_fs(struct super_block *sb, struct zufs_ioc_register_fs *rfs);
 /* t1.c */
 int zuf_pmem_mmap(struct file *file, struct vm_area_struct *vma);
 
+/*
+ * Inodes and files operations
+ */
+
+/* directory.c */
+extern const struct file_operations zuf_dir_operations;
+
+/* file.c */
+extern const struct inode_operations zuf_file_inode_operations;
+extern const struct file_operations zuf_file_operations;
+
+/* inode.c */
+extern const struct address_space_operations zuf_aops;
+
+/* namei.c */
+extern const struct inode_operations zuf_dir_inode_operations;
+extern const struct inode_operations zuf_special_inode_operations;
+
+/* symlink.c */
+extern const struct inode_operations zuf_symlink_inode_operations;
+
 #endif	/*ndef __ZUF_EXTERN_H__*/
diff --git a/fs/zuf/directory.c b/fs/zuf/directory.c
new file mode 100644
index 0000000..f8f68b8
--- /dev/null
+++ b/fs/zuf/directory.c
@@ -0,0 +1,156 @@
+/*
+ * BRIEF DESCRIPTION
+ *
+ * File operations for directories.
+ *
+ * Copyright (c) 2018 NetApp Inc. All rights reserved.
+ *
+ * ZUFS-License: GPL-2.0 OR BSD-3-Clause. See module.c for LICENSE details.
+ *
+ * Authors:
+ *	Boaz Harrosh <boazh@netapp.com>
+ *	Sagi Manole <sagim@netapp.com>
+ */
+
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+#include "zuf.h"
+
+/*
+ * ->iterate_shared() for directories.
+ *
+ * Allocates a vzalloc() buffer, hands its pages to the server with a
+ * ZUS_OP_READDIR dispatch and feeds every entry found in the buffer to
+ * dir_emit(). The dispatch is repeated for as long as the server sets
+ * ioc_readdir.more.
+ *
+ * Returns 0 on success (including when dir_emit() stops the iteration),
+ * -ENOMEM if the buffer cannot be allocated, or the zufs_dispatch() error.
+ */
+static int zuf_readdir(struct file *file, struct dir_context *ctx)
+{
+	struct inode *inode = file_inode(file);
+	struct super_block *sb = inode->i_sb;
+	loff_t i_size = i_size_read(inode);
+	struct zufs_ioc_readdir ioc_readdir = {
+		.hdr.in_len = sizeof(ioc_readdir),
+		.hdr.out_len = sizeof(ioc_readdir),
+		.hdr.operation = ZUS_OP_READDIR,
+		.dir_ii = ZUII(inode)->zus_ii,
+	};
+	struct zufs_readdir_iter rdi;
+	struct page *pages[ZUS_API_MAP_MAX_PAGES];
+	struct zufs_dir_entry *zde;
+	void *addr, *__a;
+	uint nump, i;
+	int err;
+
+	/* A non-zero position at or past i_size: nothing more to read */
+	if (ctx->pos && i_size <= ctx->pos)
+		return 0;
+	if (!i_size)
+		i_size = PAGE_SIZE; /* Just for the . && .. */
+
+	/* Size the transfer buffer, capped at ZUS_API_MAP_MAX_SIZE */
+	ioc_readdir.hdr.len = min_t(loff_t, i_size - ctx->pos,
+				    ZUS_API_MAP_MAX_SIZE);
+	nump = md_o2p_up(ioc_readdir.hdr.len);
+	addr = vzalloc(md_p2o(nump));
+	if (unlikely(!addr))
+		return -ENOMEM;
+
+	WARN_ON((ulong)addr & (PAGE_SIZE - 1));
+
+	/* Collect the pages backing the vmalloc buffer for the dispatch */
+	__a = addr;
+	for (i = 0; i < nump; ++i) {
+		pages[i] = vmalloc_to_page(__a);
+		__a += PAGE_SIZE;
+	}
+
+more:
+	ioc_readdir.pos = ctx->pos;
+
+	err = zufs_dispatch(ZUF_ROOT(SBI(sb)), &ioc_readdir.hdr, pages, nump);
+	if (unlikely(err)) {
+		zuf_err("zufs_dispatch failed => %d\n", err);
+		goto out;
+	}
+
+	/* Walk the entries the server wrote into the buffer */
+	zufs_readdir_iter_init(&rdi, &ioc_readdir, addr);
+	while ((zde = zufs_next_zde(&rdi)) != NULL) {
+		zuf_dbg_verbose("%s pos=0x%lx\n",
+				zde->zstr.name, (ulong)zde->pos);
+		ctx->pos = zde->pos;
+		/* dir_emit() asked us to stop; err is 0 at this point */
+		if (!dir_emit(ctx, zde->zstr.name, zde->zstr.len, zde->ino,
+			      zde->type))
+			goto out;
+	}
+	/* Continue from the position the server reported back */
+	ctx->pos = ioc_readdir.pos;
+	if (ioc_readdir.more) {
+		zuf_dbg_err("more\n");
+		goto more;
+	}
+out:
+	vfree(addr);
+	return err;
+}
+
+/*
+ *FIXME comment to full git diff
+ */
+
+/*
+ * Send one dentry operation (@operation) for the name @str in directory
+ * @dir to the server. @inode may be NULL, in which case a NULL zus_ii is
+ * passed along. Returns 0 on success or the zufs_dispatch() error.
+ */
+static int _dentry_dispatch(struct inode *dir, struct inode *inode,
+			    struct qstr *str, int operation)
+{
+	struct zufs_ioc_dentry ioc_dentry = {
+		.hdr.in_len = sizeof(ioc_dentry),
+		.hdr.out_len = sizeof(ioc_dentry),
+		.hdr.operation = operation,
+		.zus_dir_ii = ZUII(dir)->zus_ii,
+		.zus_ii = inode ? ZUII(inode)->zus_ii : NULL,
+		.str.len = str->len,
+	};
+	int err;
+
+	memcpy(&ioc_dentry.str.name, str->name, str->len);
+
+	err = zufs_dispatch(ZUF_ROOT(SBI(dir->i_sb)), &ioc_dentry.hdr, NULL, 0);
+	if (unlikely(err))
+		zuf_err("op=%d zufs_dispatch failed => %d\n", operation, err);
+
+	return err;
+}
+
+/*
+ * Ask the server to add the name @str, referring to @inode, to directory
+ * @dir, then reload the directory size from its zus_inode.
+ * Returns 0 on success, -EINVAL for an empty name or a directory without a
+ * zus_inode, or the dispatch error.
+ * NOTE(review): @rename is not used by this function.
+ */
+int zuf_add_dentry(struct inode *dir, struct qstr *str, struct inode *inode,
+		   bool rename)
+{
+	struct zuf_inode_info *dir_zii = ZUII(dir);
+	int err;
+
+	if (!dir_zii->zi || !str->len)
+		return -EINVAL;
+
+	zus_inode_cmtime_now(dir, dir_zii->zi);
+
+	err = _dentry_dispatch(dir, inode, str, ZUS_OP_ADD_DENTRY);
+	if (unlikely(err)) {
+		zuf_err("_dentry_dispatch failed => %d\n", err);
+		return err;
+	}
+
+	i_size_write(dir, le64_to_cpu(dir_zii->zi->i_size));
+	return 0;
+}
+
+/*
+ * Ask the server to remove the name @str from directory @dir, then reload
+ * the directory size from its zus_inode.
+ * Returns 0 on success, -EINVAL for an empty name or a directory without a
+ * zus_inode (same validation as zuf_add_dentry()), or the dispatch error.
+ */
+int zuf_remove_dentry(struct inode *dir, struct qstr *str)
+{
+	struct zuf_inode_info *zii = ZUII(dir);
+	int err;
+
+	/* zii->zi is dereferenced below; refuse to go on without it */
+	if (!str->len || !zii->zi)
+		return -EINVAL;
+
+	zus_inode_cmtime_now(dir, zii->zi);
+	err = _dentry_dispatch(dir, NULL, str, ZUS_OP_REMOVE_DENTRY);
+	if (unlikely(err))
+		return err;
+
+	i_size_write(dir, le64_to_cpu(zii->zi->i_size));
+	return 0;
+}
+
+/*
+ * File operations installed on directory inodes. Only readdir is
+ * implemented by zuf; seek, read and fsync use the generic/no-op helpers.
+ */
+const struct file_operations zuf_dir_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+	.iterate_shared	= zuf_readdir,
+	.fsync		= noop_fsync,
+};
diff --git a/fs/zuf/file.c b/fs/zuf/file.c
new file mode 100644
index 0000000..3b37d9f
--- /dev/null
+++ b/fs/zuf/file.c
@@ -0,0 +1,403 @@
+/*
+ * BRIEF DESCRIPTION
+ *
+ * File operations for files.
+ *
+ * Copyright (c) 2018 NetApp Inc. All rights reserved.
+ *
+ * ZUFS-License: GPL-2.0 OR BSD-3-Clause. See module.c for LICENSE details.
+ *
+ * Authors:
+ *	Boaz Harrosh <boazh@netapp.com>
+ *	Sagi Manole <sagim@netapp.com>
+ */
+
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <linux/falloc.h>
+#include <linux/mman.h>
+#include <linux/fadvise.h>
+#include "zuf.h"
+
+/*
+ * ->fallocate() for regular files.
+ *
+ * Under the inode write lock: validates a size-extending request with
+ * inode_newsize_ok(), calls zus_inode_cmtime_now() and forwards @mode,
+ * @offset and @len to the server as ZUS_OP_FALLOCATE. i_size and i_blocks
+ * are then reloaded from the zus_inode, also when the dispatch failed.
+ *
+ * Returns 0 on success, -EINVAL for non-regular files, or the error from
+ * inode_newsize_ok() / zufs_dispatch().
+ */
+static long zuf_fallocate(struct file *file, int mode, loff_t offset,
+			   loff_t len)
+{
+	/* file_inode() and the cached zii, as used by the rest of zuf */
+	struct inode *inode = file_inode(file);
+	struct zuf_inode_info *zii = ZUII(inode);
+	struct zufs_ioc_range ioc_range = {
+		.hdr.in_len = sizeof(ioc_range),
+		.hdr.operation = ZUS_OP_FALLOCATE,
+		.zus_ii = zii->zus_ii,
+		.offset = offset,
+		.length = len,
+		.opflags = mode,
+	};
+	int err;
+
+	zuf_dbg_vfs("[%ld] mode=0x%x offset=0x%llx len=0x%llx\n",
+		     inode->i_ino, mode, offset, len);
+
+	if (!S_ISREG(inode->i_mode))
+		return -EINVAL;
+
+	zuf_w_lock(zii);
+
+	/* Only a request that grows the file needs the new size checked */
+	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+	     (i_size_read(inode) < offset + len)) {
+		err = inode_newsize_ok(inode, offset + len);
+		if (unlikely(err))
+			goto out;
+	}
+
+	zus_inode_cmtime_now(inode, zii->zi);
+
+	err = zufs_dispatch(ZUF_ROOT(SBI(inode->i_sb)), &ioc_range.hdr,
+			    NULL, 0);
+	if (unlikely(err))
+		zuf_err("zufs_dispatch failed => %d\n", err);
+
+	/* Reload from the zus_inode regardless of the dispatch result */
+	i_size_write(inode, le64_to_cpu(zii->zi->i_size));
+	inode->i_blocks = le64_to_cpu(zii->zi->i_blocks);
+
+out:
+	zuf_w_unlock(zii);
+
+	return err;
+}
+
+/*
+ * ->llseek(). Everything except SEEK_DATA/SEEK_HOLE goes to
+ * generic_file_llseek(). For SEEK_DATA/SEEK_HOLE the answer comes from the
+ * server (ZUS_OP_LLSEEK), except for an inode with no blocks which is
+ * answered locally. In both cases the file position is updated to the
+ * offset that is returned.
+ *
+ * Returns the new offset, -EINVAL for an out-of-range @offset, -ENXIO when
+ * @offset is at/after EOF or no data exists, or the dispatch error.
+ */
+static loff_t zuf_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file_inode(file);
+	struct zuf_inode_info *zii = ZUII(inode);
+	struct zufs_ioc_seek ioc_seek = {
+		.hdr.in_len = sizeof(ioc_seek),
+		.hdr.out_len = sizeof(ioc_seek),
+		.hdr.operation = ZUS_OP_LLSEEK,
+		.zus_ii = zii->zus_ii,
+		.offset_in = offset,
+		.whence = whence,
+	};
+	int err = 0;
+
+	zuf_dbg_vfs("[%ld] offset=0x%llx whence=%d\n",
+		     inode->i_ino, offset, whence);
+
+	if (whence != SEEK_DATA && whence != SEEK_HOLE)
+		return generic_file_llseek(file, offset, whence);
+
+	zuf_r_lock(zii);
+
+	if ((offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) ||
+	    offset > inode->i_sb->s_maxbytes) {
+		err = -EINVAL;
+		goto out;
+	}
+	if (i_size_read(inode) <= offset) {
+		err = -ENXIO;
+		goto out;
+	}
+
+	if (!inode->i_blocks) {
+		/* No blocks allocated: answer without asking the server */
+		if (whence != SEEK_HOLE) {
+			err = -ENXIO;
+			goto out;
+		}
+		ioc_seek.offset_out = i_size_read(inode);
+	} else {
+		err = zufs_dispatch(ZUF_ROOT(SBI(inode->i_sb)), &ioc_seek.hdr,
+				    NULL, 0);
+		if (unlikely(err)) {
+			zuf_err("zufs_dispatch failed => %d\n", err);
+			goto out;
+		}
+	}
+
+	/* A successful lseek must move the file position on every path */
+	if (ioc_seek.offset_out != file->f_pos) {
+		file->f_pos = ioc_seek.offset_out;
+		file->f_version = 0;
+	}
+
+out:
+	zuf_r_unlock(zii);
+
+	return err ?: ioc_seek.offset_out;
+}
+
+/*
+ * Sync the byte range [@start, @end] of @inode through the server.
+ * This function is called by both msync() and fsync().
+ *
+ * The range is clipped to i_size. If the inode has write-mapped pages the
+ * range is unmapped, ZUS_OP_SYNC is dispatched and the write_unmapped
+ * count filled in by the dispatch is handed to zuf_sync_dec().
+ *
+ * Returns 0 on success or when there is nothing to do, -ENODATA when the
+ * clipped end lies before @start, or the zufs_dispatch() error.
+ */
+int zuf_isync(struct inode *inode, loff_t start, loff_t end, int datasync)
+{
+	struct zuf_inode_info *zii = ZUII(inode);
+	struct zufs_ioc_range ioc_range = {
+		.hdr.in_len = sizeof(ioc_range),
+		.hdr.operation = ZUS_OP_SYNC,
+		.zus_ii = zii->zus_ii,
+		.offset = start,
+		.opflags = datasync,
+	};
+	loff_t isize;
+	/*
+	 * A whole-file fsync passes end == LLONG_MAX, so do the +1 in
+	 * unsigned 64-bit arithmetic: no signed overflow, and no truncation
+	 * on 32-bit where ulong is narrower than loff_t.
+	 */
+	u64 uend = (u64)end + 1;
+	int err = 0;
+
+	zuf_dbg_vfs(
+		"[%ld] start=0x%llx end=0x%llx  datasync=%d write_mapped=%d\n",
+		inode->i_ino, start, end, datasync,
+		atomic_read(&zii->write_mapped));
+
+	/* Syncs are serialized so they do not fight with each other, which
+	 * is also more efficient. Reads/writes and page-faults must not be
+	 * locked out, hence the dedicated sync lock.
+	 */
+	zuf_smw_lock(zii);
+
+	isize = i_size_read(inode);
+	if (!isize) {
+		zuf_dbg_mmap("[%ld] file is empty\n", inode->i_ino);
+		goto out;
+	}
+	if (isize < uend)
+		uend = isize;
+	if (uend < start) {
+		zuf_dbg_mmap("[%ld] isize=0x%llx start=0x%llx end=0x%lx\n",
+				 inode->i_ino, isize, start, (ulong)uend);
+		err = -ENODATA;
+		goto out;
+	}
+
+	if (!atomic_read(&zii->write_mapped))
+		goto out; /* Nothing to do on this inode */
+
+	ioc_range.length = uend - start;
+	unmap_mapping_range(inode->i_mapping, start, ioc_range.length, 0);
+
+	err = zufs_dispatch(ZUF_ROOT(SBI(inode->i_sb)), &ioc_range.hdr,
+			    NULL, 0);
+	if (unlikely(err))
+		zuf_err("zufs_dispatch failed => %d\n", err);
+
+	zuf_sync_dec(inode, ioc_range.write_unmapped);
+
+out:
+	zuf_smw_unlock(zii);
+	return err;
+}
+
+/* ->fsync(): resolve the inode behind @file and let zuf_isync() do the work */
+static int zuf_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+	struct inode *inode = file_inode(file);
+
+	return zuf_isync(inode, start, end, datasync);
+}
+
+/*
+ * ->flush(): this callback is called when a file is closed.
+ * It only emits a debug trace with the inode number and always returns 0.
+ */
+static int zuf_flush(struct file *file, fl_owner_t id)
+{
+	zuf_dbg_vfs("[%ld]\n", file->f_inode->i_ino);
+
+	return 0;
+}
+
+/*
+ * ->fiemap() placeholder. Validates the fiemap flags (only
+ * FIEMAP_FLAG_SYNC is accepted, otherwise -EBADR) and takes/releases the
+ * inode read lock, but reports no extents yet: the server side call is
+ * still a TODO. Returns 0 when the flags are acceptable.
+ */
+static int tozu_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+		       u64 offset, u64 len)
+{
+	int err = 0;
+	/* Only used by the debug trace below for now */
+	ulong start_index = md_o2p(offset);
+	ulong end_index = md_o2p_up(offset + len);
+	struct zuf_inode_info *zii = ZUII(inode);
+
+	zuf_dbg_vfs(
+		"[%ld] offset=0x%llx len=0x%llx i-start=0x%lx i-end=0x%lx\n",
+		inode->i_ino, offset, len, start_index, end_index);
+
+	if (fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))
+		return -EBADR;
+
+	zuf_r_lock(zii);
+
+	/* TODO: ZUS fiemap (&msi)*/
+
+	zuf_r_unlock(zii);
+	return err;
+}
+
+/*
+ * Write-lock two inode-infos in a stable order (lower address first) so
+ * that two tasks locking the same pair in opposite argument order cannot
+ * deadlock. When both arguments are the same object it is locked once.
+ */
+static void _lock_two_ziis(struct zuf_inode_info *zii1,
+			   struct zuf_inode_info *zii2)
+{
+	/* Order by address; swapping zii2 with itself would be a no-op */
+	if (zii1 > zii2)
+		swap(zii1, zii2);
+
+	zuf_w_lock(zii1);
+	if (zii1 != zii2)
+		zuf_w_lock_nested(zii2);
+}
+
+/*
+ * Release the locks taken by _lock_two_ziis(): the pair is put in the same
+ * address order and unlocked in reverse (higher address first). When both
+ * arguments are the same object it is unlocked once.
+ */
+static void _unlock_two_ziis(struct zuf_inode_info *zii1,
+		      struct zuf_inode_info *zii2)
+{
+	/* Same ordering rule as the lock side */
+	if (zii1 > zii2)
+		swap(zii1, zii2);
+
+	if (zii1 != zii2)
+		zuf_w_unlock(zii2);
+	zuf_w_unlock(zii1);
+}
+
+/*
+ * Common worker for clone and copy: with both inodes write-locked, unmap
+ * the affected ranges, call zus_inode_cmtime_now() on the destination and
+ * dispatch @operation to the server. On success the destination's i_blocks
+ * and i_size are reloaded from its zus_inode.
+ *
+ * Returns 0 on success or the zufs_dispatch() error.
+ */
+static int _clone_file_range(struct inode *src_inode, loff_t pos_in,
+			     struct inode *dst_inode, loff_t pos_out,
+			     u64 len, int operation)
+{
+	struct zuf_inode_info *src_zii = ZUII(src_inode);
+	struct zuf_inode_info *dst_zii = ZUII(dst_inode);
+	struct zus_inode *dst_zi = dst_zii->zi;
+	struct super_block *sb = src_inode->i_sb;
+	struct zufs_ioc_clone ioc_clone = {
+		.hdr.in_len = sizeof(ioc_clone),
+		.hdr.out_len = sizeof(ioc_clone),
+		.hdr.operation = operation,
+		.src_zus_ii = src_zii->zus_ii,
+		.dst_zus_ii = dst_zii->zus_ii,
+		.pos_in = pos_in,
+		.pos_out = pos_out,
+		.len = len,
+	};
+	int err;
+
+	_lock_two_ziis(src_zii, dst_zii);
+
+	/* NOTE: len==0 means to-end-of-file which is what we want */
+	unmap_mapping_range(src_inode->i_mapping, pos_in,  len, 0);
+	unmap_mapping_range(dst_inode->i_mapping, pos_out, len, 0);
+
+	zus_inode_cmtime_now(dst_inode, dst_zi);
+	err = zufs_dispatch(ZUF_ROOT(SBI(sb)), &ioc_clone.hdr, NULL, 0);
+	if (unlikely(err)) {
+		zuf_err("failed to clone %ld -> %ld ; err=%d\n",
+			 src_inode->i_ino, dst_inode->i_ino, err);
+		goto out;
+	}
+
+	dst_inode->i_blocks = le64_to_cpu(dst_zi->i_blocks);
+	/* zus_inode fields are little-endian; convert like everywhere else */
+	i_size_write(dst_inode, le64_to_cpu(dst_zi->i_size));
+
+out:
+	_unlock_two_ziis(src_zii, dst_zii);
+
+	return err;
+}
+
+/*
+ * ->clone_file_range(): validate the request and pass it to
+ * _clone_file_range() as ZUS_OP_CLONE.
+ *
+ * Checks done here:
+ *  - same inode: identical positions are a no-op (returns 0), overlapping
+ *    source/destination ranges are rejected with -EINVAL;
+ *  - @pos_in and @pos_out must be aligned to the super-block block size;
+ *  - an unaligned @len is only accepted when the range reaches the end of
+ *    both files, and is then rounded up with md_p2o(md_o2p_up()).
+ *
+ * Returns 0 on success, -EINVAL on a rejected request, or the error from
+ * _clone_file_range().
+ */
+static int zuf_clone_file_range(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out, u64 len)
+{
+	struct inode *src_inode = file_inode(file_in);
+	struct inode *dst_inode = file_inode(file_out);
+	ulong src_size = i_size_read(src_inode);
+	ulong dst_size = i_size_read(dst_inode);
+	struct super_block *sb = src_inode->i_sb;
+	ulong len_up = len;
+	int err;
+
+	zuf_dbg_vfs(
+		"ino-in=%ld ino-out=%ld pos_in=0x%llx pos_out=0x%llx length=0x%llx\n",
+		src_inode->i_ino, dst_inode->i_ino, pos_in, pos_out, len);
+
+	/* Cloning within one file: ranges must be distinct and disjoint */
+	if (src_inode == dst_inode) {
+		if (pos_in == pos_out) {
+			zuf_dbg_err("[%ld] Clone nothing!!\n",
+				src_inode->i_ino);
+			return 0;
+		}
+		if (pos_in < pos_out) {
+			if (pos_in + len > pos_out) {
+				zuf_dbg_err(
+					"[%ld] overlapping pos_in < pos_out?? => EINVAL\n",
+					src_inode->i_ino);
+				return -EINVAL;
+			}
+		} else {
+			if (pos_out + len > pos_in) {
+				zuf_dbg_err("[%ld] overlapping pos_out < pos_in?? => EINVAL\n",
+					src_inode->i_ino);
+				return -EINVAL;
+			}
+		}
+	}
+
+	/* Both start positions must sit on a block boundary */
+	if ((pos_in & (sb->s_blocksize - 1)) ||
+	    (pos_out & (sb->s_blocksize - 1))) {
+		zuf_err("[%ld] Not aligned len=0x%llx pos_in=0x%llx "
+			"pos_out=0x%llx src-size=0x%llx dst-size=0x%llx\n",
+			 src_inode->i_ino, len, pos_in, pos_out,
+			 i_size_read(src_inode), i_size_read(dst_inode));
+		return -EINVAL;
+	}
+
+	/* STD says that len==0 means up to end of SRC */
+	if (!len)
+		len_up = len = src_size - pos_in;
+
+	/*
+	 * Both positions are 0, the range covers all of the source and the
+	 * destination is not larger than the source: send len 0, which
+	 * _clone_file_range() treats as to-end-of-file.
+	 */
+	if (!pos_in && !pos_out && (src_size <= pos_in + len) &&
+	    (dst_size <= src_size)) {
+		len_up = 0;
+	} else if (len & (sb->s_blocksize - 1)) {
+		/* un-aligned len, see if it is beyond EOF */
+		if ((src_size > pos_in  + len) ||
+		    (dst_size > pos_out + len)) {
+			zuf_err("[%ld] Not aligned len=0x%llx pos_in=0x%llx "
+				"pos_out=0x%llx src-size=0x%lx dst-size=0x%lx\n",
+				src_inode->i_ino, len, pos_in, pos_out,
+				src_size, dst_size);
+			return -EINVAL;
+		}
+		len_up = md_p2o(md_o2p_up(len));
+	}
+
+	err = _clone_file_range(src_inode, pos_in, dst_inode, pos_out, len_up,
+				ZUS_OP_CLONE);
+	if (unlikely(err))
+		zuf_err("_clone_file_range failed => %d\n", err);
+
+	return err;
+}
+
+/*
+ * ->copy_file_range(): run _clone_file_range() with ZUS_OP_COPY.
+ * Returns @len on success or the negative error from _clone_file_range().
+ */
+static ssize_t zuf_copy_file_range(struct file *file_in, loff_t pos_in,
+				   struct file *file_out, loff_t pos_out,
+				   size_t len, uint flags)
+{
+	struct inode *src_inode = file_inode(file_in);
+	struct inode *dst_inode = file_inode(file_out);
+	ssize_t ret;
+
+	zuf_dbg_vfs("ino-in=%ld ino-out=%ld pos_in=0x%llx pos_out=0x%llx length=0x%lx\n",
+		    src_inode->i_ino, dst_inode->i_ino, pos_in, pos_out, len);
+
+	ret = _clone_file_range(src_inode, pos_in, dst_inode, pos_out, len,
+				ZUS_OP_COPY);
+	if (ret)
+		return ret;
+
+	return len;
+}
+
+/*
+ * ->release(): nothing to tear down yet. A non-NULL private_data is not
+ * expected at this point and is only reported. Always returns 0.
+ */
+static int zuf_file_release(struct inode *inode, struct file *filp)
+{
+	if (likely(!filp->private_data))
+		return 0;
+
+	zuf_err("not yet\n");
+	return 0;
+}
+
+/* File operations installed on regular-file inodes */
+const struct file_operations zuf_file_operations = {
+	.llseek			= zuf_llseek,
+	.open			= generic_file_open,
+	.fsync			= zuf_fsync,
+	.flush			= zuf_flush,
+	.release		= zuf_file_release,
+	.fallocate		= zuf_fallocate,
+	.copy_file_range	= zuf_copy_file_range,
+	.clone_file_range	= zuf_clone_file_range,
+};
+
+/* Inode operations installed on regular-file inodes */
+const struct inode_operations zuf_file_inode_operations = {
+	.setattr	= zuf_setattr,
+	.getattr	= zuf_getattr,
+	.update_time	= zuf_update_time,
+	.fiemap		= tozu_fiemap,
+};
diff --git a/fs/zuf/inode.c b/fs/zuf/inode.c
index 7aa8c9e..1129aae 100644
--- a/fs/zuf/inode.c
+++ b/fs/zuf/inode.c
@@ -12,16 +12,424 @@
  *	Sagi Manole <sagim@netapp.com>"
  */
 
+#include <linux/fs.h>
+#include <linux/aio.h>
+#include <linux/highuid.h>
+#include <linux/module.h>
+#include <linux/mpage.h>
+#include <linux/backing-dev.h>
+#include <linux/types.h>
+#include <linux/ratelimit.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/security.h>
+#include <linux/delay.h>
 #include "zuf.h"
 
+/* Flags that should be inherited by new inodes from their parent. */
+#define ZUFS_FL_INHERITED (FS_SECRM_FL | FS_UNRM_FL | FS_COMPR_FL |	\
+			FS_SYNC_FL | FS_NODUMP_FL | FS_NOATIME_FL |	\
+			FS_COMPRBLK_FL | FS_NOCOMP_FL |			\
+			FS_JOURNAL_DATA_FL | FS_NOTAIL_FL | FS_DIRSYNC_FL)
+/* Flags that are appropriate for regular files (all but dir-specific ones). */
+#define ZUFS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
+/* Flags that are appropriate for non-directories/regular files. */
+#define ZUFS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL)
+
+
+/*
+ * A zus_inode is usable when it is active and its mode carries one of the
+ * known file-type bits. Anything else is logged and rejected.
+ */
+static bool _zi_valid(struct zus_inode *zi)
+{
+	unsigned int fmt;
+
+	if (!_zi_active(zi))
+		return false;
+
+	fmt = le16_to_cpu(zi->i_mode) & S_IFMT;
+	if (fmt == S_IFREG || fmt == S_IFDIR || fmt == S_IFLNK ||
+	    fmt == S_IFBLK || fmt == S_IFCHR || fmt == S_IFIFO ||
+	    fmt == S_IFSOCK)
+		return true;
+
+	zuf_err("unknown file type ino=%lld mode=%d\n", zi->i_ino,
+		  zi->i_mode);
+	return false;
+}
+
+/*
+ * Fill a VFS inode from the zus_inode @zi: mode, owner, link count, size,
+ * blocks, times, generation and flags, then install the operation tables
+ * that match the file type. Special files get i_size 0 and are set up with
+ * init_special_inode(). An unknown type is only logged.
+ */
+static void _set_inode_from_zi(struct inode *inode, struct zus_inode *zi)
+{
+	inode->i_mode = le16_to_cpu(zi->i_mode);
+	inode->i_uid = KUIDT_INIT(le32_to_cpu(zi->i_uid));
+	inode->i_gid = KGIDT_INIT(le32_to_cpu(zi->i_gid));
+	set_nlink(inode, le16_to_cpu(zi->i_nlink));
+	/* i_size comes from zi->i_size only; i_blocks has its own field */
+	inode->i_size = le64_to_cpu(zi->i_size);
+	inode->i_blocks = le64_to_cpu(zi->i_blocks);
+	mt_to_timespec(&inode->i_atime, &zi->i_atime);
+	mt_to_timespec(&inode->i_ctime, &zi->i_ctime);
+	mt_to_timespec(&inode->i_mtime, &zi->i_mtime);
+	inode->i_generation = le64_to_cpu(zi->i_generation);
+	zuf_set_inode_flags(inode, zi);
+
+	inode->i_mapping->a_ops = &zuf_aops;
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFREG:
+		inode->i_op = &zuf_file_inode_operations;
+		inode->i_fop = &zuf_file_operations;
+		break;
+	case S_IFDIR:
+		inode->i_op = &zuf_dir_inode_operations;
+		inode->i_fop = &zuf_dir_operations;
+		break;
+	case S_IFLNK:
+		inode->i_op = &zuf_symlink_inode_operations;
+		break;
+	case S_IFBLK:
+	case S_IFCHR:
+	case S_IFIFO:
+	case S_IFSOCK:
+		inode->i_size = 0;
+		inode->i_op = &zuf_special_inode_operations;
+		init_special_inode(inode, inode->i_mode,
+				   le32_to_cpu(zi->i_rdev));
+		break;
+	default:
+		zuf_err("unknown file type ino=%lld mode=%d\n", zi->i_ino,
+			  zi->i_mode);
+		break;
+	}
+
+	inode->i_ino = le64_to_cpu(zi->i_ino);
+}
+
+/*
+ * Mirror the VFS inode flags (S_SYNC, S_APPEND, S_IMMUTABLE, S_NOATIME,
+ * S_DIRSYNC) into the matching FS_*_FL bits of zi->i_flags. All other bits
+ * of zi->i_flags are left untouched.
+ */
+static void tozu_get_inode_flags(struct inode *inode, struct zus_inode *zi)
+{
+	unsigned int vfs_flags = inode->i_flags;
+	unsigned int fl = le32_to_cpu(zi->i_flags);
+
+	/* Start with the five mirrored bits cleared ... */
+	fl &= ~(FS_SYNC_FL | FS_APPEND_FL | FS_IMMUTABLE_FL |
+		FS_NOATIME_FL | FS_DIRSYNC_FL);
+
+	/* ... and raise each one whose VFS counterpart is set */
+	fl |= (vfs_flags & S_SYNC) ? FS_SYNC_FL : 0;
+	fl |= (vfs_flags & S_APPEND) ? FS_APPEND_FL : 0;
+	fl |= (vfs_flags & S_IMMUTABLE) ? FS_IMMUTABLE_FL : 0;
+	fl |= (vfs_flags & S_NOATIME) ? FS_NOATIME_FL : 0;
+	fl |= (vfs_flags & S_DIRSYNC) ? FS_DIRSYNC_FL : 0;
+
+	zi->i_flags = cpu_to_le32(fl);
+}
+
+/*
+ * Reduce @flags to the inheritable set, then further to what is
+ * appropriate for the inode type described by @mode.
+ */
+static __le32 _mask_flags(umode_t mode, __le32 flags)
+{
+	__le32 inherited = flags & cpu_to_le32(ZUFS_FL_INHERITED);
+
+	if (S_ISDIR(mode))
+		return inherited;
+	if (S_ISREG(mode))
+		return inherited & cpu_to_le32(ZUFS_REG_FLMASK);
+
+	return inherited & cpu_to_le32(ZUFS_OTHER_FLMASK);
+}
+
+/*
+ * Fill the zus_inode @zi from the freshly initialized VFS @inode. Flags
+ * are first inherited from the parent directory @dir (masked for the inode
+ * type) and then merged with the VFS inode flags.
+ *
+ * Returns 0, or -EACCES when @dir has no zus_inode.
+ */
+static int _set_zi_from_inode(struct inode *dir, struct zus_inode *zi,
+			      struct inode *inode)
+{
+	struct zus_inode *zidir = zus_zi(dir);
+
+	if (unlikely(!zidir))
+		return -EACCES;
+
+	zi->i_flags = _mask_flags(inode->i_mode, zidir->i_flags);
+	zi->i_mode = cpu_to_le16(inode->i_mode);
+	zi->i_uid = cpu_to_le32(__kuid_val(inode->i_uid));
+	zi->i_gid = cpu_to_le32(__kgid_val(inode->i_gid));
+	/* NOTE: zus is boss of i_nlink (but let it know what we think) */
+	zi->i_nlink = cpu_to_le16(inode->i_nlink);
+	zi->i_size = cpu_to_le64(inode->i_size);
+	zi->i_blocks = cpu_to_le64(inode->i_blocks);
+	timespec_to_mt(&zi->i_atime, &inode->i_atime);
+	timespec_to_mt(&zi->i_mtime, &inode->i_mtime);
+	timespec_to_mt(&zi->i_ctime, &inode->i_ctime);
+	/* i_generation is read and written as 64-bit everywhere else */
+	zi->i_generation = cpu_to_le64(inode->i_generation);
+	tozu_get_inode_flags(inode, zi);
+
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+		zi->i_rdev = cpu_to_le32(inode->i_rdev);
+
+	return 0;
+}
+
+/* True when @t, converted with timespec_to_mt(), equals the stored *@mt */
+static bool _times_equal(struct timespec *t, __le64 *mt)
+{
+	__le64 converted;
+
+	timespec_to_mt(&converted, t);
+
+	return *mt == converted;
+}
+
+/*
+ * This function checks if VFS's inode and zus_inode are in sync.
+ *
+ * Times, size, mode, uid, gid, nlink, ino and blocks are compared. When
+ * any of them differs, every differing field is reported with zuf_warn().
+ * Nothing is modified.
+ * NOTE(review): the two helper macros are not #undef'ed after use.
+ */
+static void _warn_inode_dirty(struct inode *inode, struct zus_inode *zi)
+{
+/* Warn when the two integer expressions X and Y differ */
+#define __MISMACH_INT(inode, X, Y)	\
+	if (X != Y)			\
+		zuf_warn("[%ld] " #X"=0x%lx " #Y"=0x%lx""\n",	\
+			  inode->i_ino, (ulong)(X), (ulong)(Y))
+/* Warn when timespec X differs from the zus_inode time Y */
+#define __MISMACH_TIME(inode, X, Y)	\
+	if (!_times_equal(X, Y)) {	\
+		struct timespec t;	\
+		mt_to_timespec(&t, (Y));\
+		zuf_warn("[%ld] " #X"=%ld:%ld " #Y"=%ld:%ld""\n",	\
+			  inode->i_ino, (X)->tv_sec, (X)->tv_nsec,	\
+			  t.tv_sec, t.tv_nsec);		\
+	}
+
+	/* One combined test first, so the in-sync case prints nothing */
+	if (!_times_equal(&inode->i_ctime, &zi->i_ctime) ||
+	    !_times_equal(&inode->i_mtime, &zi->i_mtime) ||
+	    !_times_equal(&inode->i_atime, &zi->i_atime) ||
+	    inode->i_size != le64_to_cpu(zi->i_size) ||
+	    inode->i_mode != le16_to_cpu(zi->i_mode) ||
+	    __kuid_val(inode->i_uid) != le32_to_cpu(zi->i_uid) ||
+	    __kgid_val(inode->i_gid) != le32_to_cpu(zi->i_gid) ||
+	    inode->i_nlink != le16_to_cpu(zi->i_nlink) ||
+	    inode->i_ino != _zi_ino(zi) ||
+	    inode->i_blocks != le64_to_cpu(zi->i_blocks)) {
+		__MISMACH_TIME(inode, &inode->i_ctime, &zi->i_ctime);
+		__MISMACH_TIME(inode, &inode->i_mtime, &zi->i_mtime);
+		__MISMACH_TIME(inode, &inode->i_atime, &zi->i_atime);
+		__MISMACH_INT(inode, inode->i_size, le64_to_cpu(zi->i_size));
+		__MISMACH_INT(inode, inode->i_mode, le16_to_cpu(zi->i_mode));
+		__MISMACH_INT(inode, __kuid_val(inode->i_uid),
+			      le32_to_cpu(zi->i_uid));
+		__MISMACH_INT(inode, __kgid_val(inode->i_gid),
+			      le32_to_cpu(zi->i_gid));
+		__MISMACH_INT(inode, inode->i_nlink, le16_to_cpu(zi->i_nlink));
+		__MISMACH_INT(inode, inode->i_ino, _zi_ino(zi));
+		__MISMACH_INT(inode, inode->i_blocks,
+			      le64_to_cpu(zi->i_blocks));
+	}
+}
+
+/* Record the zus_inode and the server's inode-info in the zuf inode-info */
+static void _zii_connect(struct inode *inode, struct zus_inode *zi,
+			 struct zus_inode_info *zus_ii)
+{
+	struct zuf_inode_info *info = ZUII(inode);
+
+	info->zus_ii = zus_ii;
+	info->zi = zi;
+}
+
+/*
+ * Get the VFS inode for the server inode (@zus_ii, @_zi).
+ *
+ * @_zi is translated and verified with md_addr_verify(); the zus_inode
+ * must also pass _zi_valid(). The inode is looked up with iget_locked();
+ * a new one is initialized from the zus_inode, connected to @zus_ii and
+ * unlocked. *@exist tells the caller whether the inode was already cached.
+ *
+ * Returns the inode, or ERR_PTR(-EIO) for a bad @_zi or NULL @zus_ii,
+ * ERR_PTR(-ESTALE) for an invalid zus_inode, ERR_PTR(-ENOMEM) when
+ * iget_locked() fails.
+ */
 struct inode *zuf_iget(struct super_block *sb, struct zus_inode_info *zus_ii,
 		       zu_dpp_t _zi, bool *exist)
 {
-	return ERR_PTR(-ENOMEM);
+	struct zuf_sb_info *sbi = SBI(sb);
+	struct zus_inode *zi = md_addr_verify(sbi->md, _zi);
+	struct inode *inode;
+
+	if (unlikely(!zi)) {
+		/* Don't trust ZUS pointers */
+		zuf_err("Bad zus_inode 0x%llx\n", _zi);
+		return ERR_PTR(-EIO);
+	}
+	if (unlikely(!zus_ii)) {
+		zuf_err("zus_ii NULL\n");
+		return ERR_PTR(-EIO);
+	}
+
+	if (!_zi_valid(zi)) {
+		zuf_err("inactive node ino=%lld links=%d mode=%d\n", zi->i_ino,
+			  zi->i_nlink, zi->i_mode);
+		return ERR_PTR(-ESTALE);
+	}
+
+	zuf_dbg_zus("[%lld] size=0x%llx, blocks=0x%llx ct=0x%llx mt=0x%llx link=0x%x mode=0x%x xattr=0x%llx\n",
+		    zi->i_ino, zi->i_size, zi->i_blocks, zi->i_ctime,
+		    zi->i_mtime, zi->i_nlink, zi->i_mode, zi->i_xattr);
+
+	inode = iget_locked(sb, _zi_ino(zi));
+	if (unlikely(!inode))
+		return ERR_PTR(-ENOMEM);
+
+	/* Already in the inode cache: hand it back as is */
+	if (!(inode->i_state & I_NEW)) {
+		*exist = true;
+		return inode;
+	}
+
+	/* New inode: populate it before unlocking */
+	*exist = false;
+	_set_inode_from_zi(inode, zi);
+	_zii_connect(inode, zi, zus_ii);
+
+	unlock_new_inode(inode);
+	return inode;
+}
+
+/*
+ * Dispatch @operation for the server inode-info @zus_ii to the server.
+ * Returns 0 on success or the zufs_dispatch() error (which is also logged).
+ */
+int zuf_evict_dispatch(struct super_block *sb, struct zus_inode_info *zus_ii,
+		       int operation)
+{
+	struct zufs_ioc_evict_inode ioc_evict_inode = {
+		.hdr.operation = operation,
+		.hdr.in_len = sizeof(ioc_evict_inode),
+		.hdr.out_len = sizeof(ioc_evict_inode),
+		.zus_ii = zus_ii,
+	};
+	int err = zufs_dispatch(ZUF_ROOT(SBI(sb)), &ioc_evict_inode.hdr,
+				NULL, 0);
+
+	if (unlikely(err))
+		zuf_err("zufs_dispatch failed op=%d => %d\n",
+			 operation, err);
+
+	return err;
 }
 
 void zuf_evict_inode(struct inode *inode)
 {
+	struct super_block *sb = inode->i_sb;
+	struct zuf_inode_info *zii = ZUII(inode);
+	int operation;
+	int write_mapped;
+
+	if (!inode->i_nlink) {
+		if (unlikely(!zii->zi)) {
+			zuf_dbg_err("[%ld] inode without zi mode=0x%x size=0x%llx\n",
+				    inode->i_ino, inode->i_mode, inode->i_size);
+			goto out;
+		}
+
+		if (unlikely(is_bad_inode(inode)))
+			zuf_warn("[%ld] inode is bad mode=0x%x zi=%p\n",
+				  inode->i_ino, inode->i_mode, zii->zi);
+		else
+			_warn_inode_dirty(inode, zii->zi);
+
+		operation = ZUS_OP_FREE_INODE;
+	} else {
+		zuf_dbg_verbose("[%ld] inode is going down?\n", inode->i_ino);
+
+		if (unlikely(!inode || !sb || !sb->s_root ||
+			     !sb->s_root->d_inode ||
+			     !sb->s_root->d_inode->i_mapping))
+			goto out;
+
+		operation = ZUS_OP_EVICT_INODE;
+	}
+
+	zuf_evict_dispatch(sb, zii->zus_ii, operation);
+
+out:
+	zii->zus_ii = NULL;
+	zii->zi = NULL;
+
+	if (zii && zii->zero_page) {
+		zii->zero_page->mapping = NULL;
+		__free_pages(zii->zero_page, 0);
+		zii->zero_page = NULL;
+	}
+
+	/* ZUS on evict has synced all mmap dirty pages, YES? */
+	write_mapped = atomic_read(&zii->write_mapped);
+	if (unlikely(write_mapped || !list_empty(&zii->i_mmap_dirty))) {
+		zuf_dbg_mmap("[%ld] !!!! write_mapped=%d list_empty=%d\n",
+			      inode->i_ino, write_mapped,
+			      list_empty(&zii->i_mmap_dirty));
+		zuf_sync_dec(inode, write_mapped);
+	}
+
+	clear_inode(inode);
+}
+
+/*
+ * Create a new inode named @qstr under @dir, both in the VFS and on the
+ * server (ZUS_OP_NEW_INODE).
+ *
+ * @rdev_or_isize is i_size in the case of a symlink
+ * and rdev in the case of special-files
+ * @symname, when not NULL, is prepared with zuf_prepare_symname() and its
+ * pages are passed along with the dispatch.
+ * @tmpfile sets ZI_TMPFILE in the request flags.
+ *
+ * After the dispatch the VFS inode is refreshed from the zus_inode the
+ * server returned and inserted with insert_inode_locked().
+ *
+ * Returns the new inode, or an ERR_PTR(): -ENOMEM, -EIO when the server
+ * returned a zus_inode pointer that does not verify, or the error from
+ * _set_zi_from_inode() / zufs_dispatch() / insert_inode_locked().
+ */
+struct inode *zuf_new_inode(struct inode *dir, umode_t mode,
+			    const struct qstr *qstr, const char *symname,
+			    ulong rdev_or_isize, bool tmpfile)
+{
+	struct super_block *sb = dir->i_sb;
+	struct zuf_sb_info *sbi = SBI(sb);
+	struct zufs_ioc_new_inode ioc_new_inode = {
+		.hdr.in_len = sizeof(ioc_new_inode),
+		.hdr.out_len = sizeof(ioc_new_inode),
+		.hdr.operation = ZUS_OP_NEW_INODE,
+		.dir_ii = ZUII(dir)->zus_ii,
+		.flags = tmpfile ? ZI_TMPFILE : 0,
+		.str.len = qstr->len,
+	};
+	struct inode *inode;
+	struct zus_inode *zi;
+	struct page *pages[2];
+	uint nump = 0;
+	int err;
+
+	memcpy(&ioc_new_inode.str.name, qstr->name, qstr->len);
+
+	inode = new_inode(sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	inode_init_owner(inode, dir, mode);
+	inode->i_blocks = inode->i_size = 0;
+	inode->i_ctime = inode->i_mtime = current_time(dir);
+	inode->i_atime = inode->i_ctime;
+
+	zuf_dbg_verbose("inode=%p name=%s\n", inode, qstr->name);
+
+	zuf_set_inode_flags(inode, &ioc_new_inode.zi);
+
+	err = _set_zi_from_inode(dir, &ioc_new_inode.zi, inode);
+	if (unlikely(err))
+		goto fail;
+
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+	    S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+		init_special_inode(inode, mode, rdev_or_isize);
+	} else if (symname) {
+		inode->i_size = rdev_or_isize;
+		nump = zuf_prepare_symname(&ioc_new_inode, symname,
+					   rdev_or_isize, pages);
+	}
+
+	err = zufs_dispatch(ZUF_ROOT(sbi), &ioc_new_inode.hdr, pages, nump);
+	if (unlikely(err)) {
+		zuf_err("zufs_dispatch failed => %d\n", err);
+		goto fail;
+	}
+
+	/* Don't trust ZUS pointers: verify before dereferencing */
+	zi = md_addr_verify(sbi->md, ioc_new_inode._zi);
+	if (unlikely(!zi)) {
+		zuf_err("Bad zus_inode 0x%llx\n", ioc_new_inode._zi);
+		err = -EIO;
+		goto fail;
+	}
+
+	_zii_connect(inode, zi, ioc_new_inode.zus_ii);
+
+	/* update inode fields from filesystem inode */
+	inode->i_ino = le64_to_cpu(zi->i_ino);
+	inode->i_size = le64_to_cpu(zi->i_size);
+	inode->i_generation = le64_to_cpu(zi->i_generation);
+	inode->i_blocks = le64_to_cpu(zi->i_blocks);
+	set_nlink(inode, le16_to_cpu(zi->i_nlink));
+	/* The parent directory's size is reloaded from its zus_inode too */
+	i_size_write(dir, le64_to_cpu(zus_zi(dir)->i_size));
+
+	zuf_dbg_zus("[%lld] size=0x%llx, blocks=0x%llx ct=0x%llx mt=0x%llx link=0x%x mode=0x%x xattr=0x%llx\n",
+		    zi->i_ino, zi->i_size, zi->i_blocks, zi->i_ctime,
+		    zi->i_mtime, zi->i_nlink, zi->i_mode, zi->i_xattr);
+
+	zuf_dbg_verbose("allocating inode %ld (zi=%p)\n", _zi_ino(zi), zi);
+
+	err = insert_inode_locked(inode);
+	if (unlikely(err)) {
+		zuf_err("[%ld:%s] generation=%lld insert_inode_locked => %d\n",
+			 inode->i_ino, qstr->name, zi->i_generation, err);
+		goto fail;
+	}
+
+	return inode;
+
+fail:
+	/* Drop the half-built inode: no links, marked bad, then released */
+	clear_nlink(inode);
+	make_bad_inode(inode);
+	iput(inode);
+	return ERR_PTR(err);
 }
 
 int zuf_write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -38,8 +446,211 @@ int zuf_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return 0;
 }
 
-/* This function is called by msync(), fsync() && sync_fs(). */
-int zuf_isync(struct inode *inode, loff_t start, loff_t end, int datasync)
+/*
+ * ->update_time() handler.
+ *
+ * Copies the requested timestamps (and, for S_VERSION, the bumped i_version)
+ * into both the VFS inode and the zus_inode, then sends a single
+ * ZUS_OP_UPDATE_TIME dispatch describing what changed.
+ *
+ * Mostly here for file_accessed(), the only caller we use ourselves;
+ * file_update_time() is supported too since the fifo code uses it.
+ *
+ * Returns 0 if none of the handled flags was set, else the dispatch result.
+ */
+int zuf_update_time(struct inode *inode, struct timespec *time, int flags)
+{
+	struct zus_inode *zi = zus_zi(inode);
+	struct zufs_ioc_attr ioc_attr = {
+		.hdr.in_len = sizeof(ioc_attr),
+		.hdr.out_len = sizeof(ioc_attr),
+		.hdr.operation = ZUS_OP_UPDATE_TIME,
+		.zus_ii = ZUII(inode)->zus_ii,
+	};
+	int ret;
+
+	if (flags & S_ATIME) {
+		ioc_attr.zuf_attr |= STATX_ATIME;
+		inode->i_atime = *time;
+		timespec_to_mt(&zi->i_atime, &inode->i_atime);
+	}
+
+	/* The remaining flags are what file_update_time() may pass in */
+	if (flags & S_VERSION) {
+		ioc_attr.zuf_attr |= ZUFS_STATX_VERSION;
+		inode_inc_iversion(inode);
+		zi->i_generation = cpu_to_le64(inode->i_version);
+	}
+
+	if (flags & S_CTIME) {
+		ioc_attr.zuf_attr |= STATX_CTIME;
+		inode->i_ctime = *time;
+		timespec_to_mt(&zi->i_ctime, &inode->i_ctime);
+	}
+
+	if (flags & S_MTIME) {
+		ioc_attr.zuf_attr |= STATX_MTIME;
+		inode->i_mtime = *time;
+		timespec_to_mt(&zi->i_mtime, &inode->i_mtime);
+	}
+
+	/* Nothing we track was requested: no need to bother the server */
+	if (!ioc_attr.zuf_attr)
+		return 0;
+
+	ret = zufs_dispatch(ZUF_ROOT(SBI(inode->i_sb)), &ioc_attr.hdr, NULL, 0);
+	if (unlikely(ret))
+		zuf_err("zufs_dispatch failed => %d\n", ret);
+
+	return ret;
+}
+
+/*
+ * ->getattr() handler.
+ *
+ * Reports the APPEND/IMMUTABLE statx attributes from inode->i_flags, lets
+ * generic_fillattr() fill the rest and then overrides stat->blocks with
+ * i_blocks scaled from filesystem blocks to 512-byte units.
+ * @request_mask and @flags are not consulted. Always returns 0.
+ */
+int zuf_getattr(const struct path *path, struct kstat *stat, u32 request_mask,
+		unsigned int flags)
+{
+	struct inode *inode = d_inode(path->dentry);
+	unsigned int shift = inode->i_sb->s_blocksize_bits - 9;
+
+	stat->attributes_mask |= STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE;
+
+	if (inode->i_flags & S_APPEND)
+		stat->attributes |= STATX_ATTR_APPEND;
+	if (inode->i_flags & S_IMMUTABLE)
+		stat->attributes |= STATX_ATTR_IMMUTABLE;
+
+	generic_fillattr(inode, stat);
+
+	/* stat->blocks should be the number of 512B blocks */
+	stat->blocks = inode->i_blocks << shift;
+
+	return 0;
+}
+
+/*
+ * ->setattr() handler.
+ *
+ * For every attribute selected in attr->ia_valid the new value is written to
+ * the VFS inode and mirrored into the zus_inode (zi), and the matching
+ * STATX_* bit is collected in ioc_attr.zuf_attr. A single ZUS_OP_SETATTR
+ * dispatch then tells the server what changed. A size change additionally
+ * carries the new size in ioc_attr.truncate_size and is done under
+ * zuf_smw_lock().
+ *
+ * Returns -EACCES if the inode has no zi, the setattr_prepare() or
+ * posix_acl_chmod() error, -EINVAL for a size change on an inode that is not
+ * a regular file, directory or symlink, otherwise the dispatch result.
+ *
+ * NOTE(review): the posix_acl_chmod() and -EINVAL early returns happen after
+ * inode/zi fields may already have been modified and before any dispatch
+ * was sent - confirm this partial update is acceptable.
+ */
+int zuf_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	struct zuf_inode_info *zii = ZUII(inode);
+	struct zus_inode *zi = zii->zi;
+	struct zufs_ioc_attr ioc_attr = {
+		.hdr.in_len = sizeof(ioc_attr),
+		.hdr.out_len = sizeof(ioc_attr),
+		.hdr.operation = ZUS_OP_SETATTR,
+		.zus_ii = zii->zus_ii,
+	};
+	int err;
+
+	if (!zi)
+		return -EACCES;
+
+	err = setattr_prepare(dentry, attr);
+	if (unlikely(err))
+		return err;
+
+	if (attr->ia_valid & ATTR_MODE) {
+		zuf_dbg_vfs("[%ld] ATTR_MODE=0x%x\n",
+			     inode->i_ino, attr->ia_mode);
+		ioc_attr.zuf_attr |= STATX_MODE;
+		inode->i_mode = attr->ia_mode;
+		zi->i_mode = cpu_to_le16(inode->i_mode);
+		/* ACLs are only adjusted when mounted with the POSIXACL option */
+		if (test_opt(SBI(inode->i_sb), POSIXACL)) {
+			err = posix_acl_chmod(inode, inode->i_mode);
+			if (unlikely(err))
+				return err;
+		}
+	}
+
+	if (attr->ia_valid & ATTR_UID) {
+		zuf_dbg_vfs("[%ld] ATTR_UID=0x%x\n",
+			     inode->i_ino, __kuid_val(attr->ia_uid));
+		ioc_attr.zuf_attr |= STATX_UID;
+		inode->i_uid = attr->ia_uid;
+		zi->i_uid = cpu_to_le32(__kuid_val(inode->i_uid));
+	}
+	if (attr->ia_valid & ATTR_GID) {
+		zuf_dbg_vfs("[%ld] ATTR_GID=0x%x\n",
+			     inode->i_ino, __kgid_val(attr->ia_gid));
+		ioc_attr.zuf_attr |= STATX_GID;
+		inode->i_gid = attr->ia_gid;
+		zi->i_gid = cpu_to_le32(__kgid_val(inode->i_gid));
+	}
+
+	if ((attr->ia_valid & ATTR_SIZE)) {
+		zuf_dbg_vfs("[%ld] ATTR_SIZE=0x%llx\n",
+			     inode->i_ino, attr->ia_size);
+		if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+		      S_ISLNK(inode->i_mode))) {
+			zuf_err("[%ld] wrong file mode=%x\n",
+				inode->i_ino, inode->i_mode);
+			return -EINVAL;
+		}
+		ioc_attr.zuf_attr |= STATX_SIZE;
+
+		/* Held until after the dispatch; released at the bottom */
+		ZUF_CHECK_I_W_LOCK(inode);
+		zuf_smw_lock(zii);
+
+		/* Make all mmap() users FAULT for truncated pages */
+		unmap_mapping_range(inode->i_mapping,
+				    attr->ia_size + PAGE_SIZE - 1, 0, 1);
+
+		ioc_attr.truncate_size = attr->ia_size;
+		/* on attr_size we want to update times as well */
+		attr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
+	}
+
+	if (attr->ia_valid & ATTR_ATIME) {
+		ioc_attr.zuf_attr |= STATX_ATIME;
+		inode->i_atime = attr->ia_atime;
+		timespec_to_mt(&zi->i_atime, &inode->i_atime);
+		zuf_dbg_vfs("[%ld] ATTR_ATIME=0x%llx\n",
+			     inode->i_ino, zi->i_atime);
+	}
+	if (attr->ia_valid & ATTR_CTIME) {
+		ioc_attr.zuf_attr |= STATX_CTIME;
+		inode->i_ctime = attr->ia_ctime;
+		timespec_to_mt(&zi->i_ctime, &inode->i_ctime);
+		zuf_dbg_vfs("[%ld] ATTR_CTIME=0x%llx\n",
+			     inode->i_ino, zi->i_ctime);
+	}
+	if (attr->ia_valid & ATTR_MTIME) {
+		ioc_attr.zuf_attr |= STATX_MTIME;
+		inode->i_mtime = attr->ia_mtime;
+		timespec_to_mt(&zi->i_mtime, &inode->i_mtime);
+		zuf_dbg_vfs("[%ld] ATTR_MTIME=0x%llx\n",
+			     inode->i_ino, zi->i_mtime);
+	}
+
+	err = zufs_dispatch(ZUF_ROOT(SBI(inode->i_sb)), &ioc_attr.hdr, NULL, 0);
+	if (unlikely(err))
+		zuf_err("zufs_dispatch failed => %d\n", err);
+
+	if ((attr->ia_valid & ATTR_SIZE)) {
+		/*
+		 * Size and block count are re-read from zi whether or not the
+		 * dispatch succeeded, so the VFS inode follows whatever zi holds.
+		 */
+		i_size_write(inode, le64_to_cpu(zi->i_size));
+		inode->i_blocks = le64_to_cpu(zi->i_blocks);
+
+		zuf_smw_unlock(zii);
+	}
+
+	return err;
+}
+
+/*
+ * Translate the FS_*_FL bits stored in zi->i_flags into the corresponding
+ * S_* bits of inode->i_flags. The five S_* bits handled here are always
+ * cleared first, so bits no longer present in @zi are dropped.
+ * An inode whose zi has no xattr reference is marked via
+ * inode_has_no_xattr().
+ */
+void zuf_set_inode_flags(struct inode *inode, struct zus_inode *zi)
+{
+	const unsigned int zi_flags = le32_to_cpu(zi->i_flags);
+	unsigned int vfs_flags = 0;
+
+	if (zi_flags & FS_SYNC_FL)
+		vfs_flags |= S_SYNC;
+	if (zi_flags & FS_APPEND_FL)
+		vfs_flags |= S_APPEND;
+	if (zi_flags & FS_IMMUTABLE_FL)
+		vfs_flags |= S_IMMUTABLE;
+	if (zi_flags & FS_NOATIME_FL)
+		vfs_flags |= S_NOATIME;
+	if (zi_flags & FS_DIRSYNC_FL)
+		vfs_flags |= S_DIRSYNC;
+
+	inode->i_flags &=
+		~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC);
+	inode->i_flags |= vfs_flags;
+
+	if (!zi->i_xattr)
+		inode_has_no_xattr(inode);
+}
+
+/*
+ * Placeholder ->direct_IO. It is not expected to be called: it only exists so
+ * that open(O_DIRECT) is accepted. If it is ever reached it warns via
+ * WARN_ON(1) and reports 0 bytes transferred.
+ */
+static ssize_t zuf_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 {
+	WARN_ON(1);
 	return 0;
 }
+/* Address-space operations: only the direct_IO placeholder above is set */
+const struct address_space_operations zuf_aops = {
+	.direct_IO		= zuf_direct_IO,
+};
diff --git a/fs/zuf/namei.c b/fs/zuf/namei.c
new file mode 100644
index 0000000..179069b
--- /dev/null
+++ b/fs/zuf/namei.c
@@ -0,0 +1,421 @@
+/*
+ * BRIEF DESCRIPTION
+ *
+ * Inode operations for directories.
+ *
+ * Copyright (c) 2018 NetApp Inc. All rights reserved.
+ *
+ * ZUFS-License: GPL-2.0 OR BSD-3-Clause. See module.c for LICENSE details.
+ *
+ * Authors:
+ *	Boaz Harrosh <boazh@netapp.com>
+ *	Sagi Manole <sagim@netapp.com>"
+ */
+#include <linux/fs.h>
+#include "zuf.h"
+
+
+/* Inode of @dentry's parent directory (debug-print helper in this file) */
+static struct inode *d_parent(struct dentry *dentry)
+{
+	struct dentry *parent = dentry->d_parent;
+
+	return parent->d_inode;
+}
+
+/*
+ * Common tail of the create-style operations in this file: attach the new
+ * @inode to @dentry with d_instantiate(), then call unlock_new_inode() on it.
+ */
+static void _instantiate_unlock(struct dentry *dentry, struct inode *inode)
+{
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+}
+
+/*
+ * ->lookup() handler.
+ *
+ * Sends the name to the server with ZUS_OP_LOOKUP. Only the fields before
+ * zufs_ioc_lookup::_zi are inputs; _zi and zus_ii come back as outputs and
+ * are handed to zuf_iget() to obtain the VFS inode.
+ *
+ * Returns the result of d_splice_alias(): with a NULL inode (name not found)
+ * the dentry becomes negative. Returns ERR_PTR(-ENAMETOOLONG) for names longer
+ * than ZUFS_NAME_LEN and ERR_PTR(err) for any dispatch failure other than
+ * -ENOENT, so that I/O or allocation errors are not mistaken for
+ * "no such file".
+ *
+ * NOTE(review): assumes the server reports a missing name as -ENOENT - confirm
+ * against the zus side.
+ */
+static struct dentry *zuf_lookup(struct inode *dir, struct dentry *dentry,
+				 uint flags)
+{
+	struct super_block *sb = dir->i_sb;
+	struct qstr *str = &dentry->d_name;
+	uint in_len = offsetof(struct zufs_ioc_lookup, _zi);
+	struct zufs_ioc_lookup ioc_lu = {
+		.hdr.in_len = in_len,
+		.hdr.out_start = in_len,
+		.hdr.out_len = sizeof(ioc_lu) - in_len,
+		.hdr.operation = ZUS_OP_LOOKUP,
+		.dir_ii = ZUII(dir)->zus_ii,
+		.str.len = str->len,
+	};
+	struct inode *inode = NULL;
+	/* Initialized: zuf_iget() is not guaranteed to set it on failure */
+	bool exist = false;
+	int err;
+
+	zuf_dbg_vfs("[%ld] dentry-name=%s\n", dir->i_ino, dentry->d_name.name);
+
+	/* Must precede the memcpy below; str.name holds ZUFS_NAME_LEN bytes */
+	if (dentry->d_name.len > ZUFS_NAME_LEN)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	memcpy(&ioc_lu.str.name, str->name, str->len);
+
+	err = zufs_dispatch(ZUF_ROOT(SBI(sb)), &ioc_lu.hdr, NULL, 0);
+	if (unlikely(err)) {
+		zuf_dbg_err("zufs_dispatch failed => %d\n", err);
+		/* Only a missing entry may be cached as a negative dentry */
+		if (err != -ENOENT)
+			return ERR_PTR(err);
+		goto out;
+	}
+
+	inode = zuf_iget(dir->i_sb, ioc_lu.zus_ii, ioc_lu._zi, &exist);
+	if (exist) {
+		/* Inode was already known here: drop the server's extra ref */
+		zuf_dbg_err("race in lookup\n");
+		zuf_evict_dispatch(sb, ioc_lu.zus_ii, ZUS_OP_EVICT_INODE);
+	}
+
+out:
+	return d_splice_alias(inode, dentry);
+}
+
+/*
+ * ->create() handler for regular files.
+ *
+ * The dentry handed in already exists in the directory cache but is still
+ * negative (no inode). Once zuf_new_inode() has produced the inode it gets the
+ * regular-file operation tables and is attached to the dentry.
+ * @excl is not consulted here.
+ *
+ * Returns 0, or the error zuf_new_inode() failed with.
+ */
+static int zuf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+		      bool excl)
+{
+	struct inode *new;
+
+	zuf_dbg_vfs("[%ld] dentry-name=%s mode=0x%x\n",
+		     dir->i_ino, dentry->d_name.name, mode);
+
+	new = zuf_new_inode(dir, mode, &dentry->d_name, NULL, 0, false);
+	if (IS_ERR(new))
+		return PTR_ERR(new);
+
+	new->i_fop = &zuf_file_operations;
+	new->i_op = &zuf_file_inode_operations;
+	new->i_mapping->a_ops = &zuf_aops;
+
+	/* Same as _instantiate_unlock(): make it positive, then unlock */
+	d_instantiate(dentry, new);
+	unlock_new_inode(new);
+
+	return 0;
+}
+
+/*
+ * ->mknod() handler: create a special inode for device number @rdev.
+ * Returns 0, or the error zuf_new_inode() failed with.
+ */
+static int zuf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+		     dev_t rdev)
+{
+	struct inode *node;
+
+	zuf_dbg_vfs("[%ld] mode=0x%x rdev=0x%x\n", dir->i_ino, mode, rdev);
+
+	node = zuf_new_inode(dir, mode, &dentry->d_name, NULL, rdev, false);
+	if (!IS_ERR(node)) {
+		node->i_op = &zuf_special_inode_operations;
+		_instantiate_unlock(dentry, node);
+		return 0;
+	}
+
+	return PTR_ERR(node);
+}
+
+/*
+ * ->tmpfile() handler: create an unnamed regular file (last argument to
+ * zuf_new_inode() is true).
+ * Returns 0, or the error zuf_new_inode() failed with.
+ */
+static int zuf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	struct inode *inode;
+
+	inode = zuf_new_inode(dir, mode, &dentry->d_name, NULL, 0, true);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	/* TODO: See about more ephemeral operations on this file, around
+	 * mmap and such.
+	 * Must see about that tmpfile mode that is later link_at
+	 * (probably the !O_EXCL flag)
+	 */
+	inode->i_op = &zuf_file_inode_operations;
+	inode->i_mapping->a_ops = &zuf_aops;
+	inode->i_fop = &zuf_file_operations;
+
+	/* user_mode knows nothing about this link; d_tmpfile() is expected to
+	 * drop it again, leaving the tmpfile at nlink=0.
+	 */
+	set_nlink(inode, 1);
+	d_tmpfile(dentry, inode);
+	/* Mirror the resulting link count into the zus_inode. Since this is a
+	 * tmp file we do not care about cl_flushing. If later this file will
+	 * be linked to a dir, the add_dentry will flush the zi.
+	 * zi->i_nlink is little-endian on media (all readers use
+	 * le16_to_cpu), so convert on store as well.
+	 */
+	zus_zi(inode)->i_nlink = cpu_to_le16(inode->i_nlink);
+
+	unlock_new_inode(inode);
+	return 0;
+}
+
+/*
+ * ->symlink() handler.
+ *
+ * The target string and its length (without the terminating NUL) are passed
+ * to zuf_new_inode(), which creates the inode with mode S_IFLNK|S_IRWXUGO.
+ *
+ * Returns 0, -ENAMETOOLONG if the target including its NUL does not fit in
+ * ZUFS_MAX_SYMLINK, or the error zuf_new_inode() failed with.
+ */
+static int zuf_symlink(struct inode *dir, struct dentry *dentry,
+		       const char *symname)
+{
+	struct inode *link;
+	ulong target_len;
+
+	zuf_dbg_vfs("[%ld] de->name=%s symname=%s\n",
+			dir->i_ino, dentry->d_name.name, symname);
+
+	target_len = strlen(symname);
+	if (ZUFS_MAX_SYMLINK < target_len + 1)
+		return -ENAMETOOLONG;
+
+	link = zuf_new_inode(dir, S_IFLNK|S_IRWXUGO, &dentry->d_name,
+			     symname, target_len, false);
+	if (IS_ERR(link))
+		return PTR_ERR(link);
+
+	link->i_mapping->a_ops = &zuf_aops;
+	link->i_op = &zuf_symlink_inode_operations;
+
+	_instantiate_unlock(dentry, link);
+
+	return 0;
+}
+
+/*
+ * ->link() handler: add a new name @dentry in @dir for the inode behind
+ * @dest_dentry.
+ *
+ * Returns -EMLINK when the inode already has ZUFS_LINK_MAX links, the
+ * zuf_add_dentry() error on failure, 0 on success.
+ */
+static int zuf_link(struct dentry *dest_dentry, struct inode *dir,
+		    struct dentry *dentry)
+{
+	struct inode *inode = dest_dentry->d_inode;
+	int err;
+
+	zuf_dbg_vfs("[%ld] dentry-ino=%ld dentry-name=%s dentry-parent=%ld dest_d-ino=%ld dest_d-name=%s\n",
+		     dir->i_ino, inode->i_ino, dentry->d_name.name,
+		     d_parent(dentry)->i_ino,
+		     dest_dentry->d_inode->i_ino, dest_dentry->d_name.name);
+
+	if (inode->i_nlink >= ZUFS_LINK_MAX)
+		return -EMLINK;
+
+	/* Reference for the new dentry; dropped again if the add fails */
+	ihold(inode);
+
+	err = zuf_add_dentry(dir, &dentry->d_name, inode, false);
+	if (unlikely(err)) {
+		iput(inode);
+		return err;
+	}
+
+	inode->i_ctime = current_time(dir);
+
+	/* Link count is taken from the zus_inode, not incremented locally */
+	set_nlink(inode, le16_to_cpu(zus_zi(inode)->i_nlink));
+
+	d_instantiate(dentry, inode);
+
+	return 0;
+}
+
+/*
+ * ->unlink() handler: remove the name @dentry from @dir.
+ *
+ * On success the inode's ctime is copied from @dir and its link count is
+ * re-read from the zus_inode. Returns 0 or the zuf_remove_dentry() error.
+ */
+static int zuf_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	int err;
+
+	zuf_dbg_vfs("[%ld] dentry-ino=%ld dentry-name=%s dentry-parent=%ld\n",
+		     dir->i_ino, inode->i_ino, dentry->d_name.name,
+		     d_parent(dentry)->i_ino);
+
+	err = zuf_remove_dentry(dir, &dentry->d_name);
+	if (likely(!err)) {
+		inode->i_ctime = dir->i_ctime;
+		set_nlink(inode, le16_to_cpu(ZUII(inode)->zi->i_nlink));
+	}
+
+	return err;
+}
+
+/*
+ * ->mkdir() handler.
+ *
+ * Creates the directory inode through zuf_new_inode(), installs the directory
+ * operation tables and refreshes the PARENT's link count from the parent's
+ * own zus_inode (the new subdirectory adds a link to it).
+ *
+ * Returns -EMLINK when @dir already has ZUFS_LINK_MAX links, the
+ * zuf_new_inode() error on failure, 0 on success.
+ */
+static int zuf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	struct inode *inode;
+
+	zuf_dbg_vfs("[%ld] dentry-name=%s dentry-parent=%ld mode=0x%x\n",
+		     dir->i_ino, dentry->d_name.name, d_parent(dentry)->i_ino,
+		     mode);
+
+	if (dir->i_nlink >= ZUFS_LINK_MAX)
+		return -EMLINK;
+
+	inode = zuf_new_inode(dir, S_IFDIR | mode, &dentry->d_name, NULL, 0,
+			      false);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	inode->i_op = &zuf_dir_inode_operations;
+	inode->i_fop = &zuf_dir_operations;
+	inode->i_mapping->a_ops = &zuf_aops;
+
+	/* Must read the parent's zi here, not the new child's: using the
+	 * child's count would overwrite the parent's nlink with the child's.
+	 */
+	set_nlink(dir, le16_to_cpu(zus_zi(dir)->i_nlink));
+
+	_instantiate_unlock(dentry, inode);
+
+	return 0;
+}
+
+/*
+ * Cheap kernel-side emptiness test: a directory whose link count is not
+ * exactly 2 is reported as not empty (with a warning). A true result only
+ * means this check passed; see the NOTE below.
+ */
+static bool _empty_dir(struct inode *dir)
+{
+	if (dir->i_nlink != 2) {
+		zuf_warn("[%ld] directory has nlink(%d) != 2\n",
+			  dir->i_ino, dir->i_nlink);
+		return false;
+	}
+	/* NOTE: Above is not the only -ENOTEMPTY the zus-fs will need to check
+	 * for the "only-files" no subdirs case. And return -ENOTEMPTY below
+	 */
+	return true;
+}
+
+/*
+ * ->rmdir() handler.
+ *
+ * Returns -ENOENT for a dentry without an inode, -ENOTEMPTY when _empty_dir()
+ * rejects the directory, the zuf_remove_dentry() error on failure and 0 on
+ * success. On success the ctime is copied from @dir and the link counts of
+ * both the removed directory and its parent are re-read from their
+ * zus_inodes.
+ */
+static int zuf_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	int err;
+
+	/* Check before the debug print below, which dereferences inode */
+	if (!inode)
+		return -ENOENT;
+
+	zuf_dbg_vfs("[%ld] dentry-ino=%ld dentry-name=%s dentry-parent=%ld\n",
+		     dir->i_ino, inode->i_ino, dentry->d_name.name,
+		     d_parent(dentry)->i_ino);
+
+	if (!_empty_dir(inode))
+		return -ENOTEMPTY;
+
+	err = zuf_remove_dentry(dir, &dentry->d_name);
+	if (unlikely(err))
+		return err;
+
+	inode->i_ctime = dir->i_ctime;
+
+	set_nlink(inode, le16_to_cpu(zus_zi(inode)->i_nlink));
+	set_nlink(dir, le16_to_cpu(zus_zi(dir)->i_nlink));
+
+	return 0;
+}
+
+/*
+ * Structure of a directory element: a little-endian inode number followed by
+ * a fixed 254-byte name buffer.
+ * NOTE(review): no user of this struct is visible in this file - confirm it
+ * is still needed.
+ */
+struct zuf_dir_element {
+	__le64  ino;
+	char name[254];
+};
+
+/*
+ * Link-count precheck for RENAME_EXCHANGE, to be done BEFORE the server is
+ * asked to perform the exchange.
+ *
+ * A subdir holds a ref on its parent. When exactly one of the two inodes is a
+ * directory and the parents differ, the directory moves to the other parent,
+ * which must therefore still have room for one more link.
+ *
+ * Returns 0 if the exchange may proceed, -EMLINK otherwise.
+ */
+static int _rename_exchange_check(struct inode *old_inode,
+				  struct inode *new_inode,
+				  struct inode *old_dir, struct inode *new_dir)
+{
+	if ((S_ISDIR(old_inode->i_mode) == S_ISDIR(new_inode->i_mode)) ||
+	    (old_dir == new_dir))
+		return 0;
+
+	if (S_ISDIR(old_inode->i_mode)) {
+		if (ZUFS_LINK_MAX <= new_dir->i_nlink)
+			return -EMLINK;
+	} else {
+		if (ZUFS_LINK_MAX <= old_dir->i_nlink)
+			return -EMLINK;
+	}
+
+	return 0;
+}
+
+/*
+ * After a successful RENAME_EXCHANGE dispatch: refresh link counts and
+ * mtime/ctime of both parent directories from their zus_inodes.
+ */
+static void _rename_exchange(struct inode *old_dir, struct inode *new_dir)
+{
+	set_nlink(old_dir, le16_to_cpu(zus_zi(old_dir)->i_nlink));
+	set_nlink(new_dir, le16_to_cpu(zus_zi(new_dir)->i_nlink));
+
+	/* Update Directory times */
+	mt_to_timespec(&old_dir->i_mtime, &zus_zi(old_dir)->i_mtime);
+	mt_to_timespec(&old_dir->i_ctime, &zus_zi(old_dir)->i_ctime);
+	if (old_dir != new_dir) {
+		mt_to_timespec(&new_dir->i_mtime, &zus_zi(new_dir)->i_mtime);
+		mt_to_timespec(&new_dir->i_ctime, &zus_zi(new_dir)->i_ctime);
+	}
+}
+
+/*
+ * ->rename() handler. Supports RENAME_NOREPLACE and RENAME_EXCHANGE.
+ *
+ * All checks that can refuse the operation (-EINVAL, -ENOTEMPTY, -EMLINK) run
+ * before ZUS_OP_RENAME is dispatched, so an error returned from here never
+ * leaves the server having already performed the rename. After a successful
+ * dispatch the affected VFS inodes are refreshed from their zus_inodes.
+ *
+ * Returns 0 on success, one of the errors above, or the dispatch error.
+ */
+static int zuf_rename(struct inode *old_dir, struct dentry *old_dentry,
+		      struct inode *new_dir, struct dentry *new_dentry,
+		      uint flags)
+{
+	struct inode *old_inode = d_inode(old_dentry);
+	struct inode *new_inode = d_inode(new_dentry);
+	struct zuf_sb_info *sbi = SBI(old_inode->i_sb);
+	struct zufs_ioc_rename ioc_rename = {
+		.hdr.in_len = sizeof(ioc_rename),
+		.hdr.out_len = sizeof(ioc_rename),
+		.hdr.operation = ZUS_OP_RENAME,
+		.old_dir_ii = ZUII(old_dir)->zus_ii,
+		.new_dir_ii = ZUII(new_dir)->zus_ii,
+		.old_zus_ii = old_inode ? ZUII(old_inode)->zus_ii : NULL,
+		.new_zus_ii = new_inode ? ZUII(new_inode)->zus_ii : NULL,
+		.old_d_str.len = old_dentry->d_name.len,
+		.new_d_str.len = new_dentry->d_name.len,
+	};
+	struct timespec time = current_time(old_dir);
+	int err;
+
+	zuf_dbg_vfs("old_inode=%ld new_inode=%ld old_name=%s new_name=%s f=0x%x\n",
+		     old_inode ? old_inode->i_ino : 0,
+		     new_inode ? new_inode->i_ino : 0, old_dentry->d_name.name,
+		     new_dentry->d_name.name, flags);
+
+	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE /*| RENAME_WHITEOUT*/))
+		return -EINVAL;
+
+	if (flags & RENAME_EXCHANGE) {
+		/* Validate link limits before the server changes anything */
+		err = _rename_exchange_check(old_inode, new_inode,
+					     old_dir, new_dir);
+		if (unlikely(err))
+			return err;
+	} else if (S_ISDIR(old_inode->i_mode)) {
+		if (new_inode) {
+			if (!_empty_dir(new_inode))
+				return -ENOTEMPTY;
+		} else if (ZUFS_LINK_MAX <= new_dir->i_nlink) {
+			return -EMLINK;
+		}
+	}
+
+	memcpy(&ioc_rename.old_d_str.name, old_dentry->d_name.name,
+		old_dentry->d_name.len);
+	memcpy(&ioc_rename.new_d_str.name, new_dentry->d_name.name,
+		new_dentry->d_name.len);
+	timespec_to_mt(&ioc_rename.time, &time);
+
+	err = zufs_dispatch(ZUF_ROOT(sbi), &ioc_rename.hdr, NULL, 0);
+	if (unlikely(err)) {
+		zuf_err("zufs_dispatch failed => %d\n", err);
+		return err;
+	}
+
+	if (flags & RENAME_EXCHANGE) {
+		_rename_exchange(old_dir, new_dir);
+		return 0;
+	}
+
+	mt_to_timespec(&new_dir->i_mtime, &zus_zi(new_dir)->i_mtime);
+	mt_to_timespec(&new_dir->i_ctime, &zus_zi(new_dir)->i_ctime);
+
+	if (new_inode) {
+		/* The replaced target lost a link */
+		struct zus_inode *new_zi = zus_zi(new_inode);
+
+		set_nlink(new_inode, le16_to_cpu(new_zi->i_nlink));
+		mt_to_timespec(&new_inode->i_ctime, &new_zi->i_ctime);
+	} else {
+		struct zus_inode *old_zi = zus_zi(old_inode);
+
+		mt_to_timespec(&old_inode->i_ctime, &old_zi->i_ctime);
+	}
+
+	if (S_ISDIR(old_inode->i_mode)) {
+		/* A moved directory changes the parents' link counts */
+		set_nlink(old_dir, le16_to_cpu(zus_zi(old_dir)->i_nlink));
+		if (!new_inode)
+			set_nlink(new_dir,
+				  le16_to_cpu(zus_zi(new_dir)->i_nlink));
+	}
+
+	return 0;
+}
+
+/* Inode operations installed on directories (see zuf_mkdir()) */
+const struct inode_operations zuf_dir_inode_operations = {
+	.create		= zuf_create,
+	.lookup		= zuf_lookup,
+	.link		= zuf_link,
+	.unlink		= zuf_unlink,
+	.symlink	= zuf_symlink,
+	.mkdir		= zuf_mkdir,
+	.rmdir		= zuf_rmdir,
+	.mknod		= zuf_mknod,
+	.tmpfile	= zuf_tmpfile,
+	.rename		= zuf_rename,
+	.setattr	= zuf_setattr,
+	.getattr	= zuf_getattr,
+	.update_time	= zuf_update_time,
+};
+
+/* Inode operations installed on inodes created by zuf_mknod() */
+const struct inode_operations zuf_special_inode_operations = {
+	.setattr	= zuf_setattr,
+	.getattr	= zuf_getattr,
+	.update_time	= zuf_update_time,
+};
diff --git a/fs/zuf/symlink.c b/fs/zuf/symlink.c
new file mode 100644
index 0000000..8188225
--- /dev/null
+++ b/fs/zuf/symlink.c
@@ -0,0 +1,76 @@
+/*
+ * BRIEF DESCRIPTION
+ *
+ * Symlink operations
+ *
+ * Copyright (c) 2018 NetApp Inc. All rights reserved.
+ *
+ * ZUFS-License: GPL-2.0 OR BSD-3-Clause. See module.c for LICENSE details.
+ *
+ * Authors:
+ *	Boaz Harrosh <boazh@netapp.com>
+ *	Sagi Manole <sagim@netapp.com>"
+ */
+
+#include "zuf.h"
+
+/*
+ * Fill @ioc_new_inode with the symlink target.
+ *
+ * zi.i_size is always set to @len. A target shorter than zi.i_symlink is
+ * copied inline and no pages are used. Otherwise the page(s) backing @symname
+ * are stored in @pages and hdr.len / hdr.offset describe where the string
+ * lies within them.
+ *
+ * Can never fail, all checks were already made by the caller.
+ * Returns: The number of pages stored in @pages (0, 1 or 2)
+ */
+uint zuf_prepare_symname(struct zufs_ioc_new_inode *ioc_new_inode,
+			 const char *symname, ulong len,
+			 struct page *pages[2])
+{
+	ulong in_page_off;
+
+	ioc_new_inode->zi.i_size = cpu_to_le64(len);
+
+	if (len < sizeof(ioc_new_inode->zi.i_symlink)) {
+		memcpy(&ioc_new_inode->zi.i_symlink, symname, len);
+		return 0;
+	}
+
+	in_page_off = (ulong)symname & (PAGE_SIZE - 1);
+	ioc_new_inode->hdr.len = len;
+	ioc_new_inode->hdr.offset = in_page_off;
+
+	pages[0] = virt_to_page(symname);
+	if (in_page_off + len <= PAGE_SIZE)
+		return 1;
+
+	/* The string crosses into the following page */
+	pages[1] = virt_to_page(symname + PAGE_SIZE);
+	return 2;
+}
+
+/*
+ * ->get_link() handler.
+ *
+ * Targets shorter than zi->i_symlink are stored inline in the zus_inode and
+ * are returned directly. Longer targets are fetched from the server with
+ * ZUS_OP_GET_SYMLINK; the returned dual-port pointer is translated with
+ * md_addr().
+ *
+ * Returns the target string, ERR_PTR(-ECHILD) when a dispatch would be needed
+ * during RCU-walk, or ERR_PTR(err) on dispatch failure.
+ */
+static const char *zuf_get_link(struct dentry *dentry, struct inode *inode,
+				struct delayed_call *notused)
+{
+	struct zuf_inode_info *zii = ZUII(inode);
+	struct zufs_ioc_get_link ioc_get_link = {
+		.hdr.in_len = sizeof(ioc_get_link),
+		.hdr.out_len = sizeof(ioc_get_link),
+		.hdr.operation = ZUS_OP_GET_SYMLINK,
+		.zus_ii = zii->zus_ii,
+	};
+	int err;
+
+	if (inode->i_size < sizeof(zii->zi->i_symlink))
+		return zii->zi->i_symlink;
+
+	/* A NULL dentry means the VFS is in RCU-walk mode, where we must not
+	 * block. The dispatch below waits for the server, so ask the VFS to
+	 * retry in ref-walk mode instead.
+	 */
+	if (!dentry)
+		return ERR_PTR(-ECHILD);
+
+	err = zufs_dispatch(ZUF_ROOT(SBI(inode->i_sb)), &ioc_get_link.hdr,
+			    NULL, 0);
+	if (unlikely(err)) {
+		zuf_err("zufs_dispatch failed => %d\n", err);
+		return ERR_PTR(err);
+	}
+
+	return md_addr(SBI(inode->i_sb)->md, ioc_get_link._link);
+}
+
+/* Inode operations installed on symlinks (see zuf_symlink() in namei.c) */
+const struct inode_operations zuf_symlink_inode_operations = {
+	.get_link	= zuf_get_link,
+	.update_time	= zuf_update_time,
+	.setattr	= zuf_setattr,
+	.getattr	= zuf_getattr,
+};
diff --git a/fs/zuf/zus_api.h b/fs/zuf/zus_api.h
index d461782..5870d63 100644
--- a/fs/zuf/zus_api.h
+++ b/fs/zuf/zus_api.h
@@ -20,6 +20,10 @@
 #include <stddef.h>
 #include <asm/statfs.h>
 
+/* TODO: The STATX_* attribute bits have no flag for i_version. Until a patch
+ * adding one is sent upstream we define our own private bit.
+ */
+#define ZUFS_STATX_VERSION	0x40000000U
+
 /*
  * Version rules:
  *   This is the zus-to-zuf API version. And not the Filesystem
@@ -337,6 +341,28 @@ enum e_zufs_operation {
 	ZUS_OP_NULL = 0,
 	ZUS_OP_STATFS,
 
+	ZUS_OP_NEW_INODE,
+	ZUS_OP_FREE_INODE,
+	ZUS_OP_EVICT_INODE,
+
+	ZUS_OP_LOOKUP,
+	ZUS_OP_ADD_DENTRY,
+	ZUS_OP_REMOVE_DENTRY,
+	ZUS_OP_RENAME,
+	ZUS_OP_READDIR,
+	ZUS_OP_CLONE,
+	ZUS_OP_COPY,
+
+	ZUS_OP_READ,
+	ZUS_OP_WRITE,
+	ZUS_OP_GET_BLOCK,
+	ZUS_OP_GET_SYMLINK,
+	ZUS_OP_SETATTR,
+	ZUS_OP_UPDATE_TIME,
+	ZUS_OP_SYNC,
+	ZUS_OP_FALLOCATE,
+	ZUS_OP_LLSEEK,
+
 	ZUS_OP_BREAK,		/* Kernel telling Server to exit */
 	ZUS_OP_MAX_OPT,
 };
@@ -351,4 +377,212 @@ struct zufs_ioc_statfs {
 	struct statfs64 statfs_out;
 };
 
+/*
+ * Values for the "flags" field of zufs_ioc_new_inode and
+ * zufs_ioc_evict_inode.
+ * Both enumerators have the value 1; per their comments each one is meant for
+ * a different operation.
+ */
+enum zi_flags {
+	ZI_TMPFILE = 1,		/* for new_inode */
+	ZI_LOOKUP_RACE = 1,	/* for evict */
+};
+
+/*
+ * Length-prefixed name as passed in the dispatch buffers.
+ * @len:  number of valid bytes in @name (8 bits wide).
+ * @name: the name bytes. The kernel callers copy exactly @len bytes with
+ *        memcpy(), so a NUL terminator is not guaranteed when
+ *        @len == ZUFS_NAME_LEN.
+ */
+struct zufs_str {
+	__u8 len;
+	char name[ZUFS_NAME_LEN];
+};
+
+/*
+ * ZUS_OP_NEW_INODE
+ * The kernel fills @zi from the new VFS inode before dispatching. On return
+ * @_zi is translated to a kernel pointer with md_addr() and @zus_ii is
+ * attached to the VFS inode.
+ */
+struct zufs_ioc_new_inode {
+	struct zufs_ioc_hdr hdr;
+	 /* IN */
+	struct zus_inode zi;
+	struct zus_inode_info *dir_ii; /* If mktmp this is the root */
+	struct zufs_str str;
+	__u64 flags;
+
+	 /* OUT */
+	zu_dpp_t _zi;
+	struct zus_inode_info *zus_ii;
+};
+
+/*
+ * ZUS_OP_FREE_INODE, ZUS_OP_EVICT_INODE
+ * Request only: every field after the header is an input.
+ */
+struct zufs_ioc_evict_inode {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_inode_info *zus_ii;
+	__u64 flags;
+};
+
+/*
+ * ZUS_OP_LOOKUP
+ * Field order matters: zuf_lookup() uses offsetof(_zi) as both the input
+ * length and the start of the output area, so everything before @_zi is sent
+ * and everything from @_zi on is returned.
+ */
+struct zufs_ioc_lookup {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_inode_info *dir_ii;
+	struct zufs_str str;
+
+	 /* OUT */
+	zu_dpp_t _zi;
+	struct zus_inode_info *zus_ii;
+};
+
+/*
+ * ZUS_OP_ADD_DENTRY, ZUS_OP_REMOVE_DENTRY
+ * Identifies a directory (@zus_dir_ii), an inode (@zus_ii) and the entry
+ * name (@str).
+ */
+struct zufs_ioc_dentry {
+	struct zufs_ioc_hdr hdr;
+	struct zus_inode_info *zus_ii; /* IN */
+	struct zus_inode_info *zus_dir_ii; /* IN */
+	struct zufs_str str; /* IN */
+	__u64 ino; /* OUT - only for lookup */
+};
+
+/*
+ * ZUS_OP_RENAME
+ * Request only. zuf_rename() sets @old_zus_ii / @new_zus_ii to NULL when the
+ * corresponding dentry has no inode, and stores the current time in @time.
+ */
+struct zufs_ioc_rename {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_inode_info *old_dir_ii;
+	struct zus_inode_info *new_dir_ii;
+	struct zus_inode_info *old_zus_ii;
+	struct zus_inode_info *new_zus_ii;
+	struct zufs_str old_d_str;
+	struct zufs_str new_d_str;
+	__le64 time;
+};
+
+/*
+ * ZUS_OP_READDIR
+ * @more is maintained by the zufs_readdir_iter helpers below: it is set when
+ * an entry did not fit into the buffer and cleared when one was emitted.
+ */
+struct zufs_ioc_readdir {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_inode_info *dir_ii;
+	loff_t pos;
+
+	/* OUT */
+	__u8	more;
+};
+
+/*
+ * One entry in the readdir buffer: inode number, an 8-bit type and a 56-bit
+ * position packed into bitfields, followed by the length-prefixed name.
+ * Entries are variable sized, see zufs_dir_entry_len().
+ * NOTE(review): bitfield layout is compiler-defined; both sides of the API
+ * presumably need the same ABI - confirm.
+ */
+struct zufs_dir_entry {
+	__le64 ino;
+	struct {
+		unsigned	type	: 8;
+		ulong		pos	: 56;
+	};
+	struct zufs_str zstr;
+};
+
+/*
+ * Cursor over a readdir buffer.
+ * @__zde:       current position (next entry to read or write).
+ * @last:        end of the buffer (buffer start + hdr.len).
+ * @ioc_readdir: the request this buffer belongs to (for the "more" flag).
+ */
+struct zufs_readdir_iter {
+	void *__zde, *last;
+	struct zufs_ioc_readdir *ioc_readdir;
+};
+
+/* Size of a zufs_dir_entry up to, but not including, the name bytes */
+enum {E_ZDE_HDR_SIZE =
+	offsetof(struct zufs_dir_entry, zstr) + offsetof(struct zufs_str, name),
+};
+
+/*
+ * Point @rdi at the start of the buffer @app_ptr, whose size is taken from
+ * @ioc_readdir->hdr.len, and reset the request's "more" flag.
+ */
+static inline void zufs_readdir_iter_init(struct zufs_readdir_iter *rdi,
+					  struct zufs_ioc_readdir *ioc_readdir,
+					  void *app_ptr)
+{
+	void *buf_end = app_ptr + ioc_readdir->hdr.len;
+
+	rdi->ioc_readdir = ioc_readdir;
+	rdi->__zde = app_ptr;
+	rdi->last = buf_end;
+
+	ioc_readdir->more = false;
+}
+
+/*
+ * Bytes occupied in the buffer by an entry whose name is @name_len bytes
+ * long: header plus name, rounded up to a multiple of sizeof(__u64).
+ */
+static inline uint zufs_dir_entry_len(__u8 name_len)
+{
+	uint unaligned = E_ZDE_HDR_SIZE + name_len;
+
+	return ALIGN(unaligned, sizeof(__u64));
+}
+
+/*
+ * Return the entry at the cursor and advance the cursor past it.
+ *
+ * Returns NULL, leaving the cursor untouched, when an entry header would
+ * reach or pass @rdi->last, when the entry's name length is 0, or when the
+ * whole aligned entry would reach or pass @rdi->last.
+ */
+static inline
+struct zufs_dir_entry *zufs_next_zde(struct zufs_readdir_iter *rdi)
+{
+	struct zufs_dir_entry *zde = rdi->__zde;
+	uint len;
+
+	if (rdi->last <= rdi->__zde + E_ZDE_HDR_SIZE)
+		return NULL;
+	if (zde->zstr.len == 0)
+		return NULL;
+	len = zufs_dir_entry_len(zde->zstr.len);
+	if (rdi->last <= rdi->__zde + len)
+		return NULL;
+
+	rdi->__zde += len;
+	return zde;
+}
+
+/*
+ * Append one directory entry at the cursor.
+ *
+ * Returns false and sets the request's "more" flag when the aligned entry
+ * would reach or pass @rdi->last. Otherwise fills the entry, clears "more",
+ * advances the cursor through zufs_next_zde() and returns true.
+ *
+ * NOTE(review): @ino is stored without a byte-order conversion although the
+ * field is declared __le64 - confirm what the reader expects.
+ * NOTE(review): strncpy() copies at most @len bytes, so the stored name is
+ * not NUL-terminated by this function.
+ * NOTE(review): with @len == 0 zufs_next_zde() returns NULL and the cursor
+ * does not advance - presumably empty names are never emitted.
+ */
+static inline bool zufs_zde_emit(struct zufs_readdir_iter *rdi, __u64 ino,
+				 __u8 type, __u64 pos, const char *name,
+				 __u8 len)
+{
+	struct zufs_dir_entry *zde = rdi->__zde;
+
+	if (rdi->last <= rdi->__zde + zufs_dir_entry_len(len)) {
+		rdi->ioc_readdir->more = true;
+		return false;
+	}
+
+	rdi->ioc_readdir->more = 0;
+	zde->ino = ino;
+	zde->type = type;
+	/*ASSERT(0 == (pos && (1 << 56 - 1)));*/
+	zde->pos = pos;
+	strncpy(zde->zstr.name, name, len);
+	zde->zstr.len = len;
+	/* Advance past the entry just written; the return value is not needed */
+	zufs_next_zde(rdi);
+
+	return true;
+}
+
+/*
+ * ZUS_OP_GET_SYMLINK
+ * Used by zuf_get_link() for targets not stored inline in the zus_inode; the
+ * returned @_link is translated to a kernel pointer with md_addr().
+ */
+struct zufs_ioc_get_link {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_inode_info *zus_ii;
+
+	/* OUT */
+	zu_dpp_t _link;
+};
+
+/*
+ * ZUS_OP_SETATTR, ZUS_OP_UPDATE_TIME
+ * @zuf_attr is a mask of STATX_* bits (plus ZUFS_STATX_VERSION) naming what
+ * the kernel changed in the zus_inode. @truncate_size is only filled in by
+ * zuf_setattr() together with STATX_SIZE.
+ */
+struct zufs_ioc_attr {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_inode_info *zus_ii;
+	__u64 truncate_size;
+	__u32 zuf_attr;
+	__u32 pad;
+};
+
+/*
+ * ZUS_OP_ISYNC, ZUS_OP_FALLOCATE
+ * NOTE(review): enum e_zufs_operation defines ZUS_OP_SYNC, not
+ * ZUS_OP_ISYNC - presumably that is the operation meant here; confirm.
+ */
+struct zufs_ioc_range {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_inode_info *zus_ii;
+	__u64 offset, length;
+	__u32 opflags;
+	__u32 pad;
+
+	/* OUT */
+	__u64 write_unmapped;
+};
+
+/*
+ * ZUS_OP_CLONE
+ * Request only: source and destination inodes, a position in each and a
+ * length.
+ */
+struct zufs_ioc_clone {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_inode_info *src_zus_ii;
+	struct zus_inode_info *dst_zus_ii;
+	__u64 pos_in, pos_out;
+	__u64 len;
+};
+
+/*
+ * ZUS_OP_LLSEEK
+ * The caller passes @offset_in and @whence; the result comes back in
+ * @offset_out.
+ */
+struct zufs_ioc_seek {
+	struct zufs_ioc_hdr hdr;
+	/* IN */
+	struct zus_inode_info *zus_ii;
+	__u64 offset_in;
+	__u32 whence;
+	__u32 pad;
+
+	/* OUT */
+	__u64 offset_out;
+};
+
 #endif /* _LINUX_ZUFS_API_H */
-- 
2.5.5

  reply	other threads:[~2018-03-13 17:40 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-13 17:14 [RFC 0/7] first draft of ZUFS - the Kernel part Boaz Harrosh
2018-03-13 17:15 ` [RFC 1/7] mm: Add new vma flag VM_LOCAL_CPU Boaz Harrosh
2018-03-13 18:56   ` Matthew Wilcox
2018-03-14  8:20     ` Miklos Szeredi
2018-03-14 11:17       ` Matthew Wilcox
2018-03-14 11:31         ` Miklos Szeredi
2018-03-14 11:45           ` Matthew Wilcox
2018-03-14 14:49             ` Miklos Szeredi
2018-03-14 14:57               ` Matthew Wilcox
2018-03-14 15:39                 ` Miklos Szeredi
     [not found]                   ` <CAON-v2ygEDCn90C9t-zadjsd5GRgj0ECqntQSDDtO_Zjk=KoVw@mail.gmail.com>
2018-03-14 16:48                     ` Matthew Wilcox
2018-03-14 21:41       ` Boaz Harrosh
2018-03-15  8:47         ` Miklos Szeredi
2018-03-15 15:27           ` Boaz Harrosh
2018-03-15 15:34             ` Matthew Wilcox
2018-03-15 15:58               ` Boaz Harrosh
2018-03-15 16:10             ` Miklos Szeredi
2018-03-15 16:30               ` Boaz Harrosh
2018-03-15 20:42                 ` Miklos Szeredi
2018-04-25 12:21                   ` Boaz Harrosh
2018-05-07 10:46                     ` Miklos Szeredi
2018-03-13 17:17 ` [RFC 2/7] fs: Add the ZUF filesystem to the build + License Boaz Harrosh
2018-03-13 20:16   ` Andreas Dilger
2018-03-14 17:21     ` Boaz Harrosh
2018-03-15  4:21       ` Andreas Dilger
2018-03-15 13:58         ` Boaz Harrosh
2018-03-13 17:18 ` [RFC 3/7] zuf: Preliminary Documentation Boaz Harrosh
2018-03-13 20:32   ` Randy Dunlap
2018-03-14 18:01     ` Boaz Harrosh
2018-03-14 19:16       ` Randy Dunlap
2018-03-13 17:22 ` [RFC 4/7] zuf: zuf-rootfs && zuf-core Boaz Harrosh
2018-03-13 17:36   ` Boaz Harrosh
2018-03-14 12:56     ` Nikolay Borisov
2018-03-14 18:34       ` Boaz Harrosh
2018-03-13 17:25 ` [RFC 5/7] zus: Devices && mounting Boaz Harrosh
2018-03-13 17:38   ` Boaz Harrosh
2018-03-13 17:28 ` [RFC 6/7] zuf: Filesystem operations Boaz Harrosh
2018-03-13 17:39   ` Boaz Harrosh [this message]
2018-03-13 17:32 ` [RFC 7/7] zuf: Write/Read && mmap implementation Boaz Harrosh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ba7cddd1-9464-1b90-3738-9a5bc3dcb2b3@netapp.com \
    --to=boazh@netapp.com \
    --cc=Amit.Golander@netapp.com \
    --cc=Anna.Schumaker@netapp.com \
    --cc=Shachar.Sharon@netapp.com \
    --cc=amir73il@gmail.com \
    --cc=andy.rudoff@intel.com \
    --cc=jack@suse.cz \
    --cc=jmoyer@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=mszeredi@redhat.com \
    --cc=rwheeler@redhat.com \
    --cc=sagim@netapp.com \
    --cc=smfrench@gmail.com \
    --cc=sweil@redhat.com \
    --cc=swhiteho@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).