From: Jim Rees <rees@umich.edu>
To: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: linux-nfs@vger.kernel.org
Subject: [PATCH v1 16/25] pnfsblock: add extent manipulation functions
Date: Sun, 17 Jul 2011 19:59:03 -0400
Message-ID: <1310947152-12255-17-git-send-email-rees@umich.edu>
In-Reply-To: <1310947152-12255-1-git-send-email-rees@umich.edu>

From: Fred Isaman <iisaman@citi.umich.edu>

Add working implementations of the support functions that the write
path needs to handle INVAL extents, such as bl_mark_sectors_init()
and is_sector_initialized().
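
As a rough illustration (not part of this patch), a write-path caller
might use the two exported helpers along these lines; the wrapper
function and its arguments are hypothetical, and isect is assumed to be
page-aligned:

	static int fill_invalid_page(struct pnfs_inval_markings *marks,
				     sector_t isect)
	{
		/* The page covering isect was already zeroed or written. */
		if (is_sector_initialized(marks, isect))
			return 0;
		/*
		 * Otherwise the caller zeroes or writes the page, then
		 * records it so the surrounding partial blocks can be
		 * completed and reported via LAYOUTCOMMIT later.
		 */
		return bl_mark_sectors_init(marks, isect, PAGE_CACHE_SECTORS,
					    NULL);
	}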

[pnfsblock: fix 64-bit compiler warnings for extent manipulation]
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Benny Halevy <benny@tonian.com>
---
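A note on the optional "pages" argument (illustration only, not part of
the patch): when the caller passes a non-NULL pointer,
bl_mark_sectors_init() returns through it either NULL (no surrounding
page needs further initialization) or a kmalloc'ed array of ascending,
page-aligned sector offsets terminated by ~0, presumably to be freed by
the caller.  A hypothetical consumer might walk it like this:

	static void note_uninit_pages(sector_t *pages)
	{
		sector_t *p;

		if (!pages)
			return;	/* surrounding pages were already initialized */
		for (p = pages; *p != ~0; p++)
			dprintk("page at sector %llu still needs init\n",
				(u64)*p);
		kfree(pages);
	}
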
 fs/nfs/blocklayout/blocklayout.h |   31 +++++-
 fs/nfs/blocklayout/extents.c     |  253 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 282 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 4e61b52..fa3d51d 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -38,6 +38,9 @@
 
 #include "../pnfs.h"
 
+#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
+#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
+
 struct block_mount_id {
 	spinlock_t			bm_lock;    /* protects list */
 	struct list_head		bm_devlist; /* holds pnfs_block_dev */
@@ -99,8 +102,23 @@ enum exstate4 {
 	PNFS_BLOCK_NONE_DATA		= 3  /* unmapped, it's a hole */
 };
 
+#define MY_MAX_TAGS (15) /* tag bit numbers used must be less than this */
+
+struct my_tree {
+	sector_t		mtt_step_size;	/* Internal sector alignment */
+	struct list_head	mtt_stub; /* Should be a radix tree */
+};
+
 struct pnfs_inval_markings {
-	/* STUB */
+	spinlock_t	im_lock;
+	struct my_tree	im_tree;	/* Sectors that need LAYOUTCOMMIT */
+	sector_t	im_block_size;	/* Server blocksize in sectors */
+};
+
+struct pnfs_inval_tracking {
+	struct list_head it_link;
+	int		 it_sector;
+	int		 it_tags;
 };
 
 /* sector_t fields are all in 512-byte sectors */
@@ -119,7 +137,11 @@ struct pnfs_block_extent {
 static inline void
 INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
 {
-	/* STUB */
+	spin_lock_init(&marks->im_lock);
+	INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
+	marks->im_block_size = blocksize;
+	marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
+					   blocksize);
 }
 
 enum extentclass4 {
@@ -200,8 +222,13 @@ void free_block_dev(struct pnfs_block_dev *bdev);
 struct pnfs_block_extent *
 bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
 		struct pnfs_block_extent **cow_read);
+int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
+			     sector_t offset, sector_t length,
+			     sector_t **pages);
 void bl_put_extent(struct pnfs_block_extent *be);
 struct pnfs_block_extent *alloc_extent(void);
+struct pnfs_block_extent *get_extent(struct pnfs_block_extent *be);
+int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect);
 int bl_add_merge_extent(struct pnfs_block_layout *bl,
 			 struct pnfs_block_extent *new);
 
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 14065ca..9ace614 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -33,6 +33,259 @@
 #include "blocklayout.h"
 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
 
+/* Bit numbers */
+#define EXTENT_INITIALIZED 0
+#define EXTENT_WRITTEN     1
+#define EXTENT_IN_COMMIT   2
+#define INTERNAL_EXISTS    MY_MAX_TAGS
+#define INTERNAL_MASK      ((1 << INTERNAL_EXISTS) - 1)
+
+/* Returns largest t<=s s.t. t%base==0 */
+static inline sector_t normalize(sector_t s, int base)
+{
+	sector_t tmp = s; /* Since do_div modifies its argument */
+	return s - do_div(tmp, base);
+}
+
+static inline sector_t normalize_up(sector_t s, int base)
+{
+	return normalize(s + base - 1, base);
+}
+
+/* Complete stub using a list while the desired API is determined */
+
+/* Returns tags, or negative */
+static int32_t _find_entry(struct my_tree *tree, u64 s)
+{
+	struct pnfs_inval_tracking *pos;
+
+	dprintk("%s(%llu) enter\n", __func__, s);
+	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
+		if (pos->it_sector > s)
+			continue;
+		else if (pos->it_sector == s)
+			return pos->it_tags & INTERNAL_MASK;
+		else
+			break;
+	}
+	return -ENOENT;
+}
+
+static inline
+int _has_tag(struct my_tree *tree, u64 s, int32_t tag)
+{
+	int32_t tags;
+
+	dprintk("%s(%llu, %i) enter\n", __func__, s, tag);
+	s = normalize(s, tree->mtt_step_size);
+	tags = _find_entry(tree, s);
+	if ((tags < 0) || !(tags & (1 << tag)))
+		return 0;
+	else
+		return 1;
+}
+
+/* Creates entry with tag, or if entry already exists, unions tag to it.
+ * If storage is not NULL, newly created entry will use it.
+ * Returns number of entries added, or negative on error.
+ */
+static int _add_entry(struct my_tree *tree, u64 s, int32_t tag,
+		      struct pnfs_inval_tracking *storage)
+{
+	int found = 0;
+	struct pnfs_inval_tracking *pos;
+
+	dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage);
+	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
+		if (pos->it_sector > s)
+			continue;
+		else if (pos->it_sector == s) {
+			found = 1;
+			break;
+		} else
+			break;
+	}
+	if (found) {
+		pos->it_tags |= (1 << tag);
+		return 0;
+	} else {
+		struct pnfs_inval_tracking *new;
+		if (storage)
+			new = storage;
+		else {
+			new = kmalloc(sizeof(*new), GFP_NOFS);
+			if (!new)
+				return -ENOMEM;
+		}
+		new->it_sector = s;
+		new->it_tags = (1 << tag);
+		list_add(&new->it_link, &pos->it_link);
+		return 1;
+	}
+}
+
+/* XXXX Really want option to not create */
+/* Over range, unions tag with existing entries, else creates entry with tag */
+static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length)
+{
+	u64 i;
+
+	dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length);
+	for (i = normalize(s, tree->mtt_step_size); i < s + length;
+	     i += tree->mtt_step_size)
+		if (_add_entry(tree, i, tag, NULL))
+			return -ENOMEM;
+	return 0;
+}
+
+/* Ensure that future operations on given range of tree will not malloc */
+static int _preload_range(struct my_tree *tree, u64 offset, u64 length)
+{
+	u64 start, end, s;
+	int count, i, used = 0, status = -ENOMEM;
+	struct pnfs_inval_tracking **storage;
+
+	dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
+	start = normalize(offset, tree->mtt_step_size);
+	end = normalize_up(offset + length, tree->mtt_step_size);
+	count = (int)(end - start) / (int)tree->mtt_step_size;
+
+	/* Pre-malloc what memory we might need */
+	storage = kmalloc(sizeof(*storage) * count, GFP_NOFS);
+	if (!storage)
+		return -ENOMEM;
+	for (i = 0; i < count; i++) {
+		storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking),
+				     GFP_NOFS);
+		if (!storage[i])
+			goto out_cleanup;
+	}
+
+	/* Now need lock - HOW??? */
+
+	for (s = start; s < end; s += tree->mtt_step_size)
+		used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
+
+	/* Unlock - HOW??? */
+	status = 0;
+
+ out_cleanup:
+	for (i = used; i < count; i++) {
+		if (!storage[i])
+			break;
+		kfree(storage[i]);
+	}
+	kfree(storage);
+	return status;
+}
+
+static void set_needs_init(sector_t *array, sector_t offset)
+{
+	sector_t *p = array;
+
+	dprintk("%s enter\n", __func__);
+	if (!p)
+		return;
+	while (*p < offset)
+		p++;
+	if (*p == offset)
+		return;
+	else if (*p == ~0) {
+		*p++ = offset;
+		*p = ~0;
+		return;
+	} else {
+		sector_t *save = p;
+		dprintk("%s Adding %llu\n", __func__, (u64)offset);
+		while (*p != ~0)
+			p++;
+		p++;
+		memmove(save + 1, save, (char *)p - (char *)save);
+		*save = offset;
+		return;
+	}
+}
+
+/* We are relying on page lock to serialize this */
+int is_sector_initialized(struct pnfs_inval_markings *marks, sector_t isect)
+{
+	int rv;
+
+	spin_lock(&marks->im_lock);
+	rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
+	spin_unlock(&marks->im_lock);
+	return rv;
+}
+
+/* Marks sectors in [offset, offset + length) as having been initialized.
+ * All lengths are step-aligned, where step is min(pagesize, blocksize).
+ * Notes where partial block is initialized, and helps prepare it for
+ * complete initialization later.
+ */
+/* Currently assumes offset is page-aligned */
+int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
+			     sector_t offset, sector_t length,
+			     sector_t **pages)
+{
+	sector_t s, start, end;
+	sector_t *array = NULL; /* Pages to mark */
+
+	dprintk("%s(offset=%llu,len=%llu) enter\n",
+		__func__, (u64)offset, (u64)length);
+	s = max((sector_t) 3,
+		2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
+	dprintk("%s set max=%llu\n", __func__, (u64)s);
+	if (pages) {
+		array = kmalloc(s * sizeof(sector_t), GFP_NOFS);
+		if (!array)
+			goto outerr;
+		array[0] = ~0;
+	}
+
+	start = normalize(offset, marks->im_block_size);
+	end = normalize_up(offset + length, marks->im_block_size);
+	if (_preload_range(&marks->im_tree, start, end - start))
+		goto outerr;
+
+	spin_lock(&marks->im_lock);
+
+	for (s = normalize_up(start, PAGE_CACHE_SECTORS);
+	     s < offset; s += PAGE_CACHE_SECTORS) {
+		dprintk("%s pre-area pages\n", __func__);
+		/* Portion of used block is not initialized */
+		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
+			set_needs_init(array, s);
+	}
+	if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
+		goto out_unlock;
+	for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
+	     s < end; s += PAGE_CACHE_SECTORS) {
+		dprintk("%s post-area pages\n", __func__);
+		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
+			set_needs_init(array, s);
+	}
+
+	spin_unlock(&marks->im_lock);
+
+	if (pages) {
+		if (array[0] == ~0) {
+			kfree(array);
+			*pages = NULL;
+		} else
+			*pages = array;
+	}
+	return 0;
+
+ out_unlock:
+	spin_unlock(&marks->im_lock);
+ outerr:
+	if (pages) {
+		kfree(array);
+		*pages = NULL;
+	}
+	return -ENOMEM;
+}
+
 static void print_bl_extent(struct pnfs_block_extent *be)
 {
 	dprintk("PRINT EXTENT extent %p\n", be);
-- 
1.7.4.1

