From: Jim Rees <rees@umich.edu>
To: Benny Halevy <bhalevy@panasas.com>
Cc: linux-nfs@vger.kernel.org, peter honeyman <honey@citi.umich.edu>
Subject: [PATCH 50/88] pnfsblock: Lookup list entry of layouts and tags in reverse order
Date: Tue, 7 Jun 2011 13:32:04 -0400 [thread overview]
Message-ID: <249d0ef858a8f01a515db6584a5e2d55f4731b72.1307464382.git.rees@umich.edu> (raw)
In-Reply-To: <cover.1307464381.git.rees@umich.edu>
From: Zhang Jingwang <zhangjingwang@nrchpc.ac.cn>
Optimize for sequential writes. Layout infos and tags are organized by
file offset. When appending data to a file, the whole list is examined,
which introduces a notable performance decrease.
Signed-off-by: Zhang Jingwang <zhangjingwang@nrchpc.ac.cn>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
---
fs/nfs/blocklayout/extents.c | 126 +++++++++++++++++++++---------------------
1 files changed, 64 insertions(+), 62 deletions(-)
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index cf5b3a3..6c26cd4 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -60,8 +60,8 @@ static int32_t _find_entry(struct my_tree_t *tree, u64 s)
struct pnfs_inval_tracking *pos;
dprintk("%s(%llu) enter\n", __func__, s);
- list_for_each_entry(pos, &tree->mtt_stub, it_link) {
- if (pos->it_sector < s)
+ list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
+ if (pos->it_sector > s)
continue;
else if (pos->it_sector == s)
return pos->it_tags & INTERNAL_MASK;
@@ -96,8 +96,8 @@ static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag,
struct pnfs_inval_tracking *pos;
dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage);
- list_for_each_entry(pos, &tree->mtt_stub, it_link) {
- if (pos->it_sector < s)
+ list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
+ if (pos->it_sector > s)
continue;
else if (pos->it_sector == s) {
found = 1;
@@ -119,7 +119,7 @@ static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag,
}
new->it_sector = s;
new->it_tags = (1 << tag);
- list_add_tail(&new->it_link, &pos->it_link);
+ list_add(&new->it_link, &pos->it_link);
return 1;
}
}
@@ -225,14 +225,14 @@ _range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag)
u64 expect = 0;
dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag);
- list_for_each_entry(pos, &tree->mtt_stub, it_link) {
- if (pos->it_sector < start)
+ list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
+ if (pos->it_sector >= end)
continue;
if (!expect) {
- if ((pos->it_sector == start) &&
+ if ((pos->it_sector == end - tree->mtt_step_size) &&
(pos->it_tags & (1 << tag))) {
- expect = start + tree->mtt_step_size;
- if (expect == end)
+ expect = pos->it_sector - tree->mtt_step_size;
+ if (expect < start)
return 1;
continue;
} else {
@@ -241,8 +241,8 @@ _range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag)
}
if (pos->it_sector != expect || !(pos->it_tags & (1 << tag)))
return 0;
- expect += tree->mtt_step_size;
- if (expect == end)
+ expect -= tree->mtt_step_size;
+ if (expect < start)
return 1;
}
return 0;
@@ -589,65 +589,67 @@ add_and_merge_extent(struct pnfs_block_layout *bl,
/* Scan for proper place to insert, extending new to the left
* as much as possible.
*/
- list_for_each_entry_safe(be, tmp, list, be_node) {
- if (new->be_f_offset < be->be_f_offset)
+ list_for_each_entry_safe_reverse(be, tmp, list, be_node) {
+ if (new->be_f_offset >= be->be_f_offset + be->be_length)
break;
- if (end <= be->be_f_offset + be->be_length) {
- /* new is a subset of existing be*/
+ if (new->be_f_offset >= be->be_f_offset) {
+ if (end <= be->be_f_offset + be->be_length) {
+ /* new is a subset of existing be*/
+ if (extents_consistent(be, new)) {
+ dprintk("%s: new is subset, ignoring\n",
+ __func__);
+ put_extent(new);
+ return 0;
+ } else {
+ goto out_err;
+ }
+ } else {
+ /* |<-- be -->|
+ * |<-- new -->| */
+ if (extents_consistent(be, new)) {
+ /* extend new to fully replace be */
+ new->be_length += new->be_f_offset -
+ be->be_f_offset;
+ new->be_f_offset = be->be_f_offset;
+ new->be_v_offset = be->be_v_offset;
+ dprintk("%s: removing %p\n", __func__, be);
+ list_del(&be->be_node);
+ put_extent(be);
+ } else {
+ goto out_err;
+ }
+ }
+ } else if (end >= be->be_f_offset + be->be_length) {
+ /* new extent overlap existing be */
if (extents_consistent(be, new)) {
- dprintk("%s: new is subset, ignoring\n",
- __func__);
- put_extent(new);
- return 0;
- } else
+ /* extend new to fully replace be */
+ dprintk("%s: removing %p\n", __func__, be);
+ list_del(&be->be_node);
+ put_extent(be);
+ } else {
goto out_err;
- } else if (new->be_f_offset <=
- be->be_f_offset + be->be_length) {
- /* new overlaps or abuts existing be */
- if (extents_consistent(be, new)) {
+ }
+ } else if (end > be->be_f_offset) {
+ /* |<-- be -->|
+ *|<-- new -->| */
+ if (extents_consistent(new, be)) {
/* extend new to fully replace be */
- new->be_length += new->be_f_offset -
- be->be_f_offset;
- new->be_f_offset = be->be_f_offset;
- new->be_v_offset = be->be_v_offset;
+ new->be_length += be->be_f_offset + be->be_length -
+ new->be_f_offset - new->be_length;
dprintk("%s: removing %p\n", __func__, be);
list_del(&be->be_node);
put_extent(be);
- } else if (new->be_f_offset !=
- be->be_f_offset + be->be_length)
+ } else {
goto out_err;
+ }
}
}
/* Note that if we never hit the above break, be will not point to a
* valid extent. However, in that case &be->be_node==list.
*/
- list_add_tail(&new->be_node, &be->be_node);
+ list_add(&new->be_node, &be->be_node);
dprintk("%s: inserting new\n", __func__);
print_elist(list);
- /* Scan forward for overlaps. If we find any, extend new and
- * remove the overlapped extent.
- */
- be = list_prepare_entry(new, list, be_node);
- list_for_each_entry_safe_continue(be, tmp, list, be_node) {
- if (end < be->be_f_offset)
- break;
- /* new overlaps or abuts existing be */
- if (extents_consistent(be, new)) {
- if (end < be->be_f_offset + be->be_length) {
- /* extend new to fully cover be */
- end = be->be_f_offset + be->be_length;
- new->be_length = end - new->be_f_offset;
- }
- dprintk("%s: removing %p\n", __func__, be);
- list_del(&be->be_node);
- put_extent(be);
- } else if (end != be->be_f_offset) {
- list_del(&new->be_node);
- goto out_err;
- }
- }
- dprintk("%s: after merging\n", __func__);
- print_elist(list);
/* STUB - The per-list consistency checks have all been done,
* should now check cross-list consistency.
*/
@@ -680,10 +682,10 @@ find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
if (ret &&
(!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA))
break;
- list_for_each_entry(be, &bl->bl_extents[i], be_node) {
- if (isect < be->be_f_offset)
+ list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
+ if (isect >= be->be_f_offset + be->be_length)
break;
- if (isect < be->be_f_offset + be->be_length) {
+ if (isect >= be->be_f_offset) {
/* We have found an extent */
dprintk("%s Get %p (%i)\n", __func__, be,
atomic_read(&be->be_refcnt.refcount));
@@ -716,10 +718,10 @@ find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect)
for (i = 0; i < EXTENT_LISTS; i++) {
if (ret)
break;
- list_for_each_entry(be, &bl->bl_extents[i], be_node) {
- if (isect < be->be_f_offset)
+ list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
+ if (isect >= be->be_f_offset + be->be_length)
break;
- if (isect < be->be_f_offset + be->be_length) {
+ if (isect >= be->be_f_offset) {
/* We have found an extent */
dprintk("%s Get %p (%i)\n", __func__, be,
atomic_read(&be->be_refcnt.refcount));
--
1.7.4.1
next prev parent reply other threads:[~2011-06-07 17:32 UTC|newest]
Thread overview: 136+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-06-07 17:24 [PATCH 00/88] pnfs block layout driver rees
2011-06-07 17:26 ` [PATCH 01/88] pnfs: add set-clear layoutdriver interface Jim Rees
2011-06-07 17:26 ` [PATCH 02/88] pnfs: let layoutcommit code handle multiple segments Jim Rees
2011-06-07 17:26 ` [PATCH 03/88] pnfs_post_submit: Restore "pnfs: pnfs_do_flush" part 1 Jim Rees
2011-06-07 17:26 ` [PATCH 04/88] pnfs_post_submit: Restore the pnfs_write_end part of "pnfs: commit and pnfs_write_end" Jim Rees
2011-06-07 17:26 ` [PATCH 05/88] pnfs: xdr support for three word attribute bitmap Jim Rees
2011-06-07 17:26 ` [PATCH 06/88] pnfs: HACK: ask for layout_blksize on mount Jim Rees
2011-06-07 17:26 ` [PATCH 07/88] pnfs: HACK: modify write_end_cleanup Jim Rees
2011-06-07 17:26 ` [PATCH 08/88] HACK: propagate fsdata into nfs_writepage_setup Jim Rees
2011-06-07 17:26 ` [PATCH 09/88] pnfs: HACK: adjust eof handling Jim Rees
2011-06-07 17:27 ` [PATCH 10/88] pnfsblock: define PNFS_BLOCK Kconfig option Jim Rees
2011-06-07 17:27 ` [PATCH 11/88] pnfsblock: blocklayout stub Jim Rees
2011-06-07 17:27 ` [PATCH 12/88] pnfsblock: expose scsi interface Jim Rees
2011-06-07 17:27 ` [PATCH 13/88] pnfsblock: scan scsi devices Jim Rees
2011-06-07 17:27 ` [PATCH 14/88] pnfsblock: call and parse getdevicelist Jim Rees
2011-06-07 17:27 ` [PATCH 15/88] pnfsblock: dm kernel interface Jim Rees
2011-06-07 17:27 ` [PATCH 16/88] pnfsblock: select BLK_DEV_DM when PNFS_BLOCK is configured Jim Rees
2011-06-07 17:27 ` [PATCH 17/88] pnfsblock: create and destroy dm metadevice Jim Rees
2011-06-07 17:27 ` [PATCH 18/88] pnfsblock: construct and load md table Jim Rees
2011-06-07 17:28 ` [PATCH 19/88] pnfsblock: layout alloc and free Jim Rees
2011-06-07 17:28 ` [PATCH 20/88] pnfsblock: basic extent code Jim Rees
2011-06-07 17:28 ` [PATCH 21/88] pnfsblock: lseg alloc and free Jim Rees
2011-06-07 17:28 ` [PATCH 22/88] pnfsblock: xdr decode pnfs_block_layout4 Jim Rees
2011-06-07 17:28 ` [PATCH 23/88] pnfsblock: merge extents Jim Rees
2011-06-07 17:28 ` [PATCH 24/88] pnfsblock: find_get_extent Jim Rees
2011-06-07 17:28 ` [PATCH 25/88] pnfsblock: bl_read_pagelist Jim Rees
2011-06-07 17:28 ` [PATCH 26/88] pnfsblock: allow use of PG_owner_priv_1 flag Jim Rees
2011-06-07 17:29 ` [PATCH 27/88] pnfsblock: read path error handling Jim Rees
2011-06-07 17:29 ` [PATCH 28/88] pnfsblock: SPLITME: add extent manipulation functions Jim Rees
2011-06-07 17:29 ` [PATCH 29/88] pnfsblock: write_begin Jim Rees
2011-06-07 17:29 ` [PATCH 30/88] pnfsblock: write_end Jim Rees
2011-06-07 17:29 ` [PATCH 31/88] pnfsblock: write_end_cleanup Jim Rees
2011-06-07 17:29 ` [PATCH 32/88] pnfsblock: bl_write_pagelist support functions Jim Rees
2011-06-07 17:29 ` [PATCH 33/88] pnfsblock: bl_write_pagelist Jim Rees
2011-06-07 17:29 ` [PATCH 34/88] pnfsblock: note written INVAL areas for layoutcommit Jim Rees
2011-06-07 17:30 ` [PATCH 35/88] pnfsblock: bl_setup_layoutcommit Jim Rees
2011-06-07 17:30 ` [PATCH 36/88] pnfsblock: encode_layoutcommit Jim Rees
2011-06-07 17:30 ` [PATCH 37/88] pnfsblock: cleanup_layoutcommit Jim Rees
2011-06-07 17:30 ` [PATCH 38/88] pnfsblock: merge rw extents Jim Rees
2011-06-07 17:30 ` [PATCH 39/88] pnfsblock: debugging dprintks for clist info Jim Rees
2011-06-07 17:30 ` [PATCH 40/88] SQAUSHME: blocklayoutdriver: NULL pointer reference when committing too many extents Jim Rees
2011-06-07 17:30 ` [PATCH 41/88] SQUASHME: pnfs-block: remove of CONFIG_PNFS fallout Jim Rees
2011-06-07 17:30 ` [PATCH 42/88] SQUASHME: pnfsblock: Fix a memory leak Jim Rees
2011-06-07 17:31 ` [PATCH 43/88] SQUASHME: pnfsblock: fix bug when decoding block device info Jim Rees
2011-06-07 17:31 ` [PATCH 44/88] SQUASHME: pnfsblock: Wrong extent refcount in block extents list Jim Rees
2011-06-07 17:31 ` [PATCH 45/88] SQUASHME: pnfsblock: Implement release_inval_marks Jim Rees
2011-06-07 17:31 ` [PATCH 46/88] SQUASHME: pnfsblock: Fix missing extent in commit list Jim Rees
2011-06-07 17:31 ` [PATCH 47/88] pnfsblock: use the session max response size for getdeviceinfo's maxcount Jim Rees
2011-06-07 17:31 ` [PATCH 48/88] SQUASHME: pnfs-block: fix compile breakage Jim Rees
2011-06-07 17:31 ` [PATCH 49/88] SQUASHME: pnfs-block: convert APIs pnfs-post-submit Jim Rees
2011-06-07 17:32 ` Jim Rees [this message]
2011-06-07 17:32 ` [PATCH 51/88] pnfsblock: expose block_class interface Jim Rees
2011-06-07 17:32 ` [PATCH 52/88] pnfsblock: iterating all local block disks instead of only scsi disks when initializing mount point Jim Rees
2011-06-07 17:32 ` [PATCH 53/88] SQUASHME: pnfsblock: set pnfs_blksize before calling set_pnfs_layoutdriver Jim Rees
2011-06-07 17:32 ` [PATCH 54/88] SQUASHME: pnfsblock: get rid of threshold policy ops Jim Rees
2011-06-07 17:32 ` [PATCH 55/88] SQUASHME: pnfsblock: write_begin adjust for removed fields Jim Rees
2011-06-07 17:32 ` [PATCH 56/88] SQUASHME: pnfsblock: write_end adjust for removed ok_to_use_pnfs Jim Rees
2011-06-07 17:32 ` [PATCH 57/88] SQUASHME: pnfsblock: write_end_cleanup " Jim Rees
2011-06-07 17:32 ` [PATCH 58/88] SQUASHME: pnfsblock: bl_write_pagelist support functions adjust for missing PG_USE_PNFS Jim Rees
2011-06-07 17:33 ` [PATCH 59/88] SQUASHME: pnfsblock: bl_write_pagelist " Jim Rees
2011-06-07 17:33 ` [PATCH 60/88] SQUASHME: pnfs-block: nfs4_blk_add_block_disk ret must be signed Jim Rees
2011-06-07 17:33 ` [PATCH 61/88] SQUASHME: pnfs-block: use new alloc/free_layout API Jim Rees
2011-06-07 17:33 ` [PATCH 62/88] SQUASHME: pnfs-block: use new commit api Jim Rees
2011-06-07 17:33 ` [PATCH 63/88] SQUASHME: pnfs-block: use new read_pagelist api Jim Rees
2011-06-07 17:33 ` [PATCH 64/88] SQUASHME: pnfs-block: use new write_pagelist api Jim Rees
2011-06-07 17:33 ` [PATCH 65/88] pnfs-block: Add support for simple rpc pipefs Jim Rees
2011-06-07 17:33 ` [PATCH 66/88] pnfs-block: Remove device creation from kernel Jim Rees
2011-06-07 17:33 ` [PATCH 67/88] SQUASHME: pnfs-block: apply types rename Jim Rees
2011-06-07 17:34 ` [PATCH 68/88] SQUASHME: pnfs-block: Revert "pnfsblock: expose block_class interface" Jim Rees
2011-06-07 17:34 ` [PATCH 69/88] SQUASHME: pnfsblock: remove obsolete include file from blocklayout.h Jim Rees
2011-06-07 17:34 ` [PATCH 70/88] SQUASHME: pnfsblock: use nfs4_deviceid Jim Rees
2011-06-07 17:34 ` [PATCH 71/88] SQUASHME: pnfsblock: no callback ops Jim Rees
2011-06-07 17:34 ` [PATCH 72/88] SQAUSHME: pnfsblock: no PNFS_NFS_SERVER Jim Rees
2011-06-07 17:34 ` [PATCH 73/88] SQUASHME: pnfsblock: no dev_notify_types Jim Rees
2011-06-07 17:34 ` [PATCH 74/88] SQUASHME: pnfsblock: use new struct pnfs_layout_hdr Jim Rees
2011-06-07 17:34 ` [PATCH 75/88] SQUASHME: pnfsblock: compile error in blocklayout code Jim Rees
2011-06-07 17:34 ` [PATCH 76/88] SQUASHME: pnfs-block: deprecate get_stripesize Jim Rees
2011-06-07 17:35 ` [PATCH 77/88] move include lines out of include file Jim Rees
2011-06-07 17:35 ` [PATCH 78/88] SQUASHME: pnfs-block: use {set,clear}_layoutdriver Jim Rees
2011-06-07 17:35 ` [PATCH 79/88] SQUASHME: pnfs-block: Return failure from bl_initialize_mountpoint Jim Rees
2011-06-07 17:35 ` [PATCH 80/88] SQUASHME: pnfs-block: fixup setup_layoutcommit arguments Jim Rees
2011-06-07 17:35 ` [PATCH 81/88] SQUASHME: pnfs-block: fixup cleanup_layoutcommit arguments Jim Rees
2011-06-07 17:35 ` [PATCH 82/88] SQUASHME: pnfs-block: fixup encode_layoutcommit arguments Jim Rees
2011-06-07 17:35 ` [PATCH 83/88] SQUASHME: pnfs-block: fixup layoutcommit methods args Jim Rees
2011-06-07 17:35 ` [PATCH 84/88] pnfs-block: fix blocklayoutdev.c for new blkdev_get_by_dev() Jim Rees
2011-06-07 17:35 ` [PATCH 85/88] SQUASHME: pnfs-block: use pnfs_layout_hdr field prefix Jim Rees
2011-06-07 17:35 ` [PATCH 86/88] SQUASHME: pnfs: blocklayout: port block layout code Jim Rees
2011-06-08 1:27 ` Benny Halevy
2011-06-08 2:06 ` Benny Halevy
2011-06-08 7:38 ` Peng Tao
2011-06-07 17:36 ` [PATCH 87/88] Add configurable prefetch size for layoutget Jim Rees
2011-06-08 2:01 ` Benny Halevy
2011-06-08 2:18 ` Jim Rees
2011-06-08 7:15 ` Peng Tao
2011-06-09 6:06 ` Benny Halevy
2011-06-09 11:49 ` Jim Rees
2011-06-09 13:32 ` Benny Halevy
2011-06-09 13:58 ` Jim Rees
2011-06-09 15:07 ` Peng Tao
2011-06-09 21:22 ` Benny Halevy
2011-06-10 6:00 ` tao.peng
2011-06-10 12:33 ` Benny Halevy
2011-06-10 14:09 ` tao.peng
2011-06-10 19:23 ` Benny Halevy
2011-06-10 20:03 ` Fred Isaman
2011-06-10 21:15 ` Benny Halevy
2011-06-11 1:46 ` Peng Tao
2011-06-10 23:20 ` Boaz Harrosh
2011-06-11 2:19 ` Peng Tao
2011-06-12 14:40 ` Boaz Harrosh
2011-06-12 18:46 ` Peng Tao
2011-06-11 1:35 ` Peng Tao
2011-06-09 21:23 ` Benny Halevy
2011-06-10 5:36 ` tao.peng
2011-06-10 12:36 ` Benny Halevy
2011-06-10 14:17 ` tao.peng
2011-06-10 19:02 ` Benny Halevy
2011-06-09 15:01 ` Peng Tao
2011-06-09 14:54 ` Peng Tao
2011-06-09 21:30 ` Benny Halevy
2011-06-10 6:02 ` tao.peng
2011-06-10 12:47 ` Benny Halevy
2011-06-10 14:30 ` tao.peng
2011-06-10 19:07 ` Benny Halevy
2011-06-10 16:23 ` Boaz Harrosh
2011-06-10 16:44 ` Boaz Harrosh
2011-06-09 6:08 ` Benny Halevy
2011-06-07 17:36 ` [PATCH 88/88] NFS41: do not update isize if inode needs layoutcommit Jim Rees
2011-06-08 2:05 ` Benny Halevy
2011-06-08 7:06 ` Peng Tao
2011-06-08 7:29 ` Peng Tao
2011-06-09 21:52 ` [PATCH 00/88] pnfs block layout driver Boaz Harrosh
2011-06-09 22:15 ` Jim Rees
2011-06-10 2:16 ` Boaz Harrosh
2011-06-10 2:20 ` Boaz Harrosh
2011-06-10 4:04 ` Benny Halevy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=249d0ef858a8f01a515db6584a5e2d55f4731b72.1307464382.git.rees@umich.edu \
--to=rees@umich.edu \
--cc=bhalevy@panasas.com \
--cc=honey@citi.umich.edu \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).