From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
To: wei.w.wang@intel.com
Cc: virtio-dev@lists.oasis-open.org, linux-kernel@vger.kernel.org,
qemu-devel@nongnu.org, virtualization@lists.linux-foundation.org,
kvm@vger.kernel.org, linux-mm@kvack.org, mst@redhat.com,
mhocko@kernel.org, akpm@linux-foundation.org,
mawilcox@microsoft.com, david@redhat.com,
cornelia.huck@de.ibm.com, mgorman@techsingularity.net,
aarcange@redhat.com, amit.shah@redhat.com, pbonzini@redhat.com,
willy@infradead.org, liliang.opensource@gmail.com,
yang.zhang.wz@gmail.com, quan.xu@aliyun.com, nilal@redhat.com,
riel@redhat.com
Subject: Re: [PATCH v19 3/7] xbitmap: add more operations
Date: Fri, 15 Dec 2017 01:29:45 +0900 [thread overview]
Message-ID: <201712150129.BFC35949.FFtFOLSOJOQHVM@I-love.SAKURA.ne.jp> (raw)
In-Reply-To: <5A31F445.6070504@intel.com>
Wei Wang wrote:
> I used the example of xb_clear_bit_range(), and xb_find_next_bit() is
> the same fundamentally. Please let me know if anywhere still looks fuzzy.
I don't think it is the same for xb_find_next_bit() with set == 0.
+ if (radix_tree_exception(bmap)) {
+ unsigned long tmp = (unsigned long)bmap;
+ unsigned long ebit = bit + 2;
+
+ if (ebit >= BITS_PER_LONG)
+ continue;
+ if (set)
+ ret = find_next_bit(&tmp, BITS_PER_LONG, ebit);
+ else
+ ret = find_next_zero_bit(&tmp, BITS_PER_LONG,
+ ebit);
+ if (ret < BITS_PER_LONG)
+ return ret - 2 + IDA_BITMAP_BITS * index;
What I'm saying is that find_next_zero_bit() will not be called if you do
"if (ebit >= BITS_PER_LONG) continue;" before calling find_next_zero_bit().
When scanning "0000000000000000000000000000000000000000000000000000000000000001",
"bit < BITS_PER_LONG - 2" case finds "0" in this word but
"bit >= BITS_PER_LONG - 2" case finds "0" in next word or segment.
I can't understand why this is correct behavior. It is too much puzzling.
> > Also, one more thing you need to check. Have you checked how long does
> > xb_find_next_set_bit(xb, 0, ULONG_MAX) on an empty xbitmap takes?
> > If it causes soft lockup warning, should we add cond_resched() ?
> > If yes, you have to document that this API might sleep. If no, you
> > have to document that the caller of this API is responsible for
> > not to pass such a large value range.
>
> Yes, that will take too long time. Probably we can document some
> comments as a reminder for the callers.
>
Then, I feel that API is poorly implemented. There is no need to brute-force
when scanning [0, ULONG_MAX] range. If you eliminate exception path and
redesign the data structure, xbitmap will become as simple as a sample
implementation shown below. Not tested yet, but I think that this will be
sufficient for what virtio-baloon wants to do; i.e. find consecutive "1" bits
quickly. I didn't test whether finding "struct ulong_list_data" using radix
tree can improve performance.
----------------------------------------
#include <linux/module.h>
#include <linux/slab.h>
#define BITMAP_LEN 1024
struct ulong_list_data {
struct list_head list;
unsigned long segment; /* prev->segment < segment < next->segment */
unsigned long bits; /* Number of bits set in this offset bitmap. */
unsigned long *bitmap; /* Offset bitmap of BITMAP_LEN bits. */
};
static struct ulong_list_data null_ulong_list = {
{ NULL, NULL }, ULONG_MAX, 0, NULL
};
struct ulong_list_head {
struct list_head list;
struct ulong_list_data *last_used;
};
static void init_ulong(struct ulong_list_head *head)
{
INIT_LIST_HEAD(&head->list);
head->last_used = &null_ulong_list;
}
static bool set_ulong(struct ulong_list_head *head, const unsigned long value)
{
struct ulong_list_data *ptr = head->last_used;
struct list_head *list = &head->list;
const unsigned long segment = value / BITMAP_LEN;
const unsigned long offset = value % BITMAP_LEN;
bool found = false;
if (ptr->segment == segment)
goto shortcut;
list_for_each_entry(ptr, &head->list, list) {
if (ptr->segment < segment) {
list = &ptr->list;
continue;
}
found = ptr->segment == segment;
break;
}
if (!found) {
ptr = kzalloc(sizeof(*ptr), GFP_NOWAIT | __GFP_NOWARN);
if (!ptr)
return false;
ptr->bitmap = kzalloc(BITMAP_LEN / 8,
GFP_NOWAIT | __GFP_NOWARN);
if (!ptr->bitmap) {
kfree(ptr);
return false;
}
ptr->segment = segment;
list_add(&ptr->list, list);
}
head->last_used = ptr;
shortcut:
if (!test_bit(offset, ptr->bitmap)) {
__set_bit(offset, ptr->bitmap);
ptr->bits++;
}
return true;
}
static void clear_ulong(struct ulong_list_head *head, const unsigned long value)
{
struct ulong_list_data *ptr = head->last_used;
const unsigned long segment = value / BITMAP_LEN;
const unsigned long offset = value % BITMAP_LEN;
if (ptr->segment == segment)
goto shortcut;
list_for_each_entry(ptr, &head->list, list) {
if (ptr->segment < segment)
continue;
if (ptr->segment == segment) {
head->last_used = ptr;
shortcut:
if (test_bit(offset, ptr->bitmap)) {
__clear_bit(offset, ptr->bitmap);
if (--ptr->bits)
return;
if (head->last_used == ptr)
head->last_used = &null_ulong_list;
list_del(&ptr->list);
kfree(ptr);
}
}
return;
}
}
static void destroy_ulong(struct ulong_list_head *head)
{
struct ulong_list_data *ptr;
struct ulong_list_data *tmp;
list_for_each_entry_safe(ptr, tmp, &head->list, list) {
list_del(&ptr->list);
kfree(ptr);
}
init_ulong(head);
}
static unsigned long get_ulong_set_range(struct ulong_list_head *head,
unsigned long *start)
{
struct ulong_list_data *ptr;
unsigned long segment = *start / BITMAP_LEN;
unsigned int offset = *start % BITMAP_LEN;
unsigned long ret = BITMAP_LEN;
list_for_each_entry(ptr, &head->list, list) {
if (ptr->segment < segment)
continue;
if (ptr->segment > segment)
offset = 0;
ret = find_next_bit(ptr->bitmap, BITMAP_LEN, offset);
if (ret == BITMAP_LEN)
continue;
break;
}
if (ret == BITMAP_LEN)
return 0;
segment = ptr->segment;
*start = segment * BITMAP_LEN + ret;
ret = find_next_zero_bit(ptr->bitmap, BITMAP_LEN, ret);
if (ret < BITMAP_LEN || segment == ULONG_MAX / BITMAP_LEN)
return segment * BITMAP_LEN + ret - *start;
ret = 0;
list_for_each_entry_continue(ptr, &head->list, list) {
if (ptr->segment != ++segment)
break;
if (ptr->bits == BITMAP_LEN)
continue;
ret = find_next_zero_bit(ptr->bitmap, BITMAP_LEN, 0);
break;
}
return segment * BITMAP_LEN + ret - *start;
}
static int __init test_init(void)
{
struct ulong_list_data *ptr;
struct ulong_list_head head;
unsigned long start = 0;
unsigned long len;
init_ulong(&head);
set_ulong(&head, 0);
set_ulong(&head, 10);
set_ulong(&head, 11);
set_ulong(&head, ULONG_MAX);
set_ulong(&head, 1000000);
set_ulong(&head, 12);
clear_ulong(&head, 11);
for (len = 1048576; len < 1048576 + BITMAP_LEN * 3; len++)
set_ulong(&head, len);
clear_ulong(&head, 1048600);
set_ulong(&head, 2000000000);
pr_info("Debug dump start\n");
list_for_each_entry(ptr, &head.list, list)
pr_info("Segment %lu %lu\n", ptr->segment, ptr->bits);
pr_info("Debug dump end\n");
while ((len = get_ulong_set_range(&head, &start)) > 0) {
pr_info("Range %lu@%lu\n", len, start);
start += len;
if (!start)
break;
}
destroy_ulong(&head);
return -EINVAL;
}
module_init(test_init);
MODULE_LICENSE("GPL");
----------------------------------------
next prev parent reply other threads:[~2017-12-14 16:29 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-12-12 11:55 [PATCH v19 0/7] Virtio-balloon Enhancement Wei Wang
2017-12-12 11:55 ` [PATCH v19 1/7] xbitmap: Introduce xbitmap Wei Wang
2017-12-12 12:53 ` Philippe Ombredanne
2017-12-15 11:05 ` kbuild test robot
2017-12-15 13:24 ` Matthew Wilcox
2017-12-16 10:10 ` Wei Wang
2017-12-12 11:55 ` [PATCH v19 2/7] xbitmap: potential improvement Wei Wang
2017-12-15 3:07 ` kbuild test robot
2017-12-12 11:55 ` [PATCH v19 3/7] xbitmap: add more operations Wei Wang
2017-12-12 13:20 ` Tetsuo Handa
2017-12-13 12:26 ` Wei Wang
2017-12-13 14:16 ` Tetsuo Handa
2017-12-14 3:47 ` Wei Wang
2017-12-14 11:47 ` [virtio-dev] " Wei Wang
2017-12-14 16:29 ` Tetsuo Handa [this message]
2017-12-14 18:12 ` Matthew Wilcox
2017-12-15 16:21 ` Tetsuo Handa
2017-12-15 18:26 ` Michael S. Tsirkin
2017-12-16 4:31 ` Tetsuo Handa
2017-12-16 5:05 ` Matthew Wilcox
2017-12-16 5:57 ` Tetsuo Handa
2017-12-15 18:49 ` Matthew Wilcox
2017-12-15 19:22 ` Matthew Wilcox
2017-12-17 13:47 ` Wang, Wei W
2017-12-17 22:18 ` Matthew Wilcox
2017-12-18 2:33 ` Wei Wang
2017-12-18 2:59 ` Matthew Wilcox
2017-12-16 10:14 ` Wei Wang
2017-12-14 12:37 ` Matthew Wilcox
2017-12-15 18:42 ` Matthew Wilcox
2017-12-16 10:12 ` Wei Wang
2017-12-16 11:28 ` Tetsuo Handa
2017-12-17 5:24 ` Wei Wang
2017-12-17 10:21 ` Tetsuo Handa
2017-12-17 11:50 ` Wang, Wei W
2017-12-17 15:16 ` Tetsuo Handa
2017-12-18 8:05 ` Wei Wang
2017-12-12 11:55 ` [PATCH v19 4/7] virtio-balloon: VIRTIO_BALLOON_F_SG Wei Wang
2017-12-12 11:55 ` [PATCH v19 5/7] mm: support reporting free page blocks Wei Wang
2017-12-12 11:55 ` [PATCH v19 6/7] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ Wei Wang
2017-12-12 11:55 ` [PATCH v19 7/7] virtio-balloon: don't report free pages when page poisoning is enabled Wei Wang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=201712150129.BFC35949.FFtFOLSOJOQHVM@I-love.SAKURA.ne.jp \
--to=penguin-kernel@i-love.sakura.ne.jp \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=amit.shah@redhat.com \
--cc=cornelia.huck@de.ibm.com \
--cc=david@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=liliang.opensource@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mawilcox@microsoft.com \
--cc=mgorman@techsingularity.net \
--cc=mhocko@kernel.org \
--cc=mst@redhat.com \
--cc=nilal@redhat.com \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=quan.xu@aliyun.com \
--cc=riel@redhat.com \
--cc=virtio-dev@lists.oasis-open.org \
--cc=virtualization@lists.linux-foundation.org \
--cc=wei.w.wang@intel.com \
--cc=willy@infradead.org \
--cc=yang.zhang.wz@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).