From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
To: wei.w.wang@intel.com
Cc: virtio-dev@lists.oasis-open.org, linux-kernel@vger.kernel.org,
	qemu-devel@nongnu.org, virtualization@lists.linux-foundation.org,
	kvm@vger.kernel.org, linux-mm@kvack.org, mst@redhat.com,
	mhocko@kernel.org, akpm@linux-foundation.org,
	mawilcox@microsoft.com, david@redhat.com,
	cornelia.huck@de.ibm.com, mgorman@techsingularity.net,
	aarcange@redhat.com, amit.shah@redhat.com, pbonzini@redhat.com,
	willy@infradead.org, liliang.opensource@gmail.com,
	yang.zhang.wz@gmail.com, quan.xu@aliyun.com, nilal@redhat.com,
	riel@redhat.com
Subject: Re: [PATCH v19 3/7] xbitmap: add more operations
Date: Fri, 15 Dec 2017 01:29:45 +0900	[thread overview]
Message-ID: <201712150129.BFC35949.FFtFOLSOJOQHVM@I-love.SAKURA.ne.jp> (raw)
In-Reply-To: <5A31F445.6070504@intel.com>

Wei Wang wrote:
> I used the example of xb_clear_bit_range(), and xb_find_next_bit() is 
> the same fundamentally. Please let me know if anywhere still looks fuzzy.

I don't think it is the same for xb_find_next_bit() with set == 0.

+		if (radix_tree_exception(bmap)) {
+			unsigned long tmp = (unsigned long)bmap;
+			unsigned long ebit = bit + 2;
+
+			if (ebit >= BITS_PER_LONG)
+				continue;
+			if (set)
+				ret = find_next_bit(&tmp, BITS_PER_LONG, ebit);
+			else
+				ret = find_next_zero_bit(&tmp, BITS_PER_LONG,
+							 ebit);
+			if (ret < BITS_PER_LONG)
+				return ret - 2 + IDA_BITMAP_BITS * index;

What I'm saying is that find_next_zero_bit() will never be called for that entry
when ebit >= BITS_PER_LONG, because "if (ebit >= BITS_PER_LONG) continue;" skips
to the next chunk before find_next_zero_bit() has a chance to run.

When scanning "0000000000000000000000000000000000000000000000000000000000000001",
the "bit < BITS_PER_LONG - 2" case finds a "0" in this word, but the
"bit >= BITS_PER_LONG - 2" case finds a "0" in the next word or segment.

I can't understand why this is correct behavior. It is too puzzling.
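
For example, for the set == 0 case I would expect something like below (just an
untested sketch, assuming "bit" is the offset inside the current chunk and
"index" is the chunk index, as in your hunk above; bits beyond what an exception
entry can hold are implicitly 0 if I understand the layout correctly), so that
the "0" is reported within the current chunk:

		if (radix_tree_exception(bmap)) {
			unsigned long tmp = (unsigned long)bmap;
			unsigned long ebit = bit + 2;

			if (ebit >= BITS_PER_LONG) {
				/* Bits past the exception entry are 0. */
				if (!set)
					return bit + IDA_BITMAP_BITS * index;
				continue;
			}
			...
		}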



> > Also, one more thing you need to check. Have you checked how long
> > xb_find_next_set_bit(xb, 0, ULONG_MAX) takes on an empty xbitmap?
> > If it causes a soft lockup warning, should we add cond_resched()?
> > If yes, you have to document that this API might sleep. If no, you
> > have to document that the caller of this API is responsible for
> > not passing such a large value range.
> 
> Yes, that will take too long. Probably we can add some comments as a
> reminder for the callers.
> 

Then, I feel that the API is poorly implemented. There is no need to brute-force
when scanning the [0, ULONG_MAX] range. If you eliminate the exception path and
redesign the data structure, xbitmap becomes as simple as the sample
implementation shown below. It is not tested yet, but I think it will be
sufficient for what virtio-balloon wants to do; i.e. finding consecutive "1" bits
quickly. I didn't test whether looking up "struct ulong_list_data" via a radix
tree would improve performance.

----------------------------------------
#include <linux/module.h>
#include <linux/slab.h>

#define BITMAP_LEN 1024

struct ulong_list_data {
	struct list_head list;
	unsigned long segment; /* prev->segment < segment < next->segment */
	unsigned long bits;    /* Number of bits set in this offset bitmap. */
	unsigned long *bitmap; /* Offset bitmap of BITMAP_LEN bits. */
};

static struct ulong_list_data null_ulong_list = {
	{ NULL, NULL }, ULONG_MAX, 0, NULL
};

struct ulong_list_head {
	struct list_head list;
	struct ulong_list_data *last_used;
};

static void init_ulong(struct ulong_list_head *head)
{
	INIT_LIST_HEAD(&head->list);
	head->last_used = &null_ulong_list;
}

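/* Set bit "value". Returns false on memory allocation failure. */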
static bool set_ulong(struct ulong_list_head *head, const unsigned long value)
{
	struct ulong_list_data *ptr = head->last_used;
	struct list_head *list = &head->list;
	const unsigned long segment = value / BITMAP_LEN;
	const unsigned long offset = value % BITMAP_LEN;
	bool found = false;

	if (ptr->segment == segment)
		goto shortcut;
	list_for_each_entry(ptr, &head->list, list) {
		if (ptr->segment < segment) {
			list = &ptr->list;
			continue;
		}
		found = ptr->segment == segment;
		break;
	}
	if (!found) {
		ptr = kzalloc(sizeof(*ptr), GFP_NOWAIT | __GFP_NOWARN);
		if (!ptr)
			return false;
		ptr->bitmap = kzalloc(BITMAP_LEN / 8,
				      GFP_NOWAIT | __GFP_NOWARN);
		if (!ptr->bitmap) {
			kfree(ptr);
			return false;
		}
		ptr->segment = segment;
		list_add(&ptr->list, list);
	}
	head->last_used = ptr;
 shortcut:
	if (!test_bit(offset, ptr->bitmap)) {
		__set_bit(offset, ptr->bitmap);
		ptr->bits++;
	}
	return true;
}

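/* Clear bit "value". Frees the segment entry when no bits remain set in it. */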
static void clear_ulong(struct ulong_list_head *head, const unsigned long value)
{
	struct ulong_list_data *ptr = head->last_used;
	const unsigned long segment = value / BITMAP_LEN;
	const unsigned long offset = value % BITMAP_LEN;

	if (ptr->segment == segment)
		goto shortcut;
	list_for_each_entry(ptr, &head->list, list) {
		if (ptr->segment < segment)
			continue;
		if (ptr->segment == segment) {
			head->last_used = ptr;
shortcut:
			if (test_bit(offset, ptr->bitmap)) {
				__clear_bit(offset, ptr->bitmap);
				if (--ptr->bits)
					return;
				if (head->last_used == ptr)
					head->last_used = &null_ulong_list;
				list_del(&ptr->list);
				kfree(ptr->bitmap);
				kfree(ptr);
			}
		}
		return;
	}
}

static void destroy_ulong(struct ulong_list_head *head)
{
	struct ulong_list_data *ptr;
	struct ulong_list_data *tmp;

	list_for_each_entry_safe(ptr, tmp, &head->list, list) {
		list_del(&ptr->list);
		kfree(ptr->bitmap);
		kfree(ptr);
	}
	init_ulong(head);
}

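/*
 * Find the run of consecutive "1" bits starting at or after *start.
 * On success, *start is updated to the first bit of the run and the
 * length of the run is returned. Returns 0 if no such bit exists.
 */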
static unsigned long get_ulong_set_range(struct ulong_list_head *head,
					 unsigned long *start)
{
	struct ulong_list_data *ptr;
	unsigned long segment = *start / BITMAP_LEN;
	unsigned int offset = *start % BITMAP_LEN;
	unsigned long ret = BITMAP_LEN;

	list_for_each_entry(ptr, &head->list, list) {
		if (ptr->segment < segment)
			continue;
		if (ptr->segment > segment)
			offset = 0;
		ret = find_next_bit(ptr->bitmap, BITMAP_LEN, offset);
		if (ret == BITMAP_LEN)
			continue;
		break;
	}
	if (ret == BITMAP_LEN)
		return 0;
	segment = ptr->segment;
	*start = segment * BITMAP_LEN + ret;
	ret = find_next_zero_bit(ptr->bitmap, BITMAP_LEN, ret);
	if (ret < BITMAP_LEN || segment == ULONG_MAX / BITMAP_LEN)
		return segment * BITMAP_LEN + ret - *start;
	list_for_each_entry_continue(ptr, &head->list, list) {
		if (ptr->segment != segment + 1)
			break;
		segment++;
		if (ptr->bits == BITMAP_LEN)
			continue;
		ret = find_next_zero_bit(ptr->bitmap, BITMAP_LEN, 0);
		return segment * BITMAP_LEN + ret - *start;
	}
	/* The run of "1" bits extends to the end of segment "segment". */
	return (segment + 1) * BITMAP_LEN - *start;
}

static int __init test_init(void)
{
	struct ulong_list_data *ptr;
	struct ulong_list_head head;
	unsigned long start = 0;
	unsigned long len;

	init_ulong(&head);

	set_ulong(&head, 0);
	set_ulong(&head, 10);
	set_ulong(&head, 11);
	set_ulong(&head, ULONG_MAX);
	set_ulong(&head, 1000000);
	set_ulong(&head, 12);
	clear_ulong(&head, 11);
	for (len = 1048576; len < 1048576 + BITMAP_LEN * 3; len++)
		set_ulong(&head, len);
	clear_ulong(&head, 1048600);
	set_ulong(&head, 2000000000);

	pr_info("Debug dump start\n");
	list_for_each_entry(ptr, &head.list, list)
		pr_info("Segment %lu %lu\n", ptr->segment, ptr->bits);
	pr_info("Debug dump end\n");

	while ((len = get_ulong_set_range(&head, &start)) > 0) {
		pr_info("Range %lu@%lu\n", len, start);
		start += len;
		if (!start)
			break;
	}
	destroy_ulong(&head);
	return -EINVAL;
}

module_init(test_init);
MODULE_LICENSE("GPL");
----------------------------------------


Thread overview: 41+ messages
2017-12-12 11:55 [PATCH v19 0/7] Virtio-balloon Enhancement Wei Wang
2017-12-12 11:55 ` [PATCH v19 1/7] xbitmap: Introduce xbitmap Wei Wang
2017-12-12 12:53   ` Philippe Ombredanne
2017-12-15 11:05   ` kbuild test robot
2017-12-15 13:24     ` Matthew Wilcox
2017-12-16 10:10       ` Wei Wang
2017-12-12 11:55 ` [PATCH v19 2/7] xbitmap: potential improvement Wei Wang
2017-12-15  3:07   ` kbuild test robot
2017-12-12 11:55 ` [PATCH v19 3/7] xbitmap: add more operations Wei Wang
2017-12-12 13:20   ` Tetsuo Handa
2017-12-13 12:26     ` Wei Wang
2017-12-13 14:16       ` Tetsuo Handa
2017-12-14  3:47         ` Wei Wang
2017-12-14 11:47           ` [virtio-dev] " Wei Wang
2017-12-14 16:29           ` Tetsuo Handa [this message]
2017-12-14 18:12             ` Matthew Wilcox
2017-12-15 16:21               ` Tetsuo Handa
2017-12-15 18:26                 ` Michael S. Tsirkin
2017-12-16  4:31                   ` Tetsuo Handa
2017-12-16  5:05                     ` Matthew Wilcox
2017-12-16  5:57                       ` Tetsuo Handa
2017-12-15 18:49                 ` Matthew Wilcox
2017-12-15 19:22                   ` Matthew Wilcox
2017-12-17 13:47                     ` Wang, Wei W
2017-12-17 22:18                       ` Matthew Wilcox
2017-12-18  2:33                         ` Wei Wang
2017-12-18  2:59                           ` Matthew Wilcox
2017-12-16 10:14             ` Wei Wang
2017-12-14 12:37       ` Matthew Wilcox
2017-12-15 18:42   ` Matthew Wilcox
2017-12-16 10:12     ` Wei Wang
2017-12-16 11:28       ` Tetsuo Handa
2017-12-17  5:24         ` Wei Wang
2017-12-17 10:21           ` Tetsuo Handa
2017-12-17 11:50             ` Wang, Wei W
2017-12-17 15:16               ` Tetsuo Handa
2017-12-18  8:05                 ` Wei Wang
2017-12-12 11:55 ` [PATCH v19 4/7] virtio-balloon: VIRTIO_BALLOON_F_SG Wei Wang
2017-12-12 11:55 ` [PATCH v19 5/7] mm: support reporting free page blocks Wei Wang
2017-12-12 11:55 ` [PATCH v19 6/7] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ Wei Wang
2017-12-12 11:55 ` [PATCH v19 7/7] virtio-balloon: don't report free pages when page poisoning is enabled Wei Wang
