All of lore.kernel.org
 help / color / mirror / Atom feed
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
To: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: Peter Lieven <pl@kamp.de>,
	"qemu-devel@nongnu.org" <qemu-devel@nongnu.org>,
	"qemu-ppc@nongnu.org" <qemu-ppc@nongnu.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Wenchao Xia <xiawenc@linux.vnet.ibm.com>,
	David Gibson <david@gibson.dropbear.id.au>
Subject: Re: [Qemu-devel] [Qemu-ppc]  broken incoming migration
Date: Sun, 09 Jun 2013 12:52:06 +1000	[thread overview]
Message-ID: <1370746326.3766.472.camel@pasglop> (raw)
In-Reply-To: <51B3E9A8.5010705@ozlabs.ru>

On Sun, 2013-06-09 at 12:34 +1000, Alexey Kardashevskiy wrote:

> It is _live_ migration, the source sends changes, same pages can change and
> be sent several times. So we would need to turn tracking on on the
> destination to know if some page was received from the source or changed by
> the destination itself (by writing there bios/firmware images, etc) and
> then clear pages which were touched by the destination and were not sent by
> the source.

Or we can set some kind of flag so that when creating a "migration
target" VM we don't load all these things into memory.

> Or we do not make guesses, the source sends everything and the destination
> simply checks if a page which is empty on the source is empty on the
> destination and avoid writing zeroes to it. Looks simpler to me and this is
> what the new patch does.

But you end up sending a lot of zeros ... is the migration compressed
(I am not familiar with it at all)? If it is, that shouldn't be a big
deal, but otherwise it feels to me that you should be able to send a
special packet instead that says "all zeros", because you'll potentially
have an awful lot of these.

Ben.

> > 
> >>
> >>> Also, you mean the following code is from qemu and it does not allocate
> >>> memory with your gcc, right? Maybe it is related to KVM; how about
> >>> turning off KVM and retrying the following code in qemu?
> >>>
> >>>> #include <stdio.h>
> >>>> #include <stdlib.h>
> >>>> #include <assert.h>
> >>>> #include <unistd.h>
> >>>> #include <sys/resource.h>
> >>>> #include <inttypes.h>
> >>>> #include <string.h>
> >>>> #include <sys/mman.h>
> >>>> #include <errno.h>
> >>>>
> >>>> #if defined __SSE2__
> >>>> #include <emmintrin.h>
> >>>> #define VECTYPE        __m128i
> >>>> #define SPLAT(p)       _mm_set1_epi8(*(p))
> >>>> #define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) ==
> >>>> 0xFFFF)
> >>>> #else
> >>>> #define VECTYPE        unsigned long
> >>>> #define SPLAT(p)       (*(p) * (~0UL / 255))
> >>>> #define ALL_EQ(v1, v2) ((v1) == (v2))
> >>>> #endif
> >>>>
> >>>> #define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8
> >>>>
> >>>> /* Round number down to multiple */
> >>>> #define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m))
> >>>>
> >>>> /* Round number up to multiple */
> >>>> #define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m))
> >>>>
> >>>> #define QEMU_VMALLOC_ALIGN (256 * 4096)
> >>>>
> >>>> /* alloc shared memory pages */
> >>>> void *qemu_anon_ram_alloc(size_t size)
> >>>> {
> >>>>       size_t align = QEMU_VMALLOC_ALIGN;
> >>>>       size_t total = size + align - getpagesize();
> >>>>       void *ptr = mmap(0, total, PROT_READ | PROT_WRITE,
> >>>>                        MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> >>>>       size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) -
> >>>> (uintptr_t)ptr;
> >>>>
> >>>>       if (ptr == MAP_FAILED) {
> >>>>           fprintf(stderr, "Failed to allocate %zu B: %s\n",
> >>>>                   size, strerror(errno));
> >>>>           abort();
> >>>>       }
> >>>>
> >>>>       ptr += offset;
> >>>>       total -= offset;
> >>>>
> >>>>       if (offset > 0) {
> >>>>           munmap(ptr - offset, offset);
> >>>>       }
> >>>>       if (total > size) {
> >>>>           munmap(ptr + size, total - size);
> >>>>       }
> >>>>
> >>>>       return ptr;
> >>>> }
> >>>>
> >>>> static inline int
> >>>> can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
> >>>> {
> >>>>       return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR
> >>>>                      * sizeof(VECTYPE)) == 0
> >>>>               && ((uintptr_t) buf) % sizeof(VECTYPE) == 0);
> >>>> }
> >>>>
> >>>> size_t buffer_find_nonzero_offset(const void *buf, size_t len)
> >>>> {
> >>>>       const VECTYPE *p = buf;
> >>>>       const VECTYPE zero = (VECTYPE){0};
> >>>>       size_t i;
> >>>>
> >>>>       if (!len) {
> >>>>           return 0;
> >>>>       }
> >>>>
> >>>>       assert(can_use_buffer_find_nonzero_offset(buf, len));
> >>>>
> >>>>       for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) {
> >>>>           if (!ALL_EQ(p[i], zero)) {
> >>>>               return i * sizeof(VECTYPE);
> >>>>           }
> >>>>       }
> >>>>
> >>>>       for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR;
> >>>>            i < len / sizeof(VECTYPE);
> >>>>            i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {
> >>>>           VECTYPE tmp0 = p[i + 0] | p[i + 1];
> >>>>           VECTYPE tmp1 = p[i + 2] | p[i + 3];
> >>>>           VECTYPE tmp2 = p[i + 4] | p[i + 5];
> >>>>           VECTYPE tmp3 = p[i + 6] | p[i + 7];
> >>>>           VECTYPE tmp01 = tmp0 | tmp1;
> >>>>           VECTYPE tmp23 = tmp2 | tmp3;
> >>>>           if (!ALL_EQ(tmp01 | tmp23, zero)) {
> >>>>               break;
> >>>>           }
> >>>>       }
> >>>>
> >>>>       return i * sizeof(VECTYPE);
> >>>> }
> >>>>
> >>>> int main()
> >>>> {
> >>>>        //char *x = malloc(1024 << 20);
> >>>>        char *x = qemu_anon_ram_alloc(1024 << 20);
> >>>>
> >>>>        int i, j;
> >>>>        int ret = 0;
> >>>>        struct rusage rusage;
> >>>>        for (i = 0; i < 500; i ++) {
> >>>>            for (j = 0; j < 10 << 20; j += 4096) {
> >>>>                 ret += buffer_find_nonzero_offset((char*) (x + (i << 20)
> >>>> + j), 4096);
> >>>>            }
> >>>>            getrusage( RUSAGE_SELF, &rusage );
> >>>>            printf("read offset: %d kB, RSS size: %ld kB", ((i+1) << 10),
> >>>> rusage.ru_maxrss);
> >>>>            getchar();
> >>>>        }
> >>>>        printf("%d zero pages\n", ret);
> >>>> }
> >>>>
> >>>
> >>>
> >>
> >>
> > 
> > 
> 
> 

  reply	other threads:[~2013-06-09  2:52 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-05-30  7:44 [Qemu-devel] broken incoming migration Alexey Kardashevskiy
2013-05-30  7:49 ` Alexey Kardashevskiy
2013-05-30  7:49 ` Paolo Bonzini
2013-05-30  8:18   ` Alexey Kardashevskiy
2013-05-30  9:08     ` Peter Lieven
2013-05-30  9:31       ` Alexey Kardashevskiy
2013-05-30 13:00       ` Paolo Bonzini
2013-05-30 13:38         ` Alexey Kardashevskiy
2013-05-30 14:08           ` Paolo Bonzini
2013-05-30 14:38         ` Peter Lieven
2013-05-30 14:41           ` Paolo Bonzini
2013-06-04 13:52             ` Peter Lieven
2013-06-04 14:14               ` Paolo Bonzini
2013-06-04 14:38                 ` Peter Lieven
2013-06-04 14:40                   ` Paolo Bonzini
2013-06-04 14:48                     ` Peter Lieven
2013-06-04 15:17                       ` Paolo Bonzini
2013-06-04 19:15                         ` Peter Lieven
2013-06-05  3:37                           ` Alexey Kardashevskiy
2013-06-05  6:09                             ` Peter Lieven
2013-06-09  4:12                               ` liu ping fan
2013-06-09  7:22                                 ` Peter Lieven
2013-06-04 15:10                     ` Peter Lieven
2013-06-08  8:27                       ` Wenchao Xia
2013-06-08  8:30                         ` Alexey Kardashevskiy
2013-06-09  2:16                           ` Wenchao Xia
2013-06-09  2:34                             ` Alexey Kardashevskiy
2013-06-09  2:52                               ` Benjamin Herrenschmidt [this message]
2013-06-09  3:01                                 ` [Qemu-devel] [Qemu-ppc] " Alexey Kardashevskiy
2013-06-09  3:01                               ` [Qemu-devel] " Wenchao Xia
2013-06-09  3:09                                 ` Alexey Kardashevskiy
2013-06-09  3:31                                   ` Wenchao Xia
2013-06-09  7:27                                   ` Peter Lieven
2013-06-10  6:39                                     ` Alexey Kardashevskiy
2013-06-10  6:50                                       ` Peter Lieven
2013-06-10  6:55                                         ` Alexey Kardashevskiy
2013-06-10  8:44                                           ` Peter Lieven
2013-06-10  9:10                                             ` Alexey Kardashevskiy
2013-06-10  9:33                                               ` [Qemu-devel] [Qemu-ppc] " Benjamin Herrenschmidt
2013-06-10  9:42                                                 ` Peter Lieven
2013-06-09  2:53                             ` Benjamin Herrenschmidt
2013-06-12 14:00                               ` Paolo Bonzini
2013-06-12 14:11                                 ` Benjamin Herrenschmidt
2013-06-12 20:10                                   ` Paolo Bonzini
2013-06-13  2:41                                     ` Wenchao Xia
2013-06-03 10:04           ` [Qemu-devel] " Alexey Kardashevskiy
2013-06-04 10:56             ` Peter Lieven
2013-06-08  8:24         ` Wenchao Xia
2013-05-30 10:18 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1370746326.3766.472.camel@pasglop \
    --to=benh@kernel.crashing.org \
    --cc=aik@ozlabs.ru \
    --cc=david@gibson.dropbear.id.au \
    --cc=pbonzini@redhat.com \
    --cc=pl@kamp.de \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=xiawenc@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.