All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCHv3 0/6] ui/vnc: update optimizations
@ 2014-01-05 18:02 Peter Lieven
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 1/6] ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro Peter Lieven
                   ` (5 more replies)
  0 siblings, 6 replies; 14+ messages in thread
From: Peter Lieven @ 2014-01-05 18:02 UTC (permalink / raw)
  To: qemu-devel; +Cc: sw, Peter Lieven, aliguori

This series includes several optimizations for the ui/vnc guest-to-server and
server-to-client update cycles. Comments/reviews appreciated.

v2->v3: - fixed checkpatch warnings [Stefan]
        - fixed an old coding style violation [Stefan]
v1->v2: - new patches 2,4,5
        - patch3: added performance test [Anthony]
        - patch3: further optimized the vnc_update_client by searching for the next zero bit
          with find_next_zero_bit.
        - patch3: further optimized vnc_dpy_switch by using bitmap_set to mark bits dirty.

Peter

Peter Lieven (6):
  ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro
  ui/vnc: derive cmp_bytes from VNC_DIRTY_PIXELS_PER_BIT
  ui/vnc: optimize dirty bitmap tracking
  ui/vnc: optimize clearing in find_and_clear_dirty_height()
  ui/vnc: optimize setting in vnc_dpy_update()
  ui/vnc: disable adaptive update calculations if not needed

 ui/vnc.c |  191 ++++++++++++++++++++++++++++++++++++--------------------------
 ui/vnc.h |   10 +++-
 2 files changed, 120 insertions(+), 81 deletions(-)

-- 
1.7.9.5

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Qemu-devel] [PATCHv3 1/6] ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro
  2014-01-05 18:02 [Qemu-devel] [PATCHv3 0/6] ui/vnc: update optimizations Peter Lieven
@ 2014-01-05 18:02 ` Peter Lieven
  2014-01-06  6:52   ` Wenchao Xia
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 2/6] ui/vnc: derive cmp_bytes from VNC_DIRTY_PIXELS_PER_BIT Peter Lieven
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 14+ messages in thread
From: Peter Lieven @ 2014-01-05 18:02 UTC (permalink / raw)
  To: qemu-devel; +Cc: sw, Peter Lieven, aliguori

Signed-off-by: Peter Lieven <pl@kamp.de>
---
 ui/vnc.c |   61 ++++++++++++++++++++++++++++++++++++++-----------------------
 ui/vnc.h |    6 +++++-
 2 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/ui/vnc.c b/ui/vnc.c
index 5601cc3..1ca6021 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -442,17 +442,19 @@ static void vnc_dpy_update(DisplayChangeListener *dcl,
        iteration.  otherwise, if (x % 16) != 0, the last iteration may span
        two 16-pixel blocks but we only mark the first as dirty
     */
-    w += (x % 16);
-    x -= (x % 16);
+    w += (x % VNC_DIRTY_PIXELS_PER_BIT);
+    x -= (x % VNC_DIRTY_PIXELS_PER_BIT);
 
     x = MIN(x, width);
     y = MIN(y, height);
     w = MIN(x + w, width) - x;
     h = MIN(h, height);
 
-    for (; y < h; y++)
-        for (i = 0; i < w; i += 16)
-            set_bit((x + i) / 16, s->dirty[y]);
+    for (; y < h; y++) {
+        for (i = 0; i < w; i += VNC_DIRTY_PIXELS_PER_BIT) {
+            set_bit((x + i) / VNC_DIRTY_PIXELS_PER_BIT, s->dirty[y]);
+        }
+    }
 }
 
 void vnc_framebuffer_update(VncState *vs, int x, int y, int w, int h,
@@ -769,11 +771,12 @@ static void vnc_dpy_copy(DisplayChangeListener *dcl,
         y = dst_y + h - 1;
         inc = -1;
     }
-    w_lim = w - (16 - (dst_x % 16));
-    if (w_lim < 0)
+    w_lim = w - (VNC_DIRTY_PIXELS_PER_BIT - (dst_x % VNC_DIRTY_PIXELS_PER_BIT));
+    if (w_lim < 0) {
         w_lim = w;
-    else
-        w_lim = w - (w_lim % 16);
+    } else {
+        w_lim = w - (w_lim % VNC_DIRTY_PIXELS_PER_BIT);
+    }
     for (i = 0; i < h; i++) {
         for (x = 0; x <= w_lim;
                 x += s, src_row += cmp_bytes, dst_row += cmp_bytes) {
@@ -781,10 +784,10 @@ static void vnc_dpy_copy(DisplayChangeListener *dcl,
                 if ((s = w - w_lim) == 0)
                     break;
             } else if (!x) {
-                s = (16 - (dst_x % 16));
+                s = (16 - (dst_x % VNC_DIRTY_PIXELS_PER_BIT));
                 s = MIN(s, w_lim);
             } else {
-                s = 16;
+                s = VNC_DIRTY_PIXELS_PER_BIT;
             }
             cmp_bytes = s * VNC_SERVER_FB_BYTES;
             if (memcmp(src_row, dst_row, cmp_bytes) == 0)
@@ -911,7 +914,7 @@ static int vnc_update_client(VncState *vs, int has_dirty)
         for (y = 0; y < height; y++) {
             int x;
             int last_x = -1;
-            for (x = 0; x < width / 16; x++) {
+            for (x = 0; x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) {
                 if (test_and_clear_bit(x, vs->dirty[y])) {
                     if (last_x == -1) {
                         last_x = x;
@@ -921,16 +924,22 @@ static int vnc_update_client(VncState *vs, int has_dirty)
                         int h = find_and_clear_dirty_height(vs, y, last_x, x,
                                                             height);
 
-                        n += vnc_job_add_rect(job, last_x * 16, y,
-                                              (x - last_x) * 16, h);
+                        n += vnc_job_add_rect(job,
+                                              last_x * VNC_DIRTY_PIXELS_PER_BIT,
+                                              y,
+                                              (x - last_x) *
+                                              VNC_DIRTY_PIXELS_PER_BIT,
+                                              h);
                     }
                     last_x = -1;
                 }
             }
             if (last_x != -1) {
                 int h = find_and_clear_dirty_height(vs, y, last_x, x, height);
-                n += vnc_job_add_rect(job, last_x * 16, y,
-                                      (x - last_x) * 16, h);
+                n += vnc_job_add_rect(job, last_x * VNC_DIRTY_PIXELS_PER_BIT,
+                                      y,
+                                      (x - last_x) * VNC_DIRTY_PIXELS_PER_BIT,
+                                      h);
             }
         }
 
@@ -1861,7 +1870,7 @@ static void framebuffer_update_request(VncState *vs, int incremental,
                                        int w, int h)
 {
     int i;
-    const size_t width = surface_width(vs->vd->ds) / 16;
+    const size_t width = surface_width(vs->vd->ds) / VNC_DIRTY_PIXELS_PER_BIT;
     const size_t height = surface_height(vs->vd->ds);
 
     if (y_position > height) {
@@ -2563,7 +2572,9 @@ static int vnc_refresh_lossy_rect(VncDisplay *vd, int x, int y)
 
         vs->lossy_rect[sty][stx] = 0;
         for (j = 0; j < VNC_STAT_RECT; ++j) {
-            bitmap_set(vs->dirty[y + j], x / 16, VNC_STAT_RECT / 16);
+            bitmap_set(vs->dirty[y + j],
+                       x / VNC_DIRTY_PIXELS_PER_BIT,
+                       VNC_STAT_RECT / VNC_DIRTY_PIXELS_PER_BIT);
         }
         has_dirty++;
     }
@@ -2710,17 +2721,21 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
             }
             server_ptr = server_row;
 
-            for (x = 0; x + 15 < width;
-                    x += 16, guest_ptr += cmp_bytes, server_ptr += cmp_bytes) {
-                if (!test_and_clear_bit((x / 16), vd->guest.dirty[y]))
+            for (x = 0; x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
+                 x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
+                 server_ptr += cmp_bytes) {
+                if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
+                    vd->guest.dirty[y])) {
                     continue;
-                if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0)
+                }
+                if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
                     continue;
+                }
                 memcpy(server_ptr, guest_ptr, cmp_bytes);
                 if (!vd->non_adaptive)
                     vnc_rect_updated(vd, x, y, &tv);
                 QTAILQ_FOREACH(vs, &vd->clients, next) {
-                    set_bit((x / 16), vs->dirty[y]);
+                    set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
                 }
                 has_dirty++;
             }
diff --git a/ui/vnc.h b/ui/vnc.h
index 6e99213..4a8f33c 100644
--- a/ui/vnc.h
+++ b/ui/vnc.h
@@ -81,8 +81,12 @@ typedef void VncSendHextileTile(VncState *vs,
 #define VNC_MAX_WIDTH 2560
 #define VNC_MAX_HEIGHT 2048
 
+/* VNC_DIRTY_PIXELS_PER_BIT is the number of dirty pixels represented
+ * by one bit in the dirty bitmap */
+#define VNC_DIRTY_PIXELS_PER_BIT 16
+
 /* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */
-#define VNC_DIRTY_BITS (VNC_MAX_WIDTH / 16)
+#define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT)
 
 #define VNC_STAT_RECT  64
 #define VNC_STAT_COLS (VNC_MAX_WIDTH / VNC_STAT_RECT)
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Qemu-devel] [PATCHv3 2/6] ui/vnc: derive cmp_bytes from VNC_DIRTY_PIXELS_PER_BIT
  2014-01-05 18:02 [Qemu-devel] [PATCHv3 0/6] ui/vnc: update optimizations Peter Lieven
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 1/6] ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro Peter Lieven
@ 2014-01-05 18:02 ` Peter Lieven
  2014-01-06  7:03   ` Wenchao Xia
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 3/6] ui/vnc: optimize dirty bitmap tracking Peter Lieven
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 14+ messages in thread
From: Peter Lieven @ 2014-01-05 18:02 UTC (permalink / raw)
  To: qemu-devel; +Cc: sw, Peter Lieven, aliguori

This allows setting VNC_DIRTY_PIXELS_PER_BIT to values other
than 16 if desired.

Signed-off-by: Peter Lieven <pl@kamp.de>
---
 ui/vnc.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ui/vnc.c b/ui/vnc.c
index 1ca6021..1d2aa1a 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -2697,7 +2697,7 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
      * Check and copy modified bits from guest to server surface.
      * Update server dirty map.
      */
-    cmp_bytes = 64;
+    cmp_bytes = VNC_DIRTY_PIXELS_PER_BIT * VNC_SERVER_FB_BYTES;
     if (cmp_bytes > vnc_server_fb_stride(vd)) {
         cmp_bytes = vnc_server_fb_stride(vd);
     }
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Qemu-devel] [PATCHv3 3/6] ui/vnc: optimize dirty bitmap tracking
  2014-01-05 18:02 [Qemu-devel] [PATCHv3 0/6] ui/vnc: update optimizations Peter Lieven
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 1/6] ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro Peter Lieven
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 2/6] ui/vnc: derive cmp_bytes from VNC_DIRTY_PIXELS_PER_BIT Peter Lieven
@ 2014-01-05 18:02 ` Peter Lieven
  2014-01-06 10:08   ` Wenchao Xia
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 4/6] ui/vnc: optimize clearing in find_and_clear_dirty_height() Peter Lieven
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 14+ messages in thread
From: Peter Lieven @ 2014-01-05 18:02 UTC (permalink / raw)
  To: qemu-devel; +Cc: sw, Peter Lieven, aliguori

vnc_update_client currently scans the dirty bitmap of each client
bitwise, which is a very costly operation if only a few bits are dirty.
vnc_refresh_server_surface does almost the same.
This patch optimizes both by utilizing the heavily optimized
function find_next_bit to find the offset of the next dirty
bit in the dirty bitmaps.

The following artificial test (just the bitmap operation part) running
vnc_update_client 65536 times on a 2560x2048 surface illustrates the
performance difference:

All bits clean - vnc_update_client_new: 0.07 secs
                 vnc_update_client_old: 10.98 secs

All bits dirty - vnc_update_client_new: 11.26 secs
                 vnc_update_client_old: 20.19 secs

Few bits dirty - vnc_update_client_new: 0.08 secs
                 vnc_update_client_old: 10.98 secs

The case for all bits dirty is still rather slow; this
is due to the implementation of find_and_clear_dirty_height.
This will be addressed in a separate patch.

Signed-off-by: Peter Lieven <pl@kamp.de>
---
 ui/vnc.c |  154 +++++++++++++++++++++++++++++++++-----------------------------
 ui/vnc.h |    4 ++
 2 files changed, 87 insertions(+), 71 deletions(-)

diff --git a/ui/vnc.c b/ui/vnc.c
index 1d2aa1a..6a0c03e 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -572,6 +572,14 @@ void *vnc_server_fb_ptr(VncDisplay *vd, int x, int y)
     ptr += x * VNC_SERVER_FB_BYTES;
     return ptr;
 }
+/* this sets only the visible pixels of a dirty bitmap */
+#define VNC_SET_VISIBLE_PIXELS_DIRTY(bitmap, w, h) {\
+        int y;\
+        memset(bitmap, 0x00, sizeof(bitmap));\
+        for (y = 0; y < h; y++) {\
+            bitmap_set(bitmap[y], 0, w / VNC_DIRTY_PIXELS_PER_BIT);\
+        } \
+    }
 
 static void vnc_dpy_switch(DisplayChangeListener *dcl,
                            DisplaySurface *surface)
@@ -597,7 +605,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
     qemu_pixman_image_unref(vd->guest.fb);
     vd->guest.fb = pixman_image_ref(surface->image);
     vd->guest.format = surface->format;
-    memset(vd->guest.dirty, 0xFF, sizeof(vd->guest.dirty));
+    VNC_SET_VISIBLE_PIXELS_DIRTY(vd->guest.dirty,
+                                 surface_width(vd->ds),
+                                 surface_height(vd->ds));
 
     QTAILQ_FOREACH(vs, &vd->clients, next) {
         vnc_colordepth(vs);
@@ -605,7 +615,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
         if (vs->vd->cursor) {
             vnc_cursor_define(vs);
         }
-        memset(vs->dirty, 0xFF, sizeof(vs->dirty));
+        VNC_SET_VISIBLE_PIXELS_DIRTY(vs->dirty,
+                                     surface_width(vd->ds),
+                                     surface_height(vd->ds));
     }
 }
 
@@ -889,10 +901,9 @@ static int vnc_update_client(VncState *vs, int has_dirty)
         VncDisplay *vd = vs->vd;
         VncJob *job;
         int y;
-        int width, height;
+        int height;
         int n = 0;
 
-
         if (vs->output.offset && !vs->audio_cap && !vs->force_update)
             /* kernel send buffers are full -> drop frames to throttle */
             return 0;
@@ -908,39 +919,27 @@ static int vnc_update_client(VncState *vs, int has_dirty)
          */
         job = vnc_job_new(vs);
 
-        width = MIN(pixman_image_get_width(vd->server), vs->client_width);
         height = MIN(pixman_image_get_height(vd->server), vs->client_height);
 
-        for (y = 0; y < height; y++) {
-            int x;
-            int last_x = -1;
-            for (x = 0; x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) {
-                if (test_and_clear_bit(x, vs->dirty[y])) {
-                    if (last_x == -1) {
-                        last_x = x;
-                    }
-                } else {
-                    if (last_x != -1) {
-                        int h = find_and_clear_dirty_height(vs, y, last_x, x,
-                                                            height);
-
-                        n += vnc_job_add_rect(job,
-                                              last_x * VNC_DIRTY_PIXELS_PER_BIT,
-                                              y,
-                                              (x - last_x) *
-                                              VNC_DIRTY_PIXELS_PER_BIT,
-                                              h);
-                    }
-                    last_x = -1;
-                }
-            }
-            if (last_x != -1) {
-                int h = find_and_clear_dirty_height(vs, y, last_x, x, height);
-                n += vnc_job_add_rect(job, last_x * VNC_DIRTY_PIXELS_PER_BIT,
-                                      y,
-                                      (x - last_x) * VNC_DIRTY_PIXELS_PER_BIT,
-                                      h);
+        y = 0;
+        for (;;) {
+            int x, h;
+            unsigned long x2;
+            unsigned long offset = find_next_bit((unsigned long *) &vs->dirty,
+                                                 height * VNC_DIRTY_BPL(vs),
+                                                 y * VNC_DIRTY_BPL(vs));
+            if (offset == height * VNC_DIRTY_BPL(vs)) {
+                /* no more dirty bits */
+                break;
             }
+            y = offset / VNC_DIRTY_BPL(vs);
+            x = offset % VNC_DIRTY_BPL(vs);
+            x2 = find_next_zero_bit((unsigned long *) &vs->dirty[y],
+                                    VNC_DIRTY_BPL(vs), x);
+            bitmap_clear(vs->dirty[y], x, x2 - x);
+            h = find_and_clear_dirty_height(vs, y, x, x2, height);
+            n += vnc_job_add_rect(job, x * VNC_DIRTY_PIXELS_PER_BIT, y,
+                                  (x2 - x) * VNC_DIRTY_PIXELS_PER_BIT, h);
         }
 
         vnc_job_push(job);
@@ -2678,8 +2677,8 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
     int width = pixman_image_get_width(vd->guest.fb);
     int height = pixman_image_get_height(vd->guest.fb);
     int y;
-    uint8_t *guest_row;
-    uint8_t *server_row;
+    uint8_t *guest_row0 = NULL, *server_row0;
+    int guest_stride = 0, server_stride;
     int cmp_bytes;
     VncState *vs;
     int has_dirty = 0;
@@ -2704,44 +2703,57 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
     if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
         int width = pixman_image_get_width(vd->server);
         tmpbuf = qemu_pixman_linebuf_create(VNC_SERVER_FB_FORMAT, width);
-    }
-    guest_row = (uint8_t *)pixman_image_get_data(vd->guest.fb);
-    server_row = (uint8_t *)pixman_image_get_data(vd->server);
-    for (y = 0; y < height; y++) {
-        if (!bitmap_empty(vd->guest.dirty[y], VNC_DIRTY_BITS)) {
-            int x;
-            uint8_t *guest_ptr;
-            uint8_t *server_ptr;
-
-            if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
-                qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
-                guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
-            } else {
-                guest_ptr = guest_row;
-            }
-            server_ptr = server_row;
+    } else {
+        guest_row0 = (uint8_t *)pixman_image_get_data(vd->guest.fb);
+        guest_stride = pixman_image_get_stride(vd->guest.fb);
+    }
+    server_row0 = (uint8_t *)pixman_image_get_data(vd->server);
+    server_stride = pixman_image_get_stride(vd->server);
+
+    y = 0;
+    for (;;) {
+        int x;
+        uint8_t *guest_ptr, *server_ptr;
+        unsigned long offset = find_next_bit((unsigned long *) &vd->guest.dirty,
+                                             height * VNC_DIRTY_BPL(&vd->guest),
+                                             y * VNC_DIRTY_BPL(&vd->guest));
+        if (offset == height * VNC_DIRTY_BPL(&vd->guest)) {
+            /* no more dirty bits */
+            break;
+        }
+        y = offset / VNC_DIRTY_BPL(&vd->guest);
 
-            for (x = 0; x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
-                 x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
-                 server_ptr += cmp_bytes) {
-                if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
-                    vd->guest.dirty[y])) {
-                    continue;
-                }
-                if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
-                    continue;
-                }
-                memcpy(server_ptr, guest_ptr, cmp_bytes);
-                if (!vd->non_adaptive)
-                    vnc_rect_updated(vd, x, y, &tv);
-                QTAILQ_FOREACH(vs, &vd->clients, next) {
-                    set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
-                }
-                has_dirty++;
+        server_ptr = server_row0 + y * server_stride;
+
+        if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
+            qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
+            guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
+        } else {
+            guest_ptr = guest_row0 + y * guest_stride;
+        }
+
+        for (x = offset % VNC_DIRTY_BPL(&vd->guest);
+             x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
+             x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
+             server_ptr += cmp_bytes) {
+            if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
+                vd->guest.dirty[y])) {
+                continue;
+            }
+            if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
+                continue;
+            }
+            memcpy(server_ptr, guest_ptr, cmp_bytes);
+            if (!vd->non_adaptive) {
+                vnc_rect_updated(vd, x, y, &tv);
             }
+            QTAILQ_FOREACH(vs, &vd->clients, next) {
+                set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
+            }
+            has_dirty++;
         }
-        guest_row  += pixman_image_get_stride(vd->guest.fb);
-        server_row += pixman_image_get_stride(vd->server);
+
+        y++;
     }
     qemu_pixman_image_unref(tmpbuf);
     return has_dirty;
diff --git a/ui/vnc.h b/ui/vnc.h
index 4a8f33c..07e1f59 100644
--- a/ui/vnc.h
+++ b/ui/vnc.h
@@ -88,6 +88,10 @@ typedef void VncSendHextileTile(VncState *vs,
 /* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */
 #define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT)
 
+/* VNC_DIRTY_BPL (BPL = bits per line) might be greater than
+ * VNC_DIRTY_BITS due to alignment */
+#define VNC_DIRTY_BPL(x) (sizeof((x)->dirty) / VNC_MAX_HEIGHT * BITS_PER_BYTE)
+
 #define VNC_STAT_RECT  64
 #define VNC_STAT_COLS (VNC_MAX_WIDTH / VNC_STAT_RECT)
 #define VNC_STAT_ROWS (VNC_MAX_HEIGHT / VNC_STAT_RECT)
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Qemu-devel] [PATCHv3 4/6] ui/vnc: optimize clearing in find_and_clear_dirty_height()
  2014-01-05 18:02 [Qemu-devel] [PATCHv3 0/6] ui/vnc: update optimizations Peter Lieven
                   ` (2 preceding siblings ...)
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 3/6] ui/vnc: optimize dirty bitmap tracking Peter Lieven
@ 2014-01-05 18:02 ` Peter Lieven
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 5/6] ui/vnc: optimize setting in vnc_dpy_update() Peter Lieven
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 6/6] ui/vnc: disable adaptive update calculations if not needed Peter Lieven
  5 siblings, 0 replies; 14+ messages in thread
From: Peter Lieven @ 2014-01-05 18:02 UTC (permalink / raw)
  To: qemu-devel; +Cc: sw, Peter Lieven, aliguori

The following artificial test (just the bitmap operation part) running
vnc_update_client 65536 times on a 2560x2048 surface illustrates the
performance difference:

All bits clean - vnc_update_client_new: 0.07 secs
                 vnc_update_client_new2: 0.07 secs
                 vnc_update_client_old: 10.98 secs

All bits dirty - vnc_update_client_new: 11.26 secs
               - vnc_update_client_new2: 0.29 secs
                 vnc_update_client_old: 20.19 secs

Few bits dirty - vnc_update_client_new: 0.07 secs
               - vnc_update_client_new2: 0.07 secs
                 vnc_update_client_old: 10.98 secs

vnc_update_client_new2 shows the performance of vnc_update_client
with this patch added.

Comparing with the test run of the last patch the performance
is at least unchanged while it is significantly improved
for the all bits dirty case.

Signed-off-by: Peter Lieven <pl@kamp.de>
---
 ui/vnc.c |    5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/ui/vnc.c b/ui/vnc.c
index 6a0c03e..980df34 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -876,13 +876,10 @@ static int find_and_clear_dirty_height(struct VncState *vs,
     int h;
 
     for (h = 1; h < (height - y); h++) {
-        int tmp_x;
         if (!test_bit(last_x, vs->dirty[y + h])) {
             break;
         }
-        for (tmp_x = last_x; tmp_x < x; tmp_x++) {
-            clear_bit(tmp_x, vs->dirty[y + h]);
-        }
+        bitmap_clear(vs->dirty[y + h], last_x, x - last_x);
     }
 
     return h;
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Qemu-devel] [PATCHv3 5/6] ui/vnc: optimize setting in vnc_dpy_update()
  2014-01-05 18:02 [Qemu-devel] [PATCHv3 0/6] ui/vnc: update optimizations Peter Lieven
                   ` (3 preceding siblings ...)
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 4/6] ui/vnc: optimize clearing in find_and_clear_dirty_height() Peter Lieven
@ 2014-01-05 18:02 ` Peter Lieven
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 6/6] ui/vnc: disable adaptive update calculations if not needed Peter Lieven
  5 siblings, 0 replies; 14+ messages in thread
From: Peter Lieven @ 2014-01-05 18:02 UTC (permalink / raw)
  To: qemu-devel; +Cc: sw, Peter Lieven, aliguori

Signed-off-by: Peter Lieven <pl@kamp.de>
---
 ui/vnc.c |    6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/ui/vnc.c b/ui/vnc.c
index 980df34..805c47a 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -430,7 +430,6 @@ static int vnc_refresh_server_surface(VncDisplay *vd);
 static void vnc_dpy_update(DisplayChangeListener *dcl,
                            int x, int y, int w, int h)
 {
-    int i;
     VncDisplay *vd = container_of(dcl, VncDisplay, dcl);
     struct VncSurface *s = &vd->guest;
     int width = surface_width(vd->ds);
@@ -451,9 +450,8 @@ static void vnc_dpy_update(DisplayChangeListener *dcl,
     h = MIN(h, height);
 
     for (; y < h; y++) {
-        for (i = 0; i < w; i += VNC_DIRTY_PIXELS_PER_BIT) {
-            set_bit((x + i) / VNC_DIRTY_PIXELS_PER_BIT, s->dirty[y]);
-        }
+        bitmap_set(s->dirty[y], x / VNC_DIRTY_PIXELS_PER_BIT,
+                   w / VNC_DIRTY_PIXELS_PER_BIT);
     }
 }
 
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Qemu-devel] [PATCHv3 6/6] ui/vnc: disable adaptive update calculations if not needed
  2014-01-05 18:02 [Qemu-devel] [PATCHv3 0/6] ui/vnc: update optimizations Peter Lieven
                   ` (4 preceding siblings ...)
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 5/6] ui/vnc: optimize setting in vnc_dpy_update() Peter Lieven
@ 2014-01-05 18:02 ` Peter Lieven
  5 siblings, 0 replies; 14+ messages in thread
From: Peter Lieven @ 2014-01-05 18:02 UTC (permalink / raw)
  To: qemu-devel; +Cc: sw, Peter Lieven, aliguori

Signed-off-by: Peter Lieven <pl@kamp.de>
---
 ui/vnc.c |    9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/ui/vnc.c b/ui/vnc.c
index 805c47a..cdb1f4f 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3171,7 +3171,9 @@ void vnc_display_open(DisplayState *ds, const char *display, Error **errp)
             acl = 1;
 #endif
         } else if (strncmp(options, "lossy", 5) == 0) {
+#ifdef CONFIG_VNC_JPEG
             vs->lossy = true;
+#endif
         } else if (strncmp(options, "non-adaptive", 12) == 0) {
             vs->non_adaptive = true;
         } else if (strncmp(options, "share=", 6) == 0) {
@@ -3188,6 +3190,13 @@ void vnc_display_open(DisplayState *ds, const char *display, Error **errp)
         }
     }
 
+    /* adaptive updates are only used with tight encoding and
+     * if lossy updates are enabled so we can disable all the
+     * calculations otherwise */
+    if (!vs->lossy) {
+        vs->non_adaptive = true;
+    }
+
 #ifdef CONFIG_VNC_TLS
     if (acl && x509 && vs->tls.x509verify) {
         if (!(vs->tls.acl = qemu_acl_init("vnc.x509dname"))) {
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCHv3 1/6] ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 1/6] ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro Peter Lieven
@ 2014-01-06  6:52   ` Wenchao Xia
  2014-01-06  8:02     ` Peter Lieven
  0 siblings, 1 reply; 14+ messages in thread
From: Wenchao Xia @ 2014-01-06  6:52 UTC (permalink / raw)
  To: Peter Lieven, qemu-devel; +Cc: sw, aliguori

> @@ -781,10 +784,10 @@ static void vnc_dpy_copy(DisplayChangeListener *dcl,
>                   if ((s = w - w_lim) == 0)
>                       break;
>               } else if (!x) {
> -                s = (16 - (dst_x % 16));
> +                s = (16 - (dst_x % VNC_DIRTY_PIXELS_PER_BIT));

Should it be
s = (VNC_DIRTY_PIXELS_PER_BIT - (dst_x % VNC_DIRTY_PIXELS_PER_BIT));
?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCHv3 2/6] ui/vnc: derive cmp_bytes from VNC_DIRTY_PIXELS_PER_BIT
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 2/6] ui/vnc: derive cmp_bytes from VNC_DIRTY_PIXELS_PER_BIT Peter Lieven
@ 2014-01-06  7:03   ` Wenchao Xia
  0 siblings, 0 replies; 14+ messages in thread
From: Wenchao Xia @ 2014-01-06  7:03 UTC (permalink / raw)
  To: Peter Lieven, qemu-devel; +Cc: sw, aliguori

Reviewed-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCHv3 1/6] ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro
  2014-01-06  6:52   ` Wenchao Xia
@ 2014-01-06  8:02     ` Peter Lieven
  0 siblings, 0 replies; 14+ messages in thread
From: Peter Lieven @ 2014-01-06  8:02 UTC (permalink / raw)
  To: Wenchao Xia, qemu-devel; +Cc: sw, aliguori

On 06.01.2014 07:52, Wenchao Xia wrote:
>> @@ -781,10 +784,10 @@ static void vnc_dpy_copy(DisplayChangeListener *dcl,
>>                   if ((s = w - w_lim) == 0)
>>                       break;
>>               } else if (!x) {
>> -                s = (16 - (dst_x % 16));
>> +                s = (16 - (dst_x % VNC_DIRTY_PIXELS_PER_BIT));
> Should it be
> s = (VNC_DIRTY_PIXELS_PER_BIT - (dst_x % VNC_DIRTY_PIXELS_PER_BIT));
> ?
>

Thanks, you are right.

Peter

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCHv3 3/6] ui/vnc: optimize dirty bitmap tracking
  2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 3/6] ui/vnc: optimize dirty bitmap tracking Peter Lieven
@ 2014-01-06 10:08   ` Wenchao Xia
  2014-01-06 13:31     ` Peter Lieven
  2014-01-06 17:37     ` Peter Lieven
  0 siblings, 2 replies; 14+ messages in thread
From: Wenchao Xia @ 2014-01-06 10:08 UTC (permalink / raw)
  To: Peter Lieven, qemu-devel; +Cc: sw, aliguori

于 2014/1/6 2:02, Peter Lieven 写道:
> vnc_update_client currently scans the dirty bitmap of each client
> bitwise which is a very costly operation if only few bits are dirty.
> vnc_refresh_server_surface does almost the same.
> this patch optimizes both by utilizing the heavily optimized
> function find_next_bit to find the offset of the next dirty
> bit in the dirty bitmaps.
> 
> The following artificial test (just the bitmap operation part) running
> vnc_update_client 65536 times on a 2560x2048 surface illustrates the
> performance difference:
> 
> All bits clean - vnc_update_client_new: 0.07 secs
>                   vnc_update_client_old: 10.98 secs
> 
> All bits dirty - vnc_update_client_new: 11.26 secs
>                   vnc_update_client_old: 20.19 secs
> 
> Few bits dirty - vnc_update_client_new: 0.08 secs
>                   vnc_update_client_old: 10.98 secs
> 
> The case for all bits dirty is still rather slow, this
> is due to the implementation of find_and_clear_dirty_height.
> This will be addressed in a separate patch.
> 
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
>   ui/vnc.c |  154 +++++++++++++++++++++++++++++++++-----------------------------
>   ui/vnc.h |    4 ++
>   2 files changed, 87 insertions(+), 71 deletions(-)
> 
> diff --git a/ui/vnc.c b/ui/vnc.c
> index 1d2aa1a..6a0c03e 100644
> --- a/ui/vnc.c
> +++ b/ui/vnc.c
> @@ -572,6 +572,14 @@ void *vnc_server_fb_ptr(VncDisplay *vd, int x, int y)
>       ptr += x * VNC_SERVER_FB_BYTES;
>       return ptr;
>   }
> +/* this sets only the visible pixels of a dirty bitmap */
> +#define VNC_SET_VISIBLE_PIXELS_DIRTY(bitmap, w, h) {\
> +        int y;\
> +        memset(bitmap, 0x00, sizeof(bitmap));\
> +        for (y = 0; y < h; y++) {\
> +            bitmap_set(bitmap[y], 0, w / VNC_DIRTY_PIXELS_PER_BIT);\

  Will it be a problem when vnc's width % VNC_DIRTY_PIXELS_PER_BIT != 0?
Although it is a rare case, I think it is better to round it up, since
"w" and "VNC_DIRTY_PIXELS_PER_BIT" are variables. A macro computing it
would be nice:

/* Number of dirty-bitmap bits needed to cover w pixels, rounded up so a
 * trailing partial group of pixels still gets its own bit. */
#define VNC_DIRTY_BITS_FROM_WIDTH(w) \
    (((w) + VNC_DIRTY_PIXELS_PER_BIT - 1) / VNC_DIRTY_PIXELS_PER_BIT)
#define VNC_DIRTY_BITS (VNC_DIRTY_BITS_FROM_WIDTH(VNC_MAX_WIDTH))

then here:
    bitmap_set(bitmap[y], 0, VNC_DIRTY_BITS_FROM_WIDTH(w));

Or simply warn or coredump when w % VNC_DIRTY_PIXELS_PER_BIT != 0.

Also, in vnc.h:
/* VNC_MAX_WIDTH must be a multiple of 16. */
#define VNC_MAX_WIDTH 2560
#define VNC_MAX_HEIGHT 2048

Maybe the comment should be updated to:
/* VNC_MAX_WIDTH must be a multiple of VNC_DIRTY_PIXELS_PER_BIT. */

> +        } \
> +    }
> 
>   static void vnc_dpy_switch(DisplayChangeListener *dcl,
>                              DisplaySurface *surface)
> @@ -597,7 +605,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
>       qemu_pixman_image_unref(vd->guest.fb);
>       vd->guest.fb = pixman_image_ref(surface->image);
>       vd->guest.format = surface->format;
> -    memset(vd->guest.dirty, 0xFF, sizeof(vd->guest.dirty));
> +    VNC_SET_VISIBLE_PIXELS_DIRTY(vd->guest.dirty,
> +                                 surface_width(vd->ds),
> +                                 surface_height(vd->ds));
> 
>       QTAILQ_FOREACH(vs, &vd->clients, next) {
>           vnc_colordepth(vs);
> @@ -605,7 +615,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
>           if (vs->vd->cursor) {
>               vnc_cursor_define(vs);
>           }
> -        memset(vs->dirty, 0xFF, sizeof(vs->dirty));
> +        VNC_SET_VISIBLE_PIXELS_DIRTY(vs->dirty,
> +                                     surface_width(vd->ds),
> +                                     surface_height(vd->ds));
>       }
>   }
> 
> @@ -889,10 +901,9 @@ static int vnc_update_client(VncState *vs, int has_dirty)
>           VncDisplay *vd = vs->vd;
>           VncJob *job;
>           int y;
> -        int width, height;
> +        int height;
>           int n = 0;
> 
> -
>           if (vs->output.offset && !vs->audio_cap && !vs->force_update)
>               /* kernel send buffers are full -> drop frames to throttle */
>               return 0;
> @@ -908,39 +919,27 @@ static int vnc_update_client(VncState *vs, int has_dirty)
>            */
>           job = vnc_job_new(vs);
> 
> -        width = MIN(pixman_image_get_width(vd->server), vs->client_width);
>           height = MIN(pixman_image_get_height(vd->server), vs->client_height);
> 
> -        for (y = 0; y < height; y++) {
> -            int x;
> -            int last_x = -1;
> -            for (x = 0; x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) {
> -                if (test_and_clear_bit(x, vs->dirty[y])) {
> -                    if (last_x == -1) {
> -                        last_x = x;
> -                    }
> -                } else {
> -                    if (last_x != -1) {
> -                        int h = find_and_clear_dirty_height(vs, y, last_x, x,
> -                                                            height);
> -
> -                        n += vnc_job_add_rect(job,
> -                                              last_x * VNC_DIRTY_PIXELS_PER_BIT,
> -                                              y,
> -                                              (x - last_x) *
> -                                              VNC_DIRTY_PIXELS_PER_BIT,
> -                                              h);
> -                    }
> -                    last_x = -1;
> -                }
> -            }
> -            if (last_x != -1) {
> -                int h = find_and_clear_dirty_height(vs, y, last_x, x, height);
> -                n += vnc_job_add_rect(job, last_x * VNC_DIRTY_PIXELS_PER_BIT,
> -                                      y,
> -                                      (x - last_x) * VNC_DIRTY_PIXELS_PER_BIT,
> -                                      h);
> +        y = 0;
> +        for (;;) {
> +            int x, h;
> +            unsigned long x2;
> +            unsigned long offset = find_next_bit((unsigned long *) &vs->dirty,
> +                                                 height * VNC_DIRTY_BPL(vs),
> +                                                 y * VNC_DIRTY_BPL(vs));
> +            if (offset == height * VNC_DIRTY_BPL(vs)) {
> +                /* no more dirty bits */
> +                break;
>               }
> +            y = offset / VNC_DIRTY_BPL(vs);
> +            x = offset % VNC_DIRTY_BPL(vs);
> +            x2 = find_next_zero_bit((unsigned long *) &vs->dirty[y],
> +                                    VNC_DIRTY_BPL(vs), x);
> +            bitmap_clear(vs->dirty[y], x, x2 - x);
> +            h = find_and_clear_dirty_height(vs, y, x, x2, height);
> +            n += vnc_job_add_rect(job, x * VNC_DIRTY_PIXELS_PER_BIT, y,
> +                                  (x2 - x) * VNC_DIRTY_PIXELS_PER_BIT, h);
>           }

Minor comment:
  VNC_DIRTY_BPL(vs) is accessing memory through a pointer; should we use a
variable instead of VNC_DIRTY_BPL(vs) in every place, in case the
compiler doesn't optimize it for us?

> 
>           vnc_job_push(job);
> @@ -2678,8 +2677,8 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
>       int width = pixman_image_get_width(vd->guest.fb);
>       int height = pixman_image_get_height(vd->guest.fb);
>       int y;
> -    uint8_t *guest_row;
> -    uint8_t *server_row;
> +    uint8_t *guest_row0 = NULL, *server_row0;
  Any reason for renaming those variables?

> +    int guest_stride = 0, server_stride;
>       int cmp_bytes;
>       VncState *vs;
>       int has_dirty = 0;
> @@ -2704,44 +2703,57 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
>       if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
>           int width = pixman_image_get_width(vd->server);
>           tmpbuf = qemu_pixman_linebuf_create(VNC_SERVER_FB_FORMAT, width);
> -    }
> -    guest_row = (uint8_t *)pixman_image_get_data(vd->guest.fb);
> -    server_row = (uint8_t *)pixman_image_get_data(vd->server);
> -    for (y = 0; y < height; y++) {
> -        if (!bitmap_empty(vd->guest.dirty[y], VNC_DIRTY_BITS)) {
> -            int x;
> -            uint8_t *guest_ptr;
> -            uint8_t *server_ptr;
> -
> -            if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
> -                qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
> -                guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
> -            } else {
> -                guest_ptr = guest_row;
> -            }
> -            server_ptr = server_row;
> +    } else {
> +        guest_row0 = (uint8_t *)pixman_image_get_data(vd->guest.fb);
> +        guest_stride = pixman_image_get_stride(vd->guest.fb);
> +    }
> +    server_row0 = (uint8_t *)pixman_image_get_data(vd->server);
> +    server_stride = pixman_image_get_stride(vd->server);
> +
> +    y = 0;
> +    for (;;) {
> +        int x;
> +        uint8_t *guest_ptr, *server_ptr;
> +        unsigned long offset = find_next_bit((unsigned long *) &vd->guest.dirty,
> +                                             height * VNC_DIRTY_BPL(&vd->guest),
> +                                             y * VNC_DIRTY_BPL(&vd->guest));
> +        if (offset == height * VNC_DIRTY_BPL(&vd->guest)) {
> +            /* no more dirty bits */
> +            break;
> +        }
> +        y = offset / VNC_DIRTY_BPL(&vd->guest);
> 
> -            for (x = 0; x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
> -                 x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
> -                 server_ptr += cmp_bytes) {
> -                if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
> -                    vd->guest.dirty[y])) {
> -                    continue;
> -                }
> -                if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
> -                    continue;
> -                }
> -                memcpy(server_ptr, guest_ptr, cmp_bytes);
> -                if (!vd->non_adaptive)
> -                    vnc_rect_updated(vd, x, y, &tv);
> -                QTAILQ_FOREACH(vs, &vd->clients, next) {
> -                    set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
> -                }
> -                has_dirty++;
> +        server_ptr = server_row0 + y * server_stride;
> +
> +        if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
> +            qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
> +            guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
> +        } else {
> +            guest_ptr = guest_row0 + y * guest_stride;
> +        }
> +
> +        for (x = offset % VNC_DIRTY_BPL(&vd->guest);
> +             x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
> +             x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
> +             server_ptr += cmp_bytes) {
> +            if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
> +                vd->guest.dirty[y])) {
> +                continue;
> +            }
> +            if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
> +                continue;
> +            }
> +            memcpy(server_ptr, guest_ptr, cmp_bytes);
> +            if (!vd->non_adaptive) {
> +                vnc_rect_updated(vd, x, y, &tv);
>               }
> +            QTAILQ_FOREACH(vs, &vd->clients, next) {
> +                set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
> +            }
> +            has_dirty++;
>           }
> -        guest_row  += pixman_image_get_stride(vd->guest.fb);
> -        server_row += pixman_image_get_stride(vd->server);
> +
> +        y++;
>       }
>       qemu_pixman_image_unref(tmpbuf);
>       return has_dirty;
> diff --git a/ui/vnc.h b/ui/vnc.h
> index 4a8f33c..07e1f59 100644
> --- a/ui/vnc.h
> +++ b/ui/vnc.h
> @@ -88,6 +88,10 @@ typedef void VncSendHextileTile(VncState *vs,
>   /* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */
>   #define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT)
> 
> +/* VNC_DIRTY_BPL (BPL = bits per line) might be greater than
> + * VNC_DIRTY_BITS due to alignment */
> +#define VNC_DIRTY_BPL(x) (sizeof((x)->dirty) / VNC_MAX_HEIGHT * BITS_PER_BYTE)
> +
>   #define VNC_STAT_RECT  64
>   #define VNC_STAT_COLS (VNC_MAX_WIDTH / VNC_STAT_RECT)
>   #define VNC_STAT_ROWS (VNC_MAX_HEIGHT / VNC_STAT_RECT)
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCHv3 3/6] ui/vnc: optimize dirty bitmap tracking
  2014-01-06 10:08   ` Wenchao Xia
@ 2014-01-06 13:31     ` Peter Lieven
  2014-01-07  2:00       ` Wenchao Xia
  2014-01-06 17:37     ` Peter Lieven
  1 sibling, 1 reply; 14+ messages in thread
From: Peter Lieven @ 2014-01-06 13:31 UTC (permalink / raw)
  To: Wenchao Xia, qemu-devel; +Cc: sw, aliguori

On 06.01.2014 11:08, Wenchao Xia wrote:
> 于 2014/1/6 2:02, Peter Lieven 写道:
>> vnc_update_client currently scans the dirty bitmap of each client
>> bitwise which is a very costly operation if only few bits are dirty.
>> vnc_refresh_server_surface does almost the same.
>> this patch optimizes both by utilizing the heavily optimized
>> function find_next_bit to find the offset of the next dirty
>> bit in the dirty bitmaps.
>>
>> The following artificial test (just the bitmap operation part) running
>> vnc_update_client 65536 times on a 2560x2048 surface illustrates the
>> performance difference:
>>
>> All bits clean - vnc_update_client_new: 0.07 secs
>>                   vnc_update_client_old: 10.98 secs
>>
>> All bits dirty - vnc_update_client_new: 11.26 secs
>>                   vnc_update_client_old: 20.19 secs
>>
>> Few bits dirty - vnc_update_client_new: 0.08 secs
>>                   vnc_update_client_old: 10.98 secs
>>
>> The case for all bits dirty is still rather slow, this
>> is due to the implementation of find_and_clear_dirty_height.
>> This will be addressed in a separate patch.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>>   ui/vnc.c |  154 +++++++++++++++++++++++++++++++++-----------------------------
>>   ui/vnc.h |    4 ++
>>   2 files changed, 87 insertions(+), 71 deletions(-)
>>
>> diff --git a/ui/vnc.c b/ui/vnc.c
>> index 1d2aa1a..6a0c03e 100644
>> --- a/ui/vnc.c
>> +++ b/ui/vnc.c
>> @@ -572,6 +572,14 @@ void *vnc_server_fb_ptr(VncDisplay *vd, int x, int y)
>>       ptr += x * VNC_SERVER_FB_BYTES;
>>       return ptr;
>>   }
>> +/* this sets only the visible pixels of a dirty bitmap */
>> +#define VNC_SET_VISIBLE_PIXELS_DIRTY(bitmap, w, h) {\
>> +        int y;\
>> +        memset(bitmap, 0x00, sizeof(bitmap));\
>> +        for (y = 0; y < h; y++) {\
>> +            bitmap_set(bitmap[y], 0, w / VNC_DIRTY_PIXELS_PER_BIT);\
>   Will it be a problem when vnc's width % VNC_DIRTY_PIXELS_PER_BIT != 0?
> Although it is a rare case, but I think it is better round it up, since
> "v" and "VNC_DIRTY_PIXELS_PER_BIT" are variables. A macro computing it
> would be nice:
Good point, I will use DIV_ROUND_UP here.
>
> #define VNC_DIRTY_BITS_FROM_WIDTH(w) (w + VNC_DIRTY_PIXELS_PER_BIT - 1/
> VNC_DIRTY_PIXELS_PER_BIT)
> #define VNC_DIRTY_BITS (VNC_DIRTY_BITS_FROM_WIDTH(VNC_MAX_WIDTH)
>
> then here:
>     bitmap_set(bitmap[y], 0, VNC_DIRTY_BITS_FROM_WIDTH(w));
>
> Or simply warn or coredump when v % VNC_DIRTY_PIXELS_PER_BIT != 0.
>
> Also, in vnc.h:
> /* VNC_MAX_WIDTH must be a multiple of 16. */
> #define VNC_MAX_WIDTH 2560
> #define VNC_MAX_HEIGHT 2048
>
> Maybe it should be updated as:
> /* VNC_MAX_WIDTH must be a multiple of VNC_DIRTY_PIXELS_PER_BIT. */
correct. will fix as well.
>
>> +        } \
>> +    }
>>
>>   static void vnc_dpy_switch(DisplayChangeListener *dcl,
>>                              DisplaySurface *surface)
>> @@ -597,7 +605,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
>>       qemu_pixman_image_unref(vd->guest.fb);
>>       vd->guest.fb = pixman_image_ref(surface->image);
>>       vd->guest.format = surface->format;
>> -    memset(vd->guest.dirty, 0xFF, sizeof(vd->guest.dirty));
>> +    VNC_SET_VISIBLE_PIXELS_DIRTY(vd->guest.dirty,
>> +                                 surface_width(vd->ds),
>> +                                 surface_height(vd->ds));
>>
>>       QTAILQ_FOREACH(vs, &vd->clients, next) {
>>           vnc_colordepth(vs);
>> @@ -605,7 +615,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
>>           if (vs->vd->cursor) {
>>               vnc_cursor_define(vs);
>>           }
>> -        memset(vs->dirty, 0xFF, sizeof(vs->dirty));
>> +        VNC_SET_VISIBLE_PIXELS_DIRTY(vs->dirty,
>> +                                     surface_width(vd->ds),
>> +                                     surface_height(vd->ds));
>>       }
>>   }
>>
>> @@ -889,10 +901,9 @@ static int vnc_update_client(VncState *vs, int has_dirty)
>>           VncDisplay *vd = vs->vd;
>>           VncJob *job;
>>           int y;
>> -        int width, height;
>> +        int height;
>>           int n = 0;
>>
>> -
>>           if (vs->output.offset && !vs->audio_cap && !vs->force_update)
>>               /* kernel send buffers are full -> drop frames to throttle */
>>               return 0;
>> @@ -908,39 +919,27 @@ static int vnc_update_client(VncState *vs, int has_dirty)
>>            */
>>           job = vnc_job_new(vs);
>>
>> -        width = MIN(pixman_image_get_width(vd->server), vs->client_width);
>>           height = MIN(pixman_image_get_height(vd->server), vs->client_height);
>>
>> -        for (y = 0; y < height; y++) {
>> -            int x;
>> -            int last_x = -1;
>> -            for (x = 0; x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) {
>> -                if (test_and_clear_bit(x, vs->dirty[y])) {
>> -                    if (last_x == -1) {
>> -                        last_x = x;
>> -                    }
>> -                } else {
>> -                    if (last_x != -1) {
>> -                        int h = find_and_clear_dirty_height(vs, y, last_x, x,
>> -                                                            height);
>> -
>> -                        n += vnc_job_add_rect(job,
>> -                                              last_x * VNC_DIRTY_PIXELS_PER_BIT,
>> -                                              y,
>> -                                              (x - last_x) *
>> -                                              VNC_DIRTY_PIXELS_PER_BIT,
>> -                                              h);
>> -                    }
>> -                    last_x = -1;
>> -                }
>> -            }
>> -            if (last_x != -1) {
>> -                int h = find_and_clear_dirty_height(vs, y, last_x, x, height);
>> -                n += vnc_job_add_rect(job, last_x * VNC_DIRTY_PIXELS_PER_BIT,
>> -                                      y,
>> -                                      (x - last_x) * VNC_DIRTY_PIXELS_PER_BIT,
>> -                                      h);
>> +        y = 0;
>> +        for (;;) {
>> +            int x, h;
>> +            unsigned long x2;
>> +            unsigned long offset = find_next_bit((unsigned long *) &vs->dirty,
>> +                                                 height * VNC_DIRTY_BPL(vs),
>> +                                                 y * VNC_DIRTY_BPL(vs));
>> +            if (offset == height * VNC_DIRTY_BPL(vs)) {
>> +                /* no more dirty bits */
>> +                break;
>>               }
>> +            y = offset / VNC_DIRTY_BPL(vs);
>> +            x = offset % VNC_DIRTY_BPL(vs);
>> +            x2 = find_next_zero_bit((unsigned long *) &vs->dirty[y],
>> +                                    VNC_DIRTY_BPL(vs), x);
>> +            bitmap_clear(vs->dirty[y], x, x2 - x);
>> +            h = find_and_clear_dirty_height(vs, y, x, x2, height);
>> +            n += vnc_job_add_rect(job, x * VNC_DIRTY_PIXELS_PER_BIT, y,
>> +                                  (x2 - x) * VNC_DIRTY_PIXELS_PER_BIT, h);
>>           }
> Minor comments:
>   VNC_DIRTY_BPL(vs) is accessing memory by pointer, should we use a
> variable instead of VNC_DIRTY_BPL(vs) in every place, in case of
> compiler didn't optimize it for us?
I am pretty sure that sizeof is evaluated at compile time or do you have other
evidence?
>
>>           vnc_job_push(job);
>> @@ -2678,8 +2677,8 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
>>       int width = pixman_image_get_width(vd->guest.fb);
>>       int height = pixman_image_get_height(vd->guest.fb);
>>       int y;
>> -    uint8_t *guest_row;
>> -    uint8_t *server_row;
>> +    uint8_t *guest_row0 = NULL, *server_row0;
>   Any reason that rename those variable?
It's actually a pointer to row0 and not to any specific row. This is why
I renamed it.
>
>> +    int guest_stride = 0, server_stride;
>>       int cmp_bytes;
>>       VncState *vs;
>>       int has_dirty = 0;
>> @@ -2704,44 +2703,57 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
>>       if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
>>           int width = pixman_image_get_width(vd->server);
>>           tmpbuf = qemu_pixman_linebuf_create(VNC_SERVER_FB_FORMAT, width);
>> -    }
>> -    guest_row = (uint8_t *)pixman_image_get_data(vd->guest.fb);
>> -    server_row = (uint8_t *)pixman_image_get_data(vd->server);
>> -    for (y = 0; y < height; y++) {
>> -        if (!bitmap_empty(vd->guest.dirty[y], VNC_DIRTY_BITS)) {
>> -            int x;
>> -            uint8_t *guest_ptr;
>> -            uint8_t *server_ptr;
>> -
>> -            if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
>> -                qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
>> -                guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
>> -            } else {
>> -                guest_ptr = guest_row;
>> -            }
>> -            server_ptr = server_row;
>> +    } else {
>> +        guest_row0 = (uint8_t *)pixman_image_get_data(vd->guest.fb);
>> +        guest_stride = pixman_image_get_stride(vd->guest.fb);
>> +    }
>> +    server_row0 = (uint8_t *)pixman_image_get_data(vd->server);
>> +    server_stride = pixman_image_get_stride(vd->server);
>> +
>> +    y = 0;
>> +    for (;;) {
>> +        int x;
>> +        uint8_t *guest_ptr, *server_ptr;
>> +        unsigned long offset = find_next_bit((unsigned long *) &vd->guest.dirty,
>> +                                             height * VNC_DIRTY_BPL(&vd->guest),
>> +                                             y * VNC_DIRTY_BPL(&vd->guest));
>> +        if (offset == height * VNC_DIRTY_BPL(&vd->guest)) {
>> +            /* no more dirty bits */
>> +            break;
>> +        }
>> +        y = offset / VNC_DIRTY_BPL(&vd->guest);
>>
>> -            for (x = 0; x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
>> -                 x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
>> -                 server_ptr += cmp_bytes) {
>> -                if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
>> -                    vd->guest.dirty[y])) {
>> -                    continue;
>> -                }
>> -                if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
>> -                    continue;
>> -                }
>> -                memcpy(server_ptr, guest_ptr, cmp_bytes);
>> -                if (!vd->non_adaptive)
>> -                    vnc_rect_updated(vd, x, y, &tv);
>> -                QTAILQ_FOREACH(vs, &vd->clients, next) {
>> -                    set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
>> -                }
>> -                has_dirty++;
>> +        server_ptr = server_row0 + y * server_stride;
>> +
>> +        if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
>> +            qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
>> +            guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
>> +        } else {
>> +            guest_ptr = guest_row0 + y * guest_stride;
>> +        }
>> +
>> +        for (x = offset % VNC_DIRTY_BPL(&vd->guest);
>> +             x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
>> +             x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
>> +             server_ptr += cmp_bytes) {
>> +            if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
>> +                vd->guest.dirty[y])) {
>> +                continue;
>> +            }
>> +            if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
>> +                continue;
>> +            }
>> +            memcpy(server_ptr, guest_ptr, cmp_bytes);
>> +            if (!vd->non_adaptive) {
>> +                vnc_rect_updated(vd, x, y, &tv);
>>               }
>> +            QTAILQ_FOREACH(vs, &vd->clients, next) {
>> +                set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
>> +            }
>> +            has_dirty++;
>>           }
>> -        guest_row  += pixman_image_get_stride(vd->guest.fb);
>> -        server_row += pixman_image_get_stride(vd->server);
>> +
>> +        y++;
>>       }
>>       qemu_pixman_image_unref(tmpbuf);
>>       return has_dirty;
>> diff --git a/ui/vnc.h b/ui/vnc.h
>> index 4a8f33c..07e1f59 100644
>> --- a/ui/vnc.h
>> +++ b/ui/vnc.h
>> @@ -88,6 +88,10 @@ typedef void VncSendHextileTile(VncState *vs,
>>   /* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */
>>   #define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT)
>>
>> +/* VNC_DIRTY_BPL (BPL = bits per line) might be greater than
>> + * VNC_DIRTY_BITS due to alignment */
>> +#define VNC_DIRTY_BPL(x) (sizeof((x)->dirty) / VNC_MAX_HEIGHT * BITS_PER_BYTE)
>> +
>>   #define VNC_STAT_RECT  64
>>   #define VNC_STAT_COLS (VNC_MAX_WIDTH / VNC_STAT_RECT)
>>   #define VNC_STAT_ROWS (VNC_MAX_HEIGHT / VNC_STAT_RECT)
>>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCHv3 3/6] ui/vnc: optimize dirty bitmap tracking
  2014-01-06 10:08   ` Wenchao Xia
  2014-01-06 13:31     ` Peter Lieven
@ 2014-01-06 17:37     ` Peter Lieven
  1 sibling, 0 replies; 14+ messages in thread
From: Peter Lieven @ 2014-01-06 17:37 UTC (permalink / raw)
  To: Wenchao Xia, qemu-devel; +Cc: sw, aliguori

On 06.01.2014 11:08, Wenchao Xia wrote:
> 于 2014/1/6 2:02, Peter Lieven 写道:
>> vnc_update_client currently scans the dirty bitmap of each client
>> bitwise which is a very costly operation if only few bits are dirty.
>> vnc_refresh_server_surface does almost the same.
>> this patch optimizes both by utilizing the heavily optimized
>> function find_next_bit to find the offset of the next dirty
>> bit in the dirty bitmaps.
>>
>> The following artificial test (just the bitmap operation part) running
>> vnc_update_client 65536 times on a 2560x2048 surface illustrates the
>> performance difference:
>>
>> All bits clean - vnc_update_client_new: 0.07 secs
>>                   vnc_update_client_old: 10.98 secs
>>
>> All bits dirty - vnc_update_client_new: 11.26 secs
>>                   vnc_update_client_old: 20.19 secs
>>
>> Few bits dirty - vnc_update_client_new: 0.08 secs
>>                   vnc_update_client_old: 10.98 secs
>>
>> The case for all bits dirty is still rather slow, this
>> is due to the implementation of find_and_clear_dirty_height.
>> This will be addressed in a separate patch.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>>   ui/vnc.c |  154 +++++++++++++++++++++++++++++++++-----------------------------
>>   ui/vnc.h |    4 ++
>>   2 files changed, 87 insertions(+), 71 deletions(-)
>>
>> diff --git a/ui/vnc.c b/ui/vnc.c
>> index 1d2aa1a..6a0c03e 100644
>> --- a/ui/vnc.c
>> +++ b/ui/vnc.c
>> @@ -572,6 +572,14 @@ void *vnc_server_fb_ptr(VncDisplay *vd, int x, int y)
>>       ptr += x * VNC_SERVER_FB_BYTES;
>>       return ptr;
>>   }
>> +/* this sets only the visible pixels of a dirty bitmap */
>> +#define VNC_SET_VISIBLE_PIXELS_DIRTY(bitmap, w, h) {\
>> +        int y;\
>> +        memset(bitmap, 0x00, sizeof(bitmap));\
>> +        for (y = 0; y < h; y++) {\
>> +            bitmap_set(bitmap[y], 0, w / VNC_DIRTY_PIXELS_PER_BIT);\
>   Will it be a problem when vnc's width % VNC_DIRTY_PIXELS_PER_BIT != 0?
> Although it is a rare case, but I think it is better round it up, since
> "v" and "VNC_DIRTY_PIXELS_PER_BIT" are variables. A macro computing it
> would be nice:
>
> #define VNC_DIRTY_BITS_FROM_WIDTH(w) (w + VNC_DIRTY_PIXELS_PER_BIT - 1/
> VNC_DIRTY_PIXELS_PER_BIT)
> #define VNC_DIRTY_BITS (VNC_DIRTY_BITS_FROM_WIDTH(VNC_MAX_WIDTH)
>
> then here:
>     bitmap_set(bitmap[y], 0, VNC_DIRTY_BITS_FROM_WIDTH(w));
>
> Or simply warn or coredump when v % VNC_DIRTY_PIXELS_PER_BIT != 0.
>
> Also, in vnc.h:
> /* VNC_MAX_WIDTH must be a multiple of 16. */
> #define VNC_MAX_WIDTH 2560
> #define VNC_MAX_HEIGHT 2048
>
> Maybe it should be updated as:
> /* VNC_MAX_WIDTH must be a multiple of VNC_DIRTY_PIXELS_PER_BIT. */
>
>> +        } \
>> +    }
>>
>>   static void vnc_dpy_switch(DisplayChangeListener *dcl,
>>                              DisplaySurface *surface)
>> @@ -597,7 +605,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
>>       qemu_pixman_image_unref(vd->guest.fb);
>>       vd->guest.fb = pixman_image_ref(surface->image);
>>       vd->guest.format = surface->format;
>> -    memset(vd->guest.dirty, 0xFF, sizeof(vd->guest.dirty));
>> +    VNC_SET_VISIBLE_PIXELS_DIRTY(vd->guest.dirty,
>> +                                 surface_width(vd->ds),
>> +                                 surface_height(vd->ds));
>>
>>       QTAILQ_FOREACH(vs, &vd->clients, next) {
>>           vnc_colordepth(vs);
>> @@ -605,7 +615,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
>>           if (vs->vd->cursor) {
>>               vnc_cursor_define(vs);
>>           }
>> -        memset(vs->dirty, 0xFF, sizeof(vs->dirty));
>> +        VNC_SET_VISIBLE_PIXELS_DIRTY(vs->dirty,
>> +                                     surface_width(vd->ds),
>> +                                     surface_height(vd->ds));
>>       }
>>   }
>>
>> @@ -889,10 +901,9 @@ static int vnc_update_client(VncState *vs, int has_dirty)
>>           VncDisplay *vd = vs->vd;
>>           VncJob *job;
>>           int y;
>> -        int width, height;
>> +        int height;
>>           int n = 0;
>>
>> -
>>           if (vs->output.offset && !vs->audio_cap && !vs->force_update)
>>               /* kernel send buffers are full -> drop frames to throttle */
>>               return 0;
>> @@ -908,39 +919,27 @@ static int vnc_update_client(VncState *vs, int has_dirty)
>>            */
>>           job = vnc_job_new(vs);
>>
>> -        width = MIN(pixman_image_get_width(vd->server), vs->client_width);
>>           height = MIN(pixman_image_get_height(vd->server), vs->client_height);
>>
>> -        for (y = 0; y < height; y++) {
>> -            int x;
>> -            int last_x = -1;
>> -            for (x = 0; x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) {
>> -                if (test_and_clear_bit(x, vs->dirty[y])) {
>> -                    if (last_x == -1) {
>> -                        last_x = x;
>> -                    }
>> -                } else {
>> -                    if (last_x != -1) {
>> -                        int h = find_and_clear_dirty_height(vs, y, last_x, x,
>> -                                                            height);
>> -
>> -                        n += vnc_job_add_rect(job,
>> -                                              last_x * VNC_DIRTY_PIXELS_PER_BIT,
>> -                                              y,
>> -                                              (x - last_x) *
>> -                                              VNC_DIRTY_PIXELS_PER_BIT,
>> -                                              h);
>> -                    }
>> -                    last_x = -1;
>> -                }
>> -            }
>> -            if (last_x != -1) {
>> -                int h = find_and_clear_dirty_height(vs, y, last_x, x, height);
>> -                n += vnc_job_add_rect(job, last_x * VNC_DIRTY_PIXELS_PER_BIT,
>> -                                      y,
>> -                                      (x - last_x) * VNC_DIRTY_PIXELS_PER_BIT,
>> -                                      h);
>> +        y = 0;
>> +        for (;;) {
>> +            int x, h;
>> +            unsigned long x2;
>> +            unsigned long offset = find_next_bit((unsigned long *) &vs->dirty,
>> +                                                 height * VNC_DIRTY_BPL(vs),
>> +                                                 y * VNC_DIRTY_BPL(vs));
>> +            if (offset == height * VNC_DIRTY_BPL(vs)) {
>> +                /* no more dirty bits */
>> +                break;
>>               }
>> +            y = offset / VNC_DIRTY_BPL(vs);
>> +            x = offset % VNC_DIRTY_BPL(vs);
>> +            x2 = find_next_zero_bit((unsigned long *) &vs->dirty[y],
>> +                                    VNC_DIRTY_BPL(vs), x);
>> +            bitmap_clear(vs->dirty[y], x, x2 - x);
>> +            h = find_and_clear_dirty_height(vs, y, x, x2, height);
>> +            n += vnc_job_add_rect(job, x * VNC_DIRTY_PIXELS_PER_BIT, y,
>> +                                  (x2 - x) * VNC_DIRTY_PIXELS_PER_BIT, h);
>>           }
> Minor comments:
>   VNC_DIRTY_BPL(vs) is accessing memory by pointer, should we use a
> variable instead of VNC_DIRTY_BPL(vs) in every place, in case of
> compiler didn't optimize it for us?
>
>>           vnc_job_push(job);
>> @@ -2678,8 +2677,8 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
>>       int width = pixman_image_get_width(vd->guest.fb);
>>       int height = pixman_image_get_height(vd->guest.fb);
>>       int y;
>> -    uint8_t *guest_row;
>> -    uint8_t *server_row;
>> +    uint8_t *guest_row0 = NULL, *server_row0;
>   Any reason that rename those variable?
>
>> +    int guest_stride = 0, server_stride;
>>       int cmp_bytes;
>>       VncState *vs;
>>       int has_dirty = 0;
>> @@ -2704,44 +2703,57 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
>>       if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
>>           int width = pixman_image_get_width(vd->server);
>>           tmpbuf = qemu_pixman_linebuf_create(VNC_SERVER_FB_FORMAT, width);
>> -    }
>> -    guest_row = (uint8_t *)pixman_image_get_data(vd->guest.fb);
>> -    server_row = (uint8_t *)pixman_image_get_data(vd->server);
>> -    for (y = 0; y < height; y++) {
>> -        if (!bitmap_empty(vd->guest.dirty[y], VNC_DIRTY_BITS)) {
>> -            int x;
>> -            uint8_t *guest_ptr;
>> -            uint8_t *server_ptr;
>> -
>> -            if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
>> -                qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
>> -                guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
>> -            } else {
>> -                guest_ptr = guest_row;
>> -            }
>> -            server_ptr = server_row;
>> +    } else {
>> +        guest_row0 = (uint8_t *)pixman_image_get_data(vd->guest.fb);
>> +        guest_stride = pixman_image_get_stride(vd->guest.fb);
>> +    }
>> +    server_row0 = (uint8_t *)pixman_image_get_data(vd->server);
>> +    server_stride = pixman_image_get_stride(vd->server);
>> +
>> +    y = 0;
>> +    for (;;) {
>> +        int x;
>> +        uint8_t *guest_ptr, *server_ptr;
>> +        unsigned long offset = find_next_bit((unsigned long *) &vd->guest.dirty,
>> +                                             height * VNC_DIRTY_BPL(&vd->guest),
>> +                                             y * VNC_DIRTY_BPL(&vd->guest));
>> +        if (offset == height * VNC_DIRTY_BPL(&vd->guest)) {
>> +            /* no more dirty bits */
>> +            break;
>> +        }
>> +        y = offset / VNC_DIRTY_BPL(&vd->guest);
>>
>> -            for (x = 0; x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
>> -                 x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
>> -                 server_ptr += cmp_bytes) {
>> -                if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
>> -                    vd->guest.dirty[y])) {
>> -                    continue;
>> -                }
>> -                if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
>> -                    continue;
>> -                }
>> -                memcpy(server_ptr, guest_ptr, cmp_bytes);
>> -                if (!vd->non_adaptive)
>> -                    vnc_rect_updated(vd, x, y, &tv);
>> -                QTAILQ_FOREACH(vs, &vd->clients, next) {
>> -                    set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
>> -                }
>> -                has_dirty++;
>> +        server_ptr = server_row0 + y * server_stride;
>> +
>> +        if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
>> +            qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
>> +            guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
>> +        } else {
>> +            guest_ptr = guest_row0 + y * guest_stride;
>> +        }
>> +
>> +        for (x = offset % VNC_DIRTY_BPL(&vd->guest);
>> +             x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
>> +             x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
>> +             server_ptr += cmp_bytes) {
>> +            if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
>> +                vd->guest.dirty[y])) {
>> +                continue;
>> +            }
>> +            if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
>> +                continue;
>> +            }
>> +            memcpy(server_ptr, guest_ptr, cmp_bytes);
>> +            if (!vd->non_adaptive) {
>> +                vnc_rect_updated(vd, x, y, &tv);
>>               }
>> +            QTAILQ_FOREACH(vs, &vd->clients, next) {
>> +                set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
>> +            }
>> +            has_dirty++;
>>           }
>> -        guest_row  += pixman_image_get_stride(vd->guest.fb);
>> -        server_row += pixman_image_get_stride(vd->server);
>> +
>> +        y++;
>>       }
>>       qemu_pixman_image_unref(tmpbuf);
>>       return has_dirty;
>> diff --git a/ui/vnc.h b/ui/vnc.h
>> index 4a8f33c..07e1f59 100644
>> --- a/ui/vnc.h
>> +++ b/ui/vnc.h
>> @@ -88,6 +88,10 @@ typedef void VncSendHextileTile(VncState *vs,
>>   /* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */
>>   #define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT)
>>
>> +/* VNC_DIRTY_BPL (BPL = bits per line) might be greater than
>> + * VNC_DIRTY_BITS due to alignment */
>> +#define VNC_DIRTY_BPL(x) (sizeof((x)->dirty) / VNC_MAX_HEIGHT * BITS_PER_BYTE)
>> +
>>   #define VNC_STAT_RECT  64
>>   #define VNC_STAT_COLS (VNC_MAX_WIDTH / VNC_STAT_RECT)
>>   #define VNC_STAT_ROWS (VNC_MAX_HEIGHT / VNC_STAT_RECT)
>>

In case you or anyone else wants to test further: the x offset calculation in vnc_refresh_server_surface was wrong.
I will send an updated series addressing Wenchao's comments later this week.

diff --git a/ui/vnc.c b/ui/vnc.c
index cf9aa4a..3b076ee 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -2717,8 +2717,9 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
break;
}
y = offset / VNC_DIRTY_BPL(&vd->guest);
-
- server_ptr = server_row0 + y * server_stride;
+ x = offset % VNC_DIRTY_BPL(&vd->guest);
+
+ server_ptr = server_row0 + y * server_stride + x * cmp_bytes;

if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
@@ -2726,13 +2727,11 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
} else {
guest_ptr = guest_row0 + y * guest_stride;
}
+ guest_ptr += x * cmp_bytes;

- for (x = offset % VNC_DIRTY_BPL(&vd->guest);
- x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
- x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
- server_ptr += cmp_bytes) {
- if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
- vd->guest.dirty[y])) {
+ for (;x < DIV_ROUND_UP(width, VNC_DIRTY_PIXELS_PER_BIT);
+ x++, guest_ptr += cmp_bytes, server_ptr += cmp_bytes) {
+ if (!test_and_clear_bit(x, vd->guest.dirty[y])) {
continue;
}
if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
@@ -2740,10 +2739,11 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
}
memcpy(server_ptr, guest_ptr, cmp_bytes);
if (!vd->non_adaptive) {
- vnc_rect_updated(vd, x, y, &tv);
+ vnc_rect_updated(vd, x * VNC_DIRTY_PIXELS_PER_BIT,
+ y, &tv);
}
QTAILQ_FOREACH(vs, &vd->clients, next) {
- set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
+ set_bit(x, vs->dirty[y]);
}
has_dirty++;
}

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [PATCHv3 3/6] ui/vnc: optimize dirty bitmap tracking
  2014-01-06 13:31     ` Peter Lieven
@ 2014-01-07  2:00       ` Wenchao Xia
  0 siblings, 0 replies; 14+ messages in thread
From: Wenchao Xia @ 2014-01-07  2:00 UTC (permalink / raw)
  To: Peter Lieven, qemu-devel; +Cc: sw, aliguori

于 2014/1/6 21:31, Peter Lieven 写道:
> On 06.01.2014 11:08, Wenchao Xia wrote:
>> 于 2014/1/6 2:02, Peter Lieven 写道:
>>> vnc_update_client currently scans the dirty bitmap of each client
>>> bitwise which is a very costly operation if only few bits are dirty.
>>> vnc_refresh_server_surface does almost the same.
>>> this patch optimizes both by utilizing the heavily optimized
>>> function find_next_bit to find the offset of the next dirty
>>> bit in the dirty bitmaps.
>>>
>>> The following artificial test (just the bitmap operation part) running
>>> vnc_update_client 65536 times on a 2560x2048 surface illustrates the
>>> performance difference:
>>>
>>> All bits clean - vnc_update_client_new: 0.07 secs
>>>                    vnc_update_client_old: 10.98 secs
>>>
>>> All bits dirty - vnc_update_client_new: 11.26 secs
>>>                    vnc_update_client_old: 20.19 secs
>>>
>>> Few bits dirty - vnc_update_client_new: 0.08 secs
>>>                    vnc_update_client_old: 10.98 secs
>>>
>>> The case for all bits dirty is still rather slow; this
>>> is due to the implementation of find_and_clear_dirty_height.
>>> This will be addressed in a separate patch.
>>>
>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>> ---
>>>    ui/vnc.c |  154 +++++++++++++++++++++++++++++++++-----------------------------
>>>    ui/vnc.h |    4 ++
>>>    2 files changed, 87 insertions(+), 71 deletions(-)
>>>
>>> diff --git a/ui/vnc.c b/ui/vnc.c
>>> index 1d2aa1a..6a0c03e 100644
>>> --- a/ui/vnc.c
>>> +++ b/ui/vnc.c
>>> @@ -572,6 +572,14 @@ void *vnc_server_fb_ptr(VncDisplay *vd, int x, int y)
>>>        ptr += x * VNC_SERVER_FB_BYTES;
>>>        return ptr;
>>>    }
>>> +/* this sets only the visible pixels of a dirty bitmap */
>>> +#define VNC_SET_VISIBLE_PIXELS_DIRTY(bitmap, w, h) {\
>>> +        int y;\
>>> +        memset(bitmap, 0x00, sizeof(bitmap));\
>>> +        for (y = 0; y < h; y++) {\
>>> +            bitmap_set(bitmap[y], 0, w / VNC_DIRTY_PIXELS_PER_BIT);\
>>    Will it be a problem when vnc's width % VNC_DIRTY_PIXELS_PER_BIT != 0?
>> Although it is a rare case, I think it is better to round it up, since
>> "w" and "VNC_DIRTY_PIXELS_PER_BIT" are variables. A macro computing it
>> would be nice:
> Good point, I will use DIV_ROUND_UP here.
>>
>> #define VNC_DIRTY_BITS_FROM_WIDTH(w) (w + VNC_DIRTY_PIXELS_PER_BIT - 1/
>> VNC_DIRTY_PIXELS_PER_BIT)
>> #define VNC_DIRTY_BITS (VNC_DIRTY_BITS_FROM_WIDTH(VNC_MAX_WIDTH)
>>
>> then here:
>>      bitmap_set(bitmap[y], 0, VNC_DIRTY_BITS_FROM_WIDTH(w));
>>
>> Or simply warn or coredump when w % VNC_DIRTY_PIXELS_PER_BIT != 0.
>>
>> Also, in vnc.h:
>> /* VNC_MAX_WIDTH must be a multiple of 16. */
>> #define VNC_MAX_WIDTH 2560
>> #define VNC_MAX_HEIGHT 2048
>>
>> Maybe it should be updated as:
>> /* VNC_MAX_WIDTH must be a multiple of VNC_DIRTY_PIXELS_PER_BIT. */
> correct. will fix as well.
>>
>>> +        } \
>>> +    }
>>>
>>>    static void vnc_dpy_switch(DisplayChangeListener *dcl,
>>>                               DisplaySurface *surface)
>>> @@ -597,7 +605,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
>>>        qemu_pixman_image_unref(vd->guest.fb);
>>>        vd->guest.fb = pixman_image_ref(surface->image);
>>>        vd->guest.format = surface->format;
>>> -    memset(vd->guest.dirty, 0xFF, sizeof(vd->guest.dirty));
>>> +    VNC_SET_VISIBLE_PIXELS_DIRTY(vd->guest.dirty,
>>> +                                 surface_width(vd->ds),
>>> +                                 surface_height(vd->ds));
>>>
>>>        QTAILQ_FOREACH(vs, &vd->clients, next) {
>>>            vnc_colordepth(vs);
>>> @@ -605,7 +615,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
>>>            if (vs->vd->cursor) {
>>>                vnc_cursor_define(vs);
>>>            }
>>> -        memset(vs->dirty, 0xFF, sizeof(vs->dirty));
>>> +        VNC_SET_VISIBLE_PIXELS_DIRTY(vs->dirty,
>>> +                                     surface_width(vd->ds),
>>> +                                     surface_height(vd->ds));
>>>        }
>>>    }
>>>
>>> @@ -889,10 +901,9 @@ static int vnc_update_client(VncState *vs, int has_dirty)
>>>            VncDisplay *vd = vs->vd;
>>>            VncJob *job;
>>>            int y;
>>> -        int width, height;
>>> +        int height;
>>>            int n = 0;
>>>
>>> -
>>>            if (vs->output.offset && !vs->audio_cap && !vs->force_update)
>>>                /* kernel send buffers are full -> drop frames to throttle */
>>>                return 0;
>>> @@ -908,39 +919,27 @@ static int vnc_update_client(VncState *vs, int has_dirty)
>>>             */
>>>            job = vnc_job_new(vs);
>>>
>>> -        width = MIN(pixman_image_get_width(vd->server), vs->client_width);
>>>            height = MIN(pixman_image_get_height(vd->server), vs->client_height);
>>>
>>> -        for (y = 0; y < height; y++) {
>>> -            int x;
>>> -            int last_x = -1;
>>> -            for (x = 0; x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) {
>>> -                if (test_and_clear_bit(x, vs->dirty[y])) {
>>> -                    if (last_x == -1) {
>>> -                        last_x = x;
>>> -                    }
>>> -                } else {
>>> -                    if (last_x != -1) {
>>> -                        int h = find_and_clear_dirty_height(vs, y, last_x, x,
>>> -                                                            height);
>>> -
>>> -                        n += vnc_job_add_rect(job,
>>> -                                              last_x * VNC_DIRTY_PIXELS_PER_BIT,
>>> -                                              y,
>>> -                                              (x - last_x) *
>>> -                                              VNC_DIRTY_PIXELS_PER_BIT,
>>> -                                              h);
>>> -                    }
>>> -                    last_x = -1;
>>> -                }
>>> -            }
>>> -            if (last_x != -1) {
>>> -                int h = find_and_clear_dirty_height(vs, y, last_x, x, height);
>>> -                n += vnc_job_add_rect(job, last_x * VNC_DIRTY_PIXELS_PER_BIT,
>>> -                                      y,
>>> -                                      (x - last_x) * VNC_DIRTY_PIXELS_PER_BIT,
>>> -                                      h);
>>> +        y = 0;
>>> +        for (;;) {
>>> +            int x, h;
>>> +            unsigned long x2;
>>> +            unsigned long offset = find_next_bit((unsigned long *) &vs->dirty,
>>> +                                                 height * VNC_DIRTY_BPL(vs),
>>> +                                                 y * VNC_DIRTY_BPL(vs));
>>> +            if (offset == height * VNC_DIRTY_BPL(vs)) {
>>> +                /* no more dirty bits */
>>> +                break;
>>>                }
>>> +            y = offset / VNC_DIRTY_BPL(vs);
>>> +            x = offset % VNC_DIRTY_BPL(vs);
>>> +            x2 = find_next_zero_bit((unsigned long *) &vs->dirty[y],
>>> +                                    VNC_DIRTY_BPL(vs), x);
>>> +            bitmap_clear(vs->dirty[y], x, x2 - x);
>>> +            h = find_and_clear_dirty_height(vs, y, x, x2, height);
>>> +            n += vnc_job_add_rect(job, x * VNC_DIRTY_PIXELS_PER_BIT, y,
>>> +                                  (x2 - x) * VNC_DIRTY_PIXELS_PER_BIT, h);
>>>            }
>> Minor comments:
>>    VNC_DIRTY_BPL(vs) accesses memory through a pointer; should we use a
>> variable instead of VNC_DIRTY_BPL(vs) in every place, in case the
>> compiler doesn't optimize it for us?
> I am pretty sure that sizeof is evaluated at compile time or do you have other
> evidence?

  You are right, it is sizeof((x)->dirty), I missed the bracket before.

>>
>>>            vnc_job_push(job);
>>> @@ -2678,8 +2677,8 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
>>>        int width = pixman_image_get_width(vd->guest.fb);
>>>        int height = pixman_image_get_height(vd->guest.fb);
>>>        int y;
>>> -    uint8_t *guest_row;
>>> -    uint8_t *server_row;
>>> +    uint8_t *guest_row0 = NULL, *server_row0;
>>    Any reason for renaming those variables?
> It's actually a pointer to row0 and not to any specific row. This is why
> I renamed it.

  I see, thanks for the explanation.

>>
>>> +    int guest_stride = 0, server_stride;
>>>        int cmp_bytes;
>>>        VncState *vs;
>>>        int has_dirty = 0;
>>> @@ -2704,44 +2703,57 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
>>>        if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
>>>            int width = pixman_image_get_width(vd->server);
>>>            tmpbuf = qemu_pixman_linebuf_create(VNC_SERVER_FB_FORMAT, width);
>>> -    }
>>> -    guest_row = (uint8_t *)pixman_image_get_data(vd->guest.fb);
>>> -    server_row = (uint8_t *)pixman_image_get_data(vd->server);
>>> -    for (y = 0; y < height; y++) {
>>> -        if (!bitmap_empty(vd->guest.dirty[y], VNC_DIRTY_BITS)) {
>>> -            int x;
>>> -            uint8_t *guest_ptr;
>>> -            uint8_t *server_ptr;
>>> -
>>> -            if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
>>> -                qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
>>> -                guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
>>> -            } else {
>>> -                guest_ptr = guest_row;
>>> -            }
>>> -            server_ptr = server_row;
>>> +    } else {
>>> +        guest_row0 = (uint8_t *)pixman_image_get_data(vd->guest.fb);
>>> +        guest_stride = pixman_image_get_stride(vd->guest.fb);
>>> +    }
>>> +    server_row0 = (uint8_t *)pixman_image_get_data(vd->server);
>>> +    server_stride = pixman_image_get_stride(vd->server);
>>> +
>>> +    y = 0;
>>> +    for (;;) {
>>> +        int x;
>>> +        uint8_t *guest_ptr, *server_ptr;
>>> +        unsigned long offset = find_next_bit((unsigned long *) &vd->guest.dirty,
>>> +                                             height * VNC_DIRTY_BPL(&vd->guest),
>>> +                                             y * VNC_DIRTY_BPL(&vd->guest));
>>> +        if (offset == height * VNC_DIRTY_BPL(&vd->guest)) {
>>> +            /* no more dirty bits */
>>> +            break;
>>> +        }
>>> +        y = offset / VNC_DIRTY_BPL(&vd->guest);
>>>
>>> -            for (x = 0; x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
>>> -                 x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
>>> -                 server_ptr += cmp_bytes) {
>>> -                if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
>>> -                    vd->guest.dirty[y])) {
>>> -                    continue;
>>> -                }
>>> -                if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
>>> -                    continue;
>>> -                }
>>> -                memcpy(server_ptr, guest_ptr, cmp_bytes);
>>> -                if (!vd->non_adaptive)
>>> -                    vnc_rect_updated(vd, x, y, &tv);
>>> -                QTAILQ_FOREACH(vs, &vd->clients, next) {
>>> -                    set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
>>> -                }
>>> -                has_dirty++;
>>> +        server_ptr = server_row0 + y * server_stride;
>>> +
>>> +        if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
>>> +            qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
>>> +            guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
>>> +        } else {
>>> +            guest_ptr = guest_row0 + y * guest_stride;
>>> +        }
>>> +
>>> +        for (x = offset % VNC_DIRTY_BPL(&vd->guest);
>>> +             x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
>>> +             x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
>>> +             server_ptr += cmp_bytes) {
>>> +            if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
>>> +                vd->guest.dirty[y])) {
>>> +                continue;
>>> +            }
>>> +            if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
>>> +                continue;
>>> +            }
>>> +            memcpy(server_ptr, guest_ptr, cmp_bytes);
>>> +            if (!vd->non_adaptive) {
>>> +                vnc_rect_updated(vd, x, y, &tv);
>>>                }
>>> +            QTAILQ_FOREACH(vs, &vd->clients, next) {
>>> +                set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
>>> +            }
>>> +            has_dirty++;
>>>            }
>>> -        guest_row  += pixman_image_get_stride(vd->guest.fb);
>>> -        server_row += pixman_image_get_stride(vd->server);
>>> +
>>> +        y++;
>>>        }
>>>        qemu_pixman_image_unref(tmpbuf);
>>>        return has_dirty;
>>> diff --git a/ui/vnc.h b/ui/vnc.h
>>> index 4a8f33c..07e1f59 100644
>>> --- a/ui/vnc.h
>>> +++ b/ui/vnc.h
>>> @@ -88,6 +88,10 @@ typedef void VncSendHextileTile(VncState *vs,
>>>    /* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */
>>>    #define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT)
>>>
>>> +/* VNC_DIRTY_BPL (BPL = bits per line) might be greater than
>>> + * VNC_DIRTY_BITS due to alignment */
>>> +#define VNC_DIRTY_BPL(x) (sizeof((x)->dirty) / VNC_MAX_HEIGHT * BITS_PER_BYTE)
>>> +
>>>    #define VNC_STAT_RECT  64
>>>    #define VNC_STAT_COLS (VNC_MAX_WIDTH / VNC_STAT_RECT)
>>>    #define VNC_STAT_ROWS (VNC_MAX_HEIGHT / VNC_STAT_RECT)
>>>
> 
> 
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2014-01-07  2:01 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-01-05 18:02 [Qemu-devel] [PATCHv3 0/6] ui/vnc: update optimizations Peter Lieven
2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 1/6] ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro Peter Lieven
2014-01-06  6:52   ` Wenchao Xia
2014-01-06  8:02     ` Peter Lieven
2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 2/6] ui/vnc: derive cmp_bytes from VNC_DIRTY_PIXELS_PER_BIT Peter Lieven
2014-01-06  7:03   ` Wenchao Xia
2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 3/6] ui/vnc: optimize dirty bitmap tracking Peter Lieven
2014-01-06 10:08   ` Wenchao Xia
2014-01-06 13:31     ` Peter Lieven
2014-01-07  2:00       ` Wenchao Xia
2014-01-06 17:37     ` Peter Lieven
2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 4/6] ui/vnc: optimize clearing in find_and_clear_dirty_height() Peter Lieven
2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 5/6] ui/vnc: optimize setting in vnc_dpy_update() Peter Lieven
2014-01-05 18:02 ` [Qemu-devel] [PATCHv3 6/6] ui/vnc: disable adaptive update calculations if not needed Peter Lieven

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.