[Qemu-devel] [RFC] qemu VGA endian swap low level drawing changes

* [Qemu-devel] [RFC] qemu VGA endian swap low level drawing changes
@ 2014-06-17  3:07 Benjamin Herrenschmidt
  2014-06-17  4:40 ` Paolo Bonzini
  2014-06-17 10:45 ` Gerd Hoffmann
  0 siblings, 2 replies; 66+ messages in thread
From: Benjamin Herrenschmidt @ 2014-06-17  3:07 UTC (permalink / raw)
  To: qemu-devel
  Cc: Alexey Kardashevskiy, Paolo Bonzini, Gerd Hoffmann, Alexander Graf

Hi folks !

WARNING: The patch below is NOT intended for merging for rather obvious
reasons in its current form :-)

So basically, we have a problem we need to solve in qemu powerpc related
to the emulated VGA, as I mentioned in an earlier mail yesterday.

The core issue stems from the fact that graphic stacks including X and a
number of other components along the way are terminally broken if the
framebuffer endianness doesn't match the guest endianness. We can
discuss the historical reasons for that fact if you wish and we can
discuss why it is almost unfixable separately.

This is an obvious problem for qemu on ppc64 since we now support having
either BE or LE guests.

At the moment, we have a broken patch in offb that makes the guest
sort-of limp along with the reversed framebuffer but that patch is
broken in several ways so I've reverted it upstream and X wouldn't work
anyway.

So we need to dynamically be able to change the byte order of the VGA
framebuffer in qemu.

This email is NOT about discussing the mechanisms on how we trigger that
byteswap, ie whether we emulate a new register for the guest to
configure its endian or we use the existing PAPR H_SET_MODE, or a
combination of these etc... I will discuss that separately.

This is purely about discussing the changes to vga.c and vga-templates.h
to be able to handle the swapping in a runtime-decided way rather than
compile time.

I've come up with the proof of concept below which changes the various
line drawing functions in vga-template to select the right ld/st
function. However that adds a per-line test (overhead I can hear some
people shouting ! :-).

A better approach might be to just add new set of functions to
vga_draw_line_table[]. Ie change:

enum {
    VGA_DRAW_LINE2,
    VGA_DRAW_LINE2D2,
    VGA_DRAW_LINE4,
    VGA_DRAW_LINE4D2,
    VGA_DRAW_LINE8D2,
    VGA_DRAW_LINE8,
    VGA_DRAW_LINE15,
    VGA_DRAW_LINE16,
    VGA_DRAW_LINE24,
    VGA_DRAW_LINE32,
    VGA_DRAW_LINE_NB,
};

to something like

enum {
    VGA_DRAW_LINE2,
    VGA_DRAW_LINE2D2,
    VGA_DRAW_LINE4,
    VGA_DRAW_LINE4D2,
    VGA_DRAW_LINE8D2,
    VGA_DRAW_LINE8,
    VGA_DRAW_LINE15_LE,
    VGA_DRAW_LINE16_LE,
    VGA_DRAW_LINE24_LE,
    VGA_DRAW_LINE32_LE,
    VGA_DRAW_LINE15_BE,
    VGA_DRAW_LINE16_BE,
    VGA_DRAW_LINE24_BE,
    VGA_DRAW_LINE32_BE,
    VGA_DRAW_LINE_NB,
};

And add some more macros to generate them.

That means less runtime overhead (though I don't think the overhead is
huge) at the expense of bigger code size.

What do you prefer ?

Proof-of-concept code here:

diff --git a/hw/display/vga.c b/hw/display/vga.c
index 8cd6afe..c6e2e96 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -983,27 +983,72 @@ typedef void vga_draw_glyph9_func(uint8_t *d, int linesize,
 typedef void vga_draw_line_func(VGACommonState *s1, uint8_t *d,
                                 const uint8_t *s, int width);
 
+#if 0
+
+static inline bool vga_is_big_endian(VGACommonState *s)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+    return true;
+#else
+    return true;
+#endif
+}
+
+#else
+
+#ifdef TARGET_WORDS_BIGENDIAN
+static bool vga_is_be = true;
+#else
+static bool vga_is_be = false;
+#endif
+
+void vga_set_big_endian(bool be)
+{
+    vga_is_be = be;
+}
+
+static inline bool vga_is_big_endian(VGACommonState *s)
+{
+    return vga_is_be;
+}
+
+#endif
+
+static inline bool vga_need_swap(VGACommonState *s, bool target_be)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+    return !target_be;
+#else
+    return target_be;
+#endif
+}
+
+
+#define BGR_FORMAT 0
 #define DEPTH 8
 #include "vga_template.h"
 
+#define BGR_FORMAT 0
 #define DEPTH 15
 #include "vga_template.h"
 
-#define BGR_FORMAT
+#define BGR_FORMAT 1
 #define DEPTH 15
 #include "vga_template.h"
 
+#define BGR_FORMAT 0
 #define DEPTH 16
 #include "vga_template.h"
 
-#define BGR_FORMAT
+#define BGR_FORMAT 1
 #define DEPTH 16
 #include "vga_template.h"
 
+#define BGR_FORMAT 0
 #define DEPTH 32
 #include "vga_template.h"
 
-#define BGR_FORMAT
+#define BGR_FORMAT 1
 #define DEPTH 32
 #include "vga_template.h"
 
@@ -1645,11 +1690,9 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
     uint8_t *d;
     uint32_t v, addr1, addr;
     vga_draw_line_func *vga_draw_line;
-#if defined(HOST_WORDS_BIGENDIAN) == defined(TARGET_WORDS_BIGENDIAN)
-    static const bool byteswap = false;
-#else
-    static const bool byteswap = true;
-#endif
+    bool byteswap;
+
+    byteswap = vga_need_swap(s, vga_is_big_endian(s));
 
     full_update |= update_basic_params(s);
 
@@ -1691,7 +1734,8 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
     if (s->line_offset != s->last_line_offset ||
         disp_width != s->last_width ||
         height != s->last_height ||
-        s->last_depth != depth) {
+        s->last_depth != depth ||
+        s->last_bswap != byteswap) {
         if (depth == 32 || (depth == 16 && !byteswap)) {
             surface = qemu_create_displaysurface_from(disp_width,
                     height, depth, s->line_offset,
@@ -1707,6 +1751,7 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
         s->last_height = height;
         s->last_line_offset = s->line_offset;
         s->last_depth = depth;
+	s->last_bswap = byteswap;
         full_update = 1;
     } else if (is_buffer_shared(surface) &&
                (full_update || surface_data(surface) != s->vram_ptr
diff --git a/hw/display/vga_int.h b/hw/display/vga_int.h
index 5320abd..129360f 100644
--- a/hw/display/vga_int.h
+++ b/hw/display/vga_int.h
@@ -148,6 +148,7 @@ typedef struct VGACommonState {
     uint32_t last_width, last_height; /* in chars or pixels */
     uint32_t last_scr_width, last_scr_height; /* in pixels */
     uint32_t last_depth; /* in bits */
+    bool last_bswap;
     uint8_t cursor_start, cursor_end;
     bool cursor_visible_phase;
     int64_t cursor_blink_time;
@@ -205,6 +206,8 @@ uint32_t vbe_ioport_read_data(void *opaque, uint32_t addr);
 void vbe_ioport_write_index(void *opaque, uint32_t addr, uint32_t val);
 void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val);
 
+void vga_set_big_endian(bool be);
+
 extern const uint8_t sr_mask[8];
 extern const uint8_t gr_mask[16];
 
diff --git a/hw/display/vga_template.h b/hw/display/vga_template.h
index 90ec9c2..320a2a0 100644
--- a/hw/display/vga_template.h
+++ b/hw/display/vga_template.h
@@ -35,13 +35,13 @@
 #error unsupport depth
 #endif
 
-#ifdef BGR_FORMAT
+#if BGR_FORMAT
 #define PIXEL_NAME glue(DEPTH, bgr)
 #else
 #define PIXEL_NAME DEPTH
 #endif /* BGR_FORMAT */
 
-#if DEPTH != 15 && !defined(BGR_FORMAT)
+#if DEPTH != 15 && !BGR_FORMAT
 
 static inline void glue(vga_draw_glyph_line_, DEPTH)(uint8_t *d,
                                                      uint32_t font_data,
@@ -353,23 +353,37 @@ static void glue(vga_draw_line8_, DEPTH)(VGACommonState *s1, uint8_t *d,
 static void glue(vga_draw_line15_, PIXEL_NAME)(VGACommonState *s1, uint8_t *d,
                                           const uint8_t *s, int width)
 {
-#if DEPTH == 15 && defined(HOST_WORDS_BIGENDIAN) == defined(TARGET_WORDS_BIGENDIAN)
-    memcpy(d, s, width * 2);
-#else
-    int w;
-    uint32_t v, r, g, b;
-
-    w = width;
-    do {
-        v = lduw_p((void *)s);
-        r = (v >> 7) & 0xf8;
-        g = (v >> 2) & 0xf8;
-        b = (v << 3) & 0xf8;
-        ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
-        s += 2;
-        d += BPP;
-    } while (--w != 0);
-#endif
+    bool is_be = vga_is_big_endian(s1);
+
+    if (DEPTH == 15 && !vga_need_swap(s1, is_be)) {
+        memcpy(d, s, width * 2);
+    } else {
+        int w;
+        uint32_t v, r, g, b;
+
+        w = width;
+        if (is_be) {
+            do {
+                v = lduw_be_p((void *)s);
+                r = (v >> 7) & 0xf8;
+                g = (v >> 2) & 0xf8;
+                b = (v << 3) & 0xf8;
+                ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+                s += 2;
+                d += BPP;
+            } while (--w != 0);
+        } else {
+            do {
+                v = lduw_le_p((void *)s);
+                r = (v >> 7) & 0xf8;
+                g = (v >> 2) & 0xf8;
+                b = (v << 3) & 0xf8;
+                ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+                s += 2;
+                d += BPP;
+            } while (--w != 0);
+        }
+    }
 }
 
 /*
@@ -378,23 +392,37 @@ static void glue(vga_draw_line15_, PIXEL_NAME)(VGACommonState *s1, uint8_t *d,
 static void glue(vga_draw_line16_, PIXEL_NAME)(VGACommonState *s1, uint8_t *d,
                                           const uint8_t *s, int width)
 {
-#if DEPTH == 16 && defined(HOST_WORDS_BIGENDIAN) == defined(TARGET_WORDS_BIGENDIAN)
-    memcpy(d, s, width * 2);
-#else
-    int w;
-    uint32_t v, r, g, b;
-
-    w = width;
-    do {
-        v = lduw_p((void *)s);
-        r = (v >> 8) & 0xf8;
-        g = (v >> 3) & 0xfc;
-        b = (v << 3) & 0xf8;
-        ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
-        s += 2;
-        d += BPP;
-    } while (--w != 0);
-#endif
+    bool is_be = vga_is_big_endian(s1);
+
+    if (DEPTH == 16 && !vga_need_swap(s1, is_be)) {
+        memcpy(d, s, width * 2);
+    } else {
+        int w;
+        uint32_t v, r, g, b;
+
+        w = width;
+        if (is_be) {
+            do {
+                v = lduw_be_p((void *)s);
+                r = (v >> 8) & 0xf8;
+                g = (v >> 3) & 0xfc;
+                b = (v << 3) & 0xf8;
+                ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+                s += 2;
+                d += BPP;
+            } while (--w != 0);
+	} else {
+            do {
+                v = lduw_le_p((void *)s);
+                r = (v >> 8) & 0xf8;
+                g = (v >> 3) & 0xfc;
+                b = (v << 3) & 0xf8;
+                ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+                s += 2;
+                d += BPP;
+            } while (--w != 0);
+	}
+    }
 }
 
 /*
@@ -407,20 +435,25 @@ static void glue(vga_draw_line24_, PIXEL_NAME)(VGACommonState *s1, uint8_t *d,
     uint32_t r, g, b;
 
     w = width;
-    do {
-#if defined(TARGET_WORDS_BIGENDIAN)
-        r = s[0];
-        g = s[1];
-        b = s[2];
-#else
-        b = s[0];
-        g = s[1];
-        r = s[2];
-#endif
-        ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
-        s += 3;
-        d += BPP;
-    } while (--w != 0);
+    if (vga_is_big_endian(s1)) {
+        do {
+            r = s[0];
+            g = s[1];
+            b = s[2];
+            ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+            s += 3;
+            d += BPP;
+        } while (--w != 0);
+    } else {
+        do {
+            b = s[0];
+            g = s[1];
+            r = s[2];
+            ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+            s += 3;
+            d += BPP;
+        } while (--w != 0);
+    }
 }
 
 /*
@@ -429,28 +462,35 @@ static void glue(vga_draw_line24_, PIXEL_NAME)(VGACommonState *s1, uint8_t *d,
 static void glue(vga_draw_line32_, PIXEL_NAME)(VGACommonState *s1, uint8_t *d,
                                           const uint8_t *s, int width)
 {
-#if DEPTH == 32 && defined(HOST_WORDS_BIGENDIAN) == defined(TARGET_WORDS_BIGENDIAN) && !defined(BGR_FORMAT)
-    memcpy(d, s, width * 4);
-#else
-    int w;
-    uint32_t r, g, b;
-
-    w = width;
-    do {
-#if defined(TARGET_WORDS_BIGENDIAN)
-        r = s[1];
-        g = s[2];
-        b = s[3];
-#else
-        b = s[0];
-        g = s[1];
-        r = s[2];
-#endif
-        ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
-        s += 4;
-        d += BPP;
-    } while (--w != 0);
-#endif
+    bool is_be = vga_is_big_endian(s1);
+
+    if (DEPTH == 32 && !BGR_FORMAT && !vga_need_swap(s1, is_be)) {
+        memcpy(d, s, width * 4);
+    } else {
+        int w;
+        uint32_t r, g, b;
+
+        w = width;
+        if (is_be) {
+            do {
+                r = s[1];
+                g = s[2];
+                b = s[3];
+                ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+                s += 4;
+                d += BPP;
+            } while (--w != 0);
+        } else {
+            do {
+                b = s[0];
+                g = s[1];
+                r = s[2];
+                ((PIXEL_TYPE *)d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+                s += 4;
+                d += BPP;
+            } while (--w != 0);
+        }
+    }
 }
 
 #undef PUT_PIXEL2
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 0bae053..c8aaf3b 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -709,6 +709,8 @@ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     return H_SUCCESS;
 }
 
+extern void vga_set_big_endian(bool trueis_be);
+
 static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                                target_ulong opcode, target_ulong *args)
 {
@@ -733,6 +735,7 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr,
             CPU_FOREACH(cs) {
                 set_spr(cs, SPR_LPCR, 0, LPCR_ILE);
             }
+            vga_set_big_endian(true);
             ret = H_SUCCESS;
             break;
 
@@ -740,6 +743,7 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr,
             CPU_FOREACH(cs) {
                 set_spr(cs, SPR_LPCR, LPCR_ILE, LPCR_ILE);
             }
+            vga_set_big_endian(false);
             ret = H_SUCCESS;
             break;
 

^ permalink raw reply related	[flat|nested] 66+ messages in thread