Second try of clean ups and changes to hopefully improve 2D engine performance and fix a security bug in it. This one actually works with AmigaOS handling overlapping blits, fixes the overflow checks and adds Reviewed-by tags. Unless some problems are found this should be OK to merge. Regards, BALATON Zoltan BALATON Zoltan (7): sm501: Convert printf + abort to qemu_log_mask sm501: Shorten long variable names in sm501_2d_operation sm501: Use BIT(x) macro to shorten constant sm501: Clean up local variables in sm501_2d_operation sm501: Replace hand written implementation with pixman where possible sm501: Optimize small overlapping blits sm501: Remove obsolete changelog and todo comment hw/display/sm501.c | 311 ++++++++++++++++++++++++--------------------- 1 file changed, 164 insertions(+), 147 deletions(-) -- 2.21.3
Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> --- hw/display/sm501.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hw/display/sm501.c b/hw/display/sm501.c index f42d05e1e4..97660090bb 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -701,7 +701,7 @@ static void sm501_2d_operation(SM501State *s) { /* obtain operation parameters */ int cmd = (s->twoD_control >> 16) & 0x1F; - int rtl = s->twoD_control & 0x8000000; + int rtl = s->twoD_control & BIT(27); int src_x = (s->twoD_source >> 16) & 0x01FFF; int src_y = s->twoD_source & 0xFFFF; int dst_x = (s->twoD_destination >> 16) & 0x01FFF; @@ -751,8 +751,7 @@ static void sm501_2d_operation(SM501State *s) } } - if ((s->twoD_source_base & 0x08000000) || - (s->twoD_destination_base & 0x08000000)) { + if (s->twoD_source_base & BIT(27) || s->twoD_destination_base & BIT(27)) { qemu_log_mask(LOG_UNIMP, "sm501: only local memory is supported.\n"); return; } -- 2.21.3
Make variables local to the block they are used in to make it clearer which operation they are needed for. Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> --- hw/display/sm501.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/hw/display/sm501.c b/hw/display/sm501.c index 97660090bb..5ed57703d8 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -699,28 +699,19 @@ static inline void hwc_invalidate(SM501State *s, int crt) static void sm501_2d_operation(SM501State *s) { - /* obtain operation parameters */ int cmd = (s->twoD_control >> 16) & 0x1F; int rtl = s->twoD_control & BIT(27); - int src_x = (s->twoD_source >> 16) & 0x01FFF; - int src_y = s->twoD_source & 0xFFFF; - int dst_x = (s->twoD_destination >> 16) & 0x01FFF; - int dst_y = s->twoD_destination & 0xFFFF; - int width = (s->twoD_dimension >> 16) & 0x1FFF; - int height = s->twoD_dimension & 0xFFFF; - uint32_t color = s->twoD_foreground; int format = (s->twoD_stretch >> 20) & 0x3; int rop_mode = (s->twoD_control >> 15) & 0x1; /* 1 for rop2, else rop3 */ /* 1 if rop2 source is the pattern, otherwise the source is the bitmap */ int rop2_source_is_pattern = (s->twoD_control >> 14) & 0x1; int rop = s->twoD_control & 0xFF; - uint32_t src_base = s->twoD_source_base & 0x03FFFFFF; + int dst_x = (s->twoD_destination >> 16) & 0x01FFF; + int dst_y = s->twoD_destination & 0xFFFF; + int width = (s->twoD_dimension >> 16) & 0x1FFF; + int height = s->twoD_dimension & 0xFFFF; uint32_t dst_base = s->twoD_destination_base & 0x03FFFFFF; - - /* get frame buffer info */ - uint8_t *src = s->local_mem + src_base; uint8_t *dst = s->local_mem + dst_base; - int src_pitch = s->twoD_pitch & 0x1FFF; int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF; int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0; int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt); @@ -758,6 +749,13 @@ static void sm501_2d_operation(SM501State *s) switch (cmd) { case 0x00: /* copy area */ + { + int src_x = (s->twoD_source >> 16) & 0x01FFF; + int src_y = s->twoD_source & 0xFFFF; + uint32_t src_base = s->twoD_source_base & 0x03FFFFFF; + uint8_t *src = s->local_mem + src_base; + int src_pitch = s->twoD_pitch & 0x1FFF; + #define COPY_AREA(_bpp, _pixel_type, rtl) { \ int y, x, index_d, index_s; \ for (y = 0; y < height; y++) { \ @@ -793,8 +791,11 @@ static void sm501_2d_operation(SM501State *s) break; } break; - + } case 0x01: /* fill rectangle */ + { + uint32_t color = s->twoD_foreground; + #define FILL_RECT(_bpp, _pixel_type) { \ int y, x; \ for (y = 0; y < height; y++) { \ @@ -819,7 +820,7 @@ static void sm501_2d_operation(SM501State *s) break; } break; - + } default: qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2D operation: %d\n", cmd); -- 2.21.3
This increases readability and cleans up some confusing naming. Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu> --- hw/display/sm501.c | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/hw/display/sm501.c b/hw/display/sm501.c index bd3ccfe311..f42d05e1e4 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -700,17 +700,16 @@ static inline void hwc_invalidate(SM501State *s, int crt) static void sm501_2d_operation(SM501State *s) { /* obtain operation parameters */ - int operation = (s->twoD_control >> 16) & 0x1f; + int cmd = (s->twoD_control >> 16) & 0x1F; int rtl = s->twoD_control & 0x8000000; int src_x = (s->twoD_source >> 16) & 0x01FFF; int src_y = s->twoD_source & 0xFFFF; int dst_x = (s->twoD_destination >> 16) & 0x01FFF; int dst_y = s->twoD_destination & 0xFFFF; - int operation_width = (s->twoD_dimension >> 16) & 0x1FFF; - int operation_height = s->twoD_dimension & 0xFFFF; + int width = (s->twoD_dimension >> 16) & 0x1FFF; + int height = s->twoD_dimension & 0xFFFF; uint32_t color = s->twoD_foreground; - int format_flags = (s->twoD_stretch >> 20) & 0x3; - int addressing = (s->twoD_stretch >> 16) & 0xF; + int format = (s->twoD_stretch >> 20) & 0x3; int rop_mode = (s->twoD_control >> 15) & 0x1; /* 1 for rop2, else rop3 */ /* 1 if rop2 source is the pattern, otherwise the source is the bitmap */ int rop2_source_is_pattern = (s->twoD_control >> 14) & 0x1; @@ -721,12 +720,12 @@ static void sm501_2d_operation(SM501State *s) /* get frame buffer info */ uint8_t *src = s->local_mem + src_base; uint8_t *dst = s->local_mem + dst_base; - int src_width = s->twoD_pitch & 0x1FFF; - int dst_width = (s->twoD_pitch >> 16) & 0x1FFF; + int src_pitch = s->twoD_pitch & 0x1FFF; + int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF; int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0; int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt); - if (addressing != 0x0) { + if ((s->twoD_stretch >> 16) & 0xF) { qemu_log_mask(LOG_UNIMP, "sm501: only XY addressing is supported.\n"); return; } @@ -758,20 +757,20 @@ static void sm501_2d_operation(SM501State *s) return; } - switch (operation) { + switch (cmd) { case 0x00: /* copy area */ #define COPY_AREA(_bpp, _pixel_type, rtl) { \ int y, x, index_d, index_s; \ - for (y = 0; y < operation_height; y++) { \ - for (x = 0; x < operation_width; x++) { \ + for (y = 0; y < height; y++) { \ + for (x = 0; x < width; x++) { \ _pixel_type val; \ \ if (rtl) { \ - index_s = ((src_y - y) * src_width + src_x - x) * _bpp; \ - index_d = ((dst_y - y) * dst_width + dst_x - x) * _bpp; \ + index_s = ((src_y - y) * src_pitch + src_x - x) * _bpp; \ + index_d = ((dst_y - y) * dst_pitch + dst_x - x) * _bpp; \ } else { \ - index_s = ((src_y + y) * src_width + src_x + x) * _bpp; \ - index_d = ((dst_y + y) * dst_width + dst_x + x) * _bpp; \ + index_s = ((src_y + y) * src_pitch + src_x + x) * _bpp; \ + index_d = ((dst_y + y) * dst_pitch + dst_x + x) * _bpp; \ } \ if (rop_mode == 1 && rop == 5) { \ /* Invert dest */ \ @@ -783,7 +782,7 @@ static void sm501_2d_operation(SM501State *s) } \ } \ } - switch (format_flags) { + switch (format) { case 0: COPY_AREA(1, uint8_t, rtl); break; @@ -799,15 +798,15 @@ static void sm501_2d_operation(SM501State *s) case 0x01: /* fill rectangle */ #define FILL_RECT(_bpp, _pixel_type) { \ int y, x; \ - for (y = 0; y < operation_height; y++) { \ - for (x = 0; x < operation_width; x++) { \ - int index = ((dst_y + y) * dst_width + dst_x + x) * _bpp; \ + for (y = 0; y < height; y++) { \ + for (x = 0; x < width; x++) { \ + int index = ((dst_y + y) * dst_pitch + dst_x + x) * _bpp; \ *(_pixel_type *)&dst[index] = (_pixel_type)color; \ } \ } \ } - switch (format_flags) { + switch (format) { case 0: FILL_RECT(1, uint8_t); break; @@ -824,14 +823,14 @@ static void sm501_2d_operation(SM501State *s) default: qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2D operation: %d\n", - operation); + cmd); return; } if (dst_base >= get_fb_addr(s, crt) && dst_base <= get_fb_addr(s, crt) + fb_len) { - int dst_len = MIN(fb_len, ((dst_y + operation_height - 1) * dst_width + - dst_x + operation_width) * (1 << format_flags)); + int dst_len = MIN(fb_len, ((dst_y + height - 1) * dst_pitch + + dst_x + width) * (1 << format)); if (dst_len) { memory_region_set_dirty(&s->local_mem_region, dst_base, dst_len); } -- 2.21.3
Also update copyright year for latest changes Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> --- hw/display/sm501.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/hw/display/sm501.c b/hw/display/sm501.c index e7a9f77de7..edd8d24a76 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -2,7 +2,7 @@ * QEMU SM501 Device * * Copyright (c) 2008 Shin-ichiro KAWASAKI - * Copyright (c) 2016 BALATON Zoltan + * Copyright (c) 2016-2020 BALATON Zoltan * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -40,23 +40,6 @@ #include "ui/pixel_ops.h" #include "qemu/bswap.h" -/* - * Status: 2010/05/07 - * - Minimum implementation for Linux console : mmio regs and CRT layer. - * - 2D graphics acceleration partially supported : only fill rectangle. - * - * Status: 2016/12/04 - * - Misc fixes: endianness, hardware cursor - * - Panel support - * - * TODO: - * - Touch panel support - * - USB support - * - UART support - * - More 2D graphics engine support - * - Performance tuning - */ - /*#define DEBUG_SM501*/ /*#define DEBUG_BITBLT*/ -- 2.21.3
AmigaOS tends to do a lot of small blits (even 1 pixel). Avoid malloc overhead by keeping around a buffer for this and only alloc when blitting larger areas. Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu> --- hw/display/sm501.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/hw/display/sm501.c b/hw/display/sm501.c index 8bf4d111f4..e7a9f77de7 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -750,6 +750,7 @@ static void sm501_2d_operation(SM501State *s) switch (cmd) { case 0: /* BitBlt */ { + static uint32_t tmp_buf[16384]; unsigned int src_x = (s->twoD_source >> 16) & 0x01FFF; unsigned int src_y = s->twoD_source & 0xFFFF; uint32_t src_base = s->twoD_source_base & 0x03FFFFFF; @@ -812,10 +813,14 @@ static void sm501_2d_operation(SM501State *s) de = db + width + height * (width + dst_pitch); if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) { /* regions may overlap: copy via temporary */ - int llb = width * (1 << format); + int free_buf = 0, llb = width * (1 << format); int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t)); - uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) * - height); + uint32_t *tmp = tmp_buf; + + if (tmp_stride * sizeof(uint32_t) * height > sizeof(tmp_buf)) { + tmp = g_malloc(tmp_stride * sizeof(uint32_t) * height); + free_buf = 1; + } pixman_blt((uint32_t *)&s->local_mem[src_base], tmp, src_pitch * (1 << format) / sizeof(uint32_t), tmp_stride, 8 * (1 << format), 8 * (1 << format), @@ -825,7 +830,9 @@ static void sm501_2d_operation(SM501State *s) dst_pitch * (1 << format) / sizeof(uint32_t), 8 * (1 << format), 8 * (1 << format), 0, 0, dst_x, dst_y, width, height); - g_free(tmp); + if (free_buf) { + g_free(tmp); + } } else { pixman_blt((uint32_t *)&s->local_mem[src_base], (uint32_t *)&s->local_mem[dst_base], -- 2.21.3
Besides being faster this should also prevent malicious guests to abuse 2D engine to overwrite data or cause a crash. Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu> --- hw/display/sm501.c | 207 ++++++++++++++++++++++++++------------------- 1 file changed, 119 insertions(+), 88 deletions(-) diff --git a/hw/display/sm501.c b/hw/display/sm501.c index 5ed57703d8..8bf4d111f4 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -706,13 +706,12 @@ static void sm501_2d_operation(SM501State *s) /* 1 if rop2 source is the pattern, otherwise the source is the bitmap */ int rop2_source_is_pattern = (s->twoD_control >> 14) & 0x1; int rop = s->twoD_control & 0xFF; - int dst_x = (s->twoD_destination >> 16) & 0x01FFF; - int dst_y = s->twoD_destination & 0xFFFF; - int width = (s->twoD_dimension >> 16) & 0x1FFF; - int height = s->twoD_dimension & 0xFFFF; + unsigned int dst_x = (s->twoD_destination >> 16) & 0x01FFF; + unsigned int dst_y = s->twoD_destination & 0xFFFF; + unsigned int width = (s->twoD_dimension >> 16) & 0x1FFF; + unsigned int height = s->twoD_dimension & 0xFFFF; uint32_t dst_base = s->twoD_destination_base & 0x03FFFFFF; - uint8_t *dst = s->local_mem + dst_base; - int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF; + unsigned int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF; int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0; int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt); @@ -721,104 +720,136 @@ static void sm501_2d_operation(SM501State *s) return; } - if (rop_mode == 0) { - if (rop != 0xcc) { - /* Anything other than plain copies are not supported */ - qemu_log_mask(LOG_UNIMP, "sm501: rop3 mode with rop %x is not " - "supported.\n", rop); - } - } else { - if (rop2_source_is_pattern && rop != 0x5) { - /* For pattern source, we support only inverse dest */ - qemu_log_mask(LOG_UNIMP, "sm501: rop2 source being the pattern and " - "rop %x is not supported.\n", rop); - } else { - if (rop != 0x5 && rop != 0xc) { - /* Anything other than plain copies or inverse dest is not - * supported */ - qemu_log_mask(LOG_UNIMP, "sm501: rop mode %x is not " - "supported.\n", rop); - } - } - } - if (s->twoD_source_base & BIT(27) || s->twoD_destination_base & BIT(27)) { qemu_log_mask(LOG_UNIMP, "sm501: only local memory is supported.\n"); return; } + if (!dst_pitch) { + qemu_log_mask(LOG_GUEST_ERROR, "sm501: Zero dest pitch.\n"); + return; + } + + if (!width || !height) { + qemu_log_mask(LOG_GUEST_ERROR, "sm501: Zero size 2D op.\n"); + return; + } + + if (rtl) { + dst_x -= width - 1; + dst_y -= height - 1; + } + + if (dst_base >= get_local_mem_size(s) || dst_base + + (dst_x + width + (dst_y + height) * (dst_pitch + width)) * + (1 << format) >= get_local_mem_size(s)) { + qemu_log_mask(LOG_GUEST_ERROR, "sm501: 2D op dest is outside vram.\n"); + return; + } + switch (cmd) { - case 0x00: /* copy area */ + case 0: /* BitBlt */ { - int src_x = (s->twoD_source >> 16) & 0x01FFF; - int src_y = s->twoD_source & 0xFFFF; + unsigned int src_x = (s->twoD_source >> 16) & 0x01FFF; + unsigned int src_y = s->twoD_source & 0xFFFF; uint32_t src_base = s->twoD_source_base & 0x03FFFFFF; - uint8_t *src = s->local_mem + src_base; - int src_pitch = s->twoD_pitch & 0x1FFF; - -#define COPY_AREA(_bpp, _pixel_type, rtl) { \ - int y, x, index_d, index_s; \ - for (y = 0; y < height; y++) { \ - for (x = 0; x < width; x++) { \ - _pixel_type val; \ - \ - if (rtl) { \ - index_s = ((src_y - y) * src_pitch + src_x - x) * _bpp; \ - index_d = ((dst_y - y) * dst_pitch + dst_x - x) * _bpp; \ - } else { \ - index_s = ((src_y + y) * src_pitch + src_x + x) * _bpp; \ - index_d = ((dst_y + y) * dst_pitch + dst_x + x) * _bpp; \ - } \ - if (rop_mode == 1 && rop == 5) { \ - /* Invert dest */ \ - val = ~*(_pixel_type *)&dst[index_d]; \ - } else { \ - val = *(_pixel_type *)&src[index_s]; \ - } \ - *(_pixel_type *)&dst[index_d] = val; \ - } \ - } \ - } - switch (format) { - case 0: - COPY_AREA(1, uint8_t, rtl); - break; - case 1: - COPY_AREA(2, uint16_t, rtl); - break; - case 2: - COPY_AREA(4, uint32_t, rtl); - break; + unsigned int src_pitch = s->twoD_pitch & 0x1FFF; + + if (!src_pitch) { + qemu_log_mask(LOG_GUEST_ERROR, "sm501: Zero src pitch.\n"); + return; + } + + if (rtl) { + src_x -= width - 1; + src_y -= height - 1; + } + + if (src_base >= get_local_mem_size(s) || src_base + + (src_x + width + (src_y + height) * (src_pitch + width)) * + (1 << format) >= get_local_mem_size(s)) { + qemu_log_mask(LOG_GUEST_ERROR, + "sm501: 2D op src is outside vram.\n"); + return; + } + + if ((rop_mode && rop == 0x5) || (!rop_mode && rop == 0x55)) { + /* Invert dest, is there a way to do this with pixman? */ + unsigned int x, y, i; + uint8_t *d = s->local_mem + dst_base; + + for (y = 0; y < height; y++) { + i = (dst_x + (dst_y + y) * dst_pitch) * (1 << format); + for (x = 0; x < width; x++, i += (1 << format)) { + switch (format) { + case 0: + d[i] = ~d[i]; + break; + case 1: + *(uint16_t *)&d[i] = ~*(uint16_t *)&d[i]; + break; + case 2: + *(uint32_t *)&d[i] = ~*(uint32_t *)&d[i]; + break; + } + } + } + } else { + /* Do copy src for unimplemented ops, better than unpainted area */ + if ((rop_mode && (rop != 0xc || rop2_source_is_pattern)) || + (!rop_mode && rop != 0xcc)) { + qemu_log_mask(LOG_UNIMP, + "sm501: rop%d op %x%s not implemented\n", + (rop_mode ? 2 : 3), rop, + (rop2_source_is_pattern ? + " with pattern source" : "")); + } + /* Check for overlaps, this could be made more exact */ + uint32_t sb, se, db, de; + sb = src_base + src_x + src_y * (width + src_pitch); + se = sb + width + height * (width + src_pitch); + db = dst_base + dst_x + dst_y * (width + dst_pitch); + de = db + width + height * (width + dst_pitch); + if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) { + /* regions may overlap: copy via temporary */ + int llb = width * (1 << format); + int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t)); + uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) * + height); + pixman_blt((uint32_t *)&s->local_mem[src_base], tmp, + src_pitch * (1 << format) / sizeof(uint32_t), + tmp_stride, 8 * (1 << format), 8 * (1 << format), + src_x, src_y, 0, 0, width, height); + pixman_blt(tmp, (uint32_t *)&s->local_mem[dst_base], + tmp_stride, + dst_pitch * (1 << format) / sizeof(uint32_t), + 8 * (1 << format), 8 * (1 << format), + 0, 0, dst_x, dst_y, width, height); + g_free(tmp); + } else { + pixman_blt((uint32_t *)&s->local_mem[src_base], + (uint32_t *)&s->local_mem[dst_base], + src_pitch * (1 << format) / sizeof(uint32_t), + dst_pitch * (1 << format) / sizeof(uint32_t), + 8 * (1 << format), 8 * (1 << format), + src_x, src_y, dst_x, dst_y, width, height); + } } break; } - case 0x01: /* fill rectangle */ + case 1: /* Rectangle Fill */ { uint32_t color = s->twoD_foreground; -#define FILL_RECT(_bpp, _pixel_type) { \ - int y, x; \ - for (y = 0; y < height; y++) { \ - for (x = 0; x < width; x++) { \ - int index = ((dst_y + y) * dst_pitch + dst_x + x) * _bpp; \ - *(_pixel_type *)&dst[index] = (_pixel_type)color; \ - } \ - } \ - } - - switch (format) { - case 0: - FILL_RECT(1, uint8_t); - break; - case 1: - color = cpu_to_le16(color); - FILL_RECT(2, uint16_t); - break; - case 2: + if (format == 2) { color = cpu_to_le32(color); - FILL_RECT(4, uint32_t); - break; + } else if (format == 1) { + color = cpu_to_le16(color); } + + pixman_fill((uint32_t *)&s->local_mem[dst_base], + dst_pitch * (1 << format) / sizeof(uint32_t), + 8 * (1 << format), dst_x, dst_y, width, height, color); break; } default: -- 2.21.3
Some places already use qemu_log_mask() to log unimplemented features or errors but some others have printf() then abort(). Convert these to qemu_log_mask() and avoid aborting to prevent guests to easily cause denial of service. Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> --- hw/display/sm501.c | 57 ++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/hw/display/sm501.c b/hw/display/sm501.c index acc692531a..bd3ccfe311 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -727,8 +727,8 @@ static void sm501_2d_operation(SM501State *s) int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt); if (addressing != 0x0) { - printf("%s: only XY addressing is supported.\n", __func__); - abort(); + qemu_log_mask(LOG_UNIMP, "sm501: only XY addressing is supported.\n"); + return; } if (rop_mode == 0) { @@ -754,8 +754,8 @@ static void sm501_2d_operation(SM501State *s) if ((s->twoD_source_base & 0x08000000) || (s->twoD_destination_base & 0x08000000)) { - printf("%s: only local memory is supported.\n", __func__); - abort(); + qemu_log_mask(LOG_UNIMP, "sm501: only local memory is supported.\n"); + return; } switch (operation) { @@ -823,9 +823,9 @@ static void sm501_2d_operation(SM501State *s) break; default: - printf("non-implemented SM501 2D operation. %d\n", operation); - abort(); - break; + qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2D operation: %d\n", + operation); + return; } if (dst_base >= get_fb_addr(s, crt) && @@ -892,9 +892,8 @@ static uint64_t sm501_system_config_read(void *opaque, hwaddr addr, break; default: - printf("sm501 system config : not implemented register read." - " addr=%x\n", (int)addr); - abort(); + qemu_log_mask(LOG_UNIMP, "sm501: not implemented system config" + "register read. addr=%" HWADDR_PRIx "\n", addr); } return ret; @@ -948,15 +947,15 @@ static void sm501_system_config_write(void *opaque, hwaddr addr, break; case SM501_ENDIAN_CONTROL: if (value & 0x00000001) { - printf("sm501 system config : big endian mode not implemented.\n"); - abort(); + qemu_log_mask(LOG_UNIMP, "sm501: system config big endian mode not" + " implemented.\n"); } break; default: - printf("sm501 system config : not implemented register write." - " addr=%x, val=%x\n", (int)addr, (uint32_t)value); - abort(); + qemu_log_mask(LOG_UNIMP, "sm501: not implemented system config" + "register write. addr=%" HWADDR_PRIx + ", val=%" PRIx64 "\n", addr, value); } } @@ -1207,9 +1206,8 @@ static uint64_t sm501_disp_ctrl_read(void *opaque, hwaddr addr, break; default: - printf("sm501 disp ctrl : not implemented register read." - " addr=%x\n", (int)addr); - abort(); + qemu_log_mask(LOG_UNIMP, "sm501: not implemented disp ctrl register " + "read. addr=%" HWADDR_PRIx "\n", addr); } return ret; @@ -1345,9 +1343,9 @@ static void sm501_disp_ctrl_write(void *opaque, hwaddr addr, break; default: - printf("sm501 disp ctrl : not implemented register write." - " addr=%x, val=%x\n", (int)addr, (unsigned)value); - abort(); + qemu_log_mask(LOG_UNIMP, "sm501: not implemented disp ctrl register " + "write. addr=%" HWADDR_PRIx + ", val=%" PRIx64 "\n", addr, value); } } @@ -1433,9 +1431,8 @@ static uint64_t sm501_2d_engine_read(void *opaque, hwaddr addr, ret = 0; /* Should return interrupt status */ break; default: - printf("sm501 disp ctrl : not implemented register read." - " addr=%x\n", (int)addr); - abort(); + qemu_log_mask(LOG_UNIMP, "sm501: not implemented disp ctrl register " + "read. addr=%" HWADDR_PRIx "\n", addr); } return ret; @@ -1520,9 +1517,9 @@ static void sm501_2d_engine_write(void *opaque, hwaddr addr, /* ignored, writing 0 should clear interrupt status */ break; default: - printf("sm501 2d engine : not implemented register write." - " addr=%x, val=%x\n", (int)addr, (unsigned)value); - abort(); + qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2d engine register " + "write. addr=%" HWADDR_PRIx + ", val=%" PRIx64 "\n", addr, value); } } @@ -1670,9 +1667,9 @@ static void sm501_update_display(void *opaque) draw_line = draw_line32_funcs[dst_depth_index]; break; default: - printf("sm501 update display : invalid control register value.\n"); - abort(); - break; + qemu_log_mask(LOG_GUEST_ERROR, "sm501: update display" + "invalid control register value.\n"); + return; } /* set up to draw hardware cursor */ -- 2.21.3
Patchew URL: https://patchew.org/QEMU/cover.1590089984.git.balaton@eik.bme.hu/ Hi, This series failed the docker-quick@centos7 build test. Please find the testing commands and their output below. If you have Docker installed, you can probably reproduce it locally. === TEST SCRIPT BEGIN === #!/bin/bash make docker-image-centos7 V=1 NETWORK=1 time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1 === TEST SCRIPT END === TEST iotest-qcow2: 039 socket_accept failed: Resource temporarily unavailable ** ERROR:/tmp/qemu-test/src/tests/qtest/libqtest.c:301:qtest_init_without_qmp_handshake: assertion failed: (s->fd >= 0 && s->qmp_fd >= 0) /tmp/qemu-test/src/tests/qtest/libqtest.c:166: kill_qemu() tried to terminate QEMU process but encountered exit status 1 (expected 0) ERROR - Bail out! ERROR:/tmp/qemu-test/src/tests/qtest/libqtest.c:301:qtest_init_without_qmp_handshake: assertion failed: (s->fd >= 0 && s->qmp_fd >= 0) make: *** [check-qtest-x86_64] Error 1 make: *** Waiting for unfinished jobs.... TEST iotest-qcow2: 040 qemu-system-aarch64: -accel kvm: invalid accelerator kvm --- raise CalledProcessError(retcode, cmd) subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', '--label', 'com.qemu.instance.uuid=41602df6cffc4816b3eba0e376c10099', '-u', '1001', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', '-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 'SHOW_ENV=1', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', '/home/patchew/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', '/var/tmp/patchew-tester-tmp-oarug6fo/src/docker-src.2020-05-21-21.34.23.8549:/var/tmp/qemu:z,ro', 'qemu:centos7', '/var/tmp/qemu/run', 'test-quick']' returned non-zero exit status 2. filter=--filter=label=com.qemu.instance.uuid=41602df6cffc4816b3eba0e376c10099 make[1]: *** [docker-run] Error 1 make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-oarug6fo/src' make: *** [docker-run-test-quick@centos7] Error 2 real 16m0.876s user 0m10.783s The full log is available at http://patchew.org/logs/cover.1590089984.git.balaton@eik.bme.hu/testing.docker-quick@centos7/?type=message. --- Email generated automatically by Patchew [https://patchew.org/]. Please send your feedback to patchew-devel@redhat.com
On Thu, May 21, 2020 at 09:39:44PM +0200, BALATON Zoltan wrote: > Besides being faster this should also prevent malicious guests to > abuse 2D engine to overwrite data or cause a crash. > uint32_t src_base = s->twoD_source_base & 0x03FFFFFF; > - uint8_t *src = s->local_mem + src_base; > - val = *(_pixel_type *)&src[index_s]; \ Well, the advantage of *not* using pixman is that you can easily switch the code to use offsets instead of pointers, then apply the mask to the *final* offset to avoid oob data access: val = *(_pixel_type*)(&s->local_mem[(s->twoD_source_base + index_s) & 0x03FFFFFF]); > + if ((rop_mode && rop == 0x5) || (!rop_mode && rop == 0x55)) { > + /* Invert dest, is there a way to do this with pixman? */ PIXMAN_OP_XOR maybe? > + if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) { > + /* regions may overlap: copy via temporary */ The usual way for a hardware blitter is to have a direction bit, i.e. the guest os can ask to blit in top->bottom or bottom->top scanline ordering. The guest can use that to make sure the blit does not overwrite things. But note the guest can also intentionally use overlapping regions, i.e. memset(0) the first scanline, then use a blit with overlap to clear the whole screen. The later will surely break if you blit via temporary image ... > + pixman_blt((uint32_t *)&s->local_mem[src_base], > + (uint32_t *)&s->local_mem[dst_base], > + src_pitch * (1 << format) / sizeof(uint32_t), > + dst_pitch * (1 << format) / sizeof(uint32_t), > + 8 * (1 << format), 8 * (1 << format), > + src_x, src_y, dst_x, dst_y, width, height); See above, i'm not convinced pixman is the best way here. When using pixman I'd suggest: (1) src = pixman_image_create_bits_no_clear(...); (2) dst = pixman_image_create_bits_no_clear(...); (3) pixman_image_composite(PIXMAN_OP_SRC, src, NULL, dst, ...); (4) pixman_image_unref(src); (5) pixman_image_unref(dst); pixman_blt() is probably doing basically the same. The advantage of not using pixman_blt() is that (a) you can also use pixman ops other than PIXMAN_OP_SRC, and (b) you can have a helper function for (1)+(2) which very carefully applies sanity checks to make sure the pixman image created stays completely inside s->local_mem. (c) you have the option to completely rearrange the code flow, for example update the src pixman image whenever the guest touches src_base or src_pitch or format instead of having a create/op/unref cycle on every blitter op. > + pixman_fill((uint32_t *)&s->local_mem[dst_base], > + dst_pitch * (1 << format) / sizeof(uint32_t), > + 8 * (1 << format), dst_x, dst_y, width, height, color); (1) src = pixman_image_create_solid(...), otherwise same as above ;) take care, Gerd
On Tue, 26 May 2020, Gerd Hoffmann wrote: > On Thu, May 21, 2020 at 09:39:44PM +0200, BALATON Zoltan wrote: >> Besides being faster this should also prevent malicious guests to >> abuse 2D engine to overwrite data or cause a crash. > >> uint32_t src_base = s->twoD_source_base & 0x03FFFFFF; >> - uint8_t *src = s->local_mem + src_base; > >> - val = *(_pixel_type *)&src[index_s]; \ > > Well, the advantage of *not* using pixman is that you can easily switch > the code to use offsets instead of pointers, then apply the mask to the > *final* offset to avoid oob data access: The mask applied to src_base is not to prevent overflow but to implement register limits. Only these bits are valid if I remember correctly, so even if I use offsets I need to check for overflow. This patch basically does that by changing parameters to unsigned to prevent them being negative, checking values we multiply by to prevent them to be zero and then calculating first and last offset and check if they are within vram. (Unless of course I've made a mistake somewhere.) This should prevent overflow with one check and does not need to apply a mask at every step. The vram size can also be different so it's not a fixed mask anyway. If not using pixman then I'd need to reimplement optimised 2D ops that will likely never be as good as pixman and no point in doing it several times for every device model so I'd rather try to use pixman where possible unless a better library is available. > val = *(_pixel_type*)(&s->local_mem[(s->twoD_source_base + index_s) & 0x03FFFFFF]); > >> + if ((rop_mode && rop == 0x5) || (!rop_mode && rop == 0x55)) { >> + /* Invert dest, is there a way to do this with pixman? */ > > PIXMAN_OP_XOR maybe? Maybe, but looking at the pixman source I couldn't decide if UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); seen here: https://cgit.freedesktop.org/pixman/tree/pixman/pixman-combine32.c#n396 is really the same as s ^ d. >> + if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) { >> + /* regions may overlap: copy via temporary */ > > The usual way for a hardware blitter is to have a direction bit, i.e. > the guest os can ask to blit in top->bottom or bottom->top scanline > ordering. The guest can use that to make sure the blit does not Yes, this is the rtl above (right to left) and AmigaOS sets this most of the time so only relying on that to detect overlaps is not efficient. > overwrite things. But note the guest can also intentionally use > overlapping regions, i.e. memset(0) the first scanline, then use a blit > with overlap to clear the whole screen. The later will surely break if > you blit via temporary image ... Fortunately no guest code seems to do that so unless we find one needing it I don't worry much about such rare cases. It would be best if pixman supported this but while I've found patches were submitted they did not get merged so far so using a temporary seems to be the simplest way that works well enough for now. >> + pixman_blt((uint32_t *)&s->local_mem[src_base], >> + (uint32_t *)&s->local_mem[dst_base], >> + src_pitch * (1 << format) / sizeof(uint32_t), >> + dst_pitch * (1 << format) / sizeof(uint32_t), >> + 8 * (1 << format), 8 * (1 << format), >> + src_x, src_y, dst_x, dst_y, width, height); > > See above, i'm not convinced pixman is the best way here. > When using pixman I'd suggest: > > (1) src = pixman_image_create_bits_no_clear(...); > (2) dst = pixman_image_create_bits_no_clear(...); > (3) pixman_image_composite(PIXMAN_OP_SRC, src, NULL, dst, ...); > (4) pixman_image_unref(src); > (5) pixman_image_unref(dst); > > pixman_blt() is probably doing basically the same. Actually not the same, pixman_blt is faster operating directly on pointers while we need all the pixman_image overhead to use pixman_image_composite. Blitter is used for a lot of small ops (I've seen AmigaOS even call it with 1 pixel regions) so going through pixman_image every time does not seem to be efficient. To implement more complex ops this may be needed so I may try to figure that out later but I'd need some test cases to test if the results are correct. The current patches do the same as before (except for some rare overlapping cases as you noted above but we haven't observed any yet) and fix the overflows so this was the best I could do in the time I had. Maybe I try to improve this later but don't plan to rewrite it now. > The advantage of not > using pixman_blt() is that > > (a) you can also use pixman ops other than PIXMAN_OP_SRC, and > (b) you can have a helper function for (1)+(2) which very carefully > applies sanity checks to make sure the pixman image created stays > completely inside s->local_mem. > (c) you have the option to completely rearrange the code flow, for > example update the src pixman image whenever the guest touches > src_base or src_pitch or format instead of having a > create/op/unref cycle on every blitter op. From traces I think most guest would write bltter related regs on every op so probably not worth the hassle to try to update regions on register access and we could do it on every op, possibly optimising 1 pixel blits and small regions via some special cases but even then simple copy image is probably the most common op that might worth doing via pixman_blt as it's expected to be frequently used so the less overhead is the better. Therefore I'd only use image_composite for more complex ops but that's too much effort for a relatively unused device model. Maybe for ati-vga I'll try to make it better but first should fix microengine for that so drivers can talk to it. I'd rather spend my limited free time on that than further improving sm501 unless some bugs show up. >> + pixman_fill((uint32_t *)&s->local_mem[dst_base], >> + dst_pitch * (1 << format) / sizeof(uint32_t), >> + 8 * (1 << format), dst_x, dst_y, width, height, color); > > (1) src = pixman_image_create_solid(...), otherwise same as above ;) Same argument as composite_image and for fill we don't even have any advantage so while for composite implementing other ops is a reason to not use pixman_blt I see no reason to not go the fastest way for fill. Regards, BALATON Zoltan
Hi, > > Well, the advantage of *not* using pixman is that you can easily switch > > the code to use offsets instead of pointers, then apply the mask to the > > *final* offset to avoid oob data access: > > The mask applied to src_base is not to prevent overflow but to implement > register limits. Yea, that was just a quick sketch to outline the idea without checking all details. > This patch basically does > that by changing parameters to unsigned to prevent them being negative, > checking values we multiply by to prevent them to be zero and then > calculating first and last offset and check if they are within vram. Well. With cirrus this proved to be fragile. The checks missed corner cases and we've got a series of CVEs in the blitter code. Switching to offsets + masking every vram access (see commit ffaf85777828) stopped that. > (Unless > of course I've made a mistake somewhere.) Exactly ... > This should prevent overflow with > one check and does not need to apply a mask at every step. The vram size can > also be different so it's not a fixed mask anyway. > > If not using pixman then I'd need to reimplement optimised 2D ops that will > likely never be as good as pixman and no point in doing it several times for > every device model so I'd rather try to use pixman where possible unless a > better library is available. Yes, performance-wise pixman is clearly the better choice. At the end of the day it is a security vs performance trade off. > > > + if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) { > > > + /* regions may overlap: copy via temporary */ > > > > The usual way for a hardware blitter is to have a direction bit, i.e. > > the guest os can ask to blit in top->bottom or bottom->top scanline > > ordering. The guest can use that to make sure the blit does not > > Yes, this is the rtl above (right to left) and AmigaOS sets this most of the > time so only relying on that to detect overlaps is not efficient. Hmm, checking rtl like that doesn't look correct to me then. When using the blitter to move a window you have to set/clear rtl depending on whenever you move the window up or down on the screen, and src+dst regions can overlap in both cases ... > > overwrite things. But note the guest can also intentionally use > > overlapping regions, i.e. memset(0) the first scanline, then use a blit > > with overlap to clear the whole screen. The later will surely break if > > you blit via temporary image ... > > Fortunately no guest code seems to do that so unless we find one needing it > I don't worry much about such rare cases. Ok. > > > + pixman_blt((uint32_t *)&s->local_mem[src_base], > > > + (uint32_t *)&s->local_mem[dst_base], > > > + src_pitch * (1 << format) / sizeof(uint32_t), > > > + dst_pitch * (1 << format) / sizeof(uint32_t), > > > + 8 * (1 << format), 8 * (1 << format), > > > + src_x, src_y, dst_x, dst_y, width, height); > > > > See above, i'm not convinced pixman is the best way here. > > When using pixman I'd suggest: > > > > (1) src = pixman_image_create_bits_no_clear(...); > > (2) dst = pixman_image_create_bits_no_clear(...); > > (3) pixman_image_composite(PIXMAN_OP_SRC, src, NULL, dst, ...); > > (4) pixman_image_unref(src); > > (5) pixman_image_unref(dst); > > > > pixman_blt() is probably doing basically the same. > > Actually not the same, pixman_blt is faster operating directly on pointers > while we need all the pixman_image overhead to use pixman_image_composite. Ok (I didn't check the pixman code). Given the use case (run a computer museum ;) I think we can live with the flaws of the pixman approach. Security shouldn't be that much of an issue here. The behavior and blitter use pattern of the guests is known too and unlikely to change. take care, Gerd
Hello, On Wed, 27 May 2020, Gerd Hoffmann wrote: >>> Well, the advantage of *not* using pixman is that you can easily switch >>> the code to use offsets instead of pointers, then apply the mask to the >>> *final* offset to avoid oob data access: >> >> The mask applied to src_base is not to prevent overflow but to implement >> register limits. > > Yea, that was just a quick sketch to outline the idea without checking > all details. > >> This patch basically does >> that by changing parameters to unsigned to prevent them being negative, >> checking values we multiply by to prevent them to be zero and then >> calculating first and last offset and check if they are within vram. > > Well. With cirrus this proved to be fragile. The checks missed corner > cases and we've got a series of CVEs in the blitter code. Switching to > offsets + masking every vram access (see commit ffaf85777828) stopped > that. > >> (Unless >> of course I've made a mistake somewhere.) > > Exactly ... Hopefully we can make the checks correct eventually. I think for sm501 it should already be OK, I'll need to check ati-vga again because I think there may be still a mistake in that. (It does not help that every device encode these values differently in registers.) >> This should prevent overflow with >> one check and does not need to apply a mask at every step. The vram size can >> also be different so it's not a fixed mask anyway. >> >> If not using pixman then I'd need to reimplement optimised 2D ops that will >> likely never be as good as pixman and no point in doing it several times for >> every device model so I'd rather try to use pixman where possible unless a >> better library is available. > > Yes, performance-wise pixman is clearly the better choice. At the end > of the day it is a security vs performance trade off. I prefer performance here if security can be achieved without loss of performance with correct checks so rather fix the checks until they are correct than do additional things in a loop. >>>> + if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) { >>>> + /* regions may overlap: copy via temporary */ >>> >>> The usual way for a hardware blitter is to have a direction bit, i.e. >>> the guest os can ask to blit in top->bottom or bottom->top scanline >>> ordering. The guest can use that to make sure the blit does not >> >> Yes, this is the rtl above (right to left) and AmigaOS sets this most of the >> time so only relying on that to detect overlaps is not efficient. > > Hmm, checking rtl like that doesn't look correct to me then. When using > the blitter to move a window you have to set/clear rtl depending on > whenever you move the window up or down on the screen, and src+dst > regions can overlap in both cases ... Pixman does left to right, top to bottom so we don't need special handling for such blits, they will work even for overlapping areas. Doing non overlapping blits should also work with whatever direction (but AmigaOS seems to use rtl as default even for non overlapping, maybe hardware prefers that or was easier to code somehow). The only case where pixman does not work is reverse direction overlapping areas which is checked here, although becuase of different strides and offsets it's hard to check exactly so we only do a crude check to see if the memory areas are overlapping at all. This should catch all bad cases and maybe some good ones but checking for those is probably as expensive as doing the blit instead. As you said this may not work in some cases but until we come across such cases I'd go with this simpler solution because otherwise we likely need to implement our own optimised blit routine. Unlike ati-vga, sm501 does not have independent direction bits so rtl seems to mean both right to left and bottom to top. Ati-vga has different bit for bottom to top so those with left to right could still use pixman_blt calling it in a reverse counting loop for every line but I did not go for that optimisation yet. For sm501 there's no such option. Possible furher optimisation could be handling 1 pixel and small regions directly where the overhead of calling pixman may be bigger than the gain from its optimised routines but I would need to measure that for which I have no time. >>> overwrite things. But note the guest can also intentionally use >>> overlapping regions, i.e. memset(0) the first scanline, then use a blit >>> with overlap to clear the whole screen. The later will surely break if >>> you blit via temporary image ... >> >> Fortunately no guest code seems to do that so unless we find one needing it >> I don't worry much about such rare cases. > > Ok. > >>>> + pixman_blt((uint32_t *)&s->local_mem[src_base], >>>> + (uint32_t *)&s->local_mem[dst_base], >>>> + src_pitch * (1 << format) / sizeof(uint32_t), >>>> + dst_pitch * (1 << format) / sizeof(uint32_t), >>>> + 8 * (1 << format), 8 * (1 << format), >>>> + src_x, src_y, dst_x, dst_y, width, height); >>> >>> See above, i'm not convinced pixman is the best way here. >>> When using pixman I'd suggest: >>> >>> (1) src = pixman_image_create_bits_no_clear(...); >>> (2) dst = pixman_image_create_bits_no_clear(...); >>> (3) pixman_image_composite(PIXMAN_OP_SRC, src, NULL, dst, ...); >>> (4) pixman_image_unref(src); >>> (5) pixman_image_unref(dst); >>> >>> pixman_blt() is probably doing basically the same. >> >> Actually not the same, pixman_blt is faster operating directly on pointers >> while we need all the pixman_image overhead to use pixman_image_composite. > > Ok (I didn't check the pixman code). You should. It's seriously undocumented and using it seems to need digging the code or maybe I've missed all the wonderful documentation? > Given the use case (run a computer museum ;) I think we can live with > the flaws of the pixman approach. Security shouldn't be that much of an > issue here. The behavior and blitter use pattern of the guests is known > too and unlikely to change. To my knowledge the sm501 is only used on an SH4 machine, the sam460ex and to run MorphOS on mac99 but ati-vga is already better for the latter so these are not security critical in my opinion. Regards, BALATON Zoltan
On Thu, May 21, 2020 at 09:39:44PM +0200, BALATON Zoltan wrote:
> Second try of clean ups and changes to hopefully improve 2D engine
> performance and fix a security bug in it. This one actually works with
> AmigaOS handling overlapping blits, fixes the overflow checks and adds
> Reviewed-by tags. Unless some problems are found this should be OK to
> merge.
Added to vga queue.
thanks,
Gerd