diff --git a/hw/display/vga.c b/hw/display/vga.c
index 721309f1aa65e57922f121bbe874083a1dd2d0c9..451c3543bc881fd85544112077370c3ac60b2055 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -1374,10 +1374,14 @@ enum {
     VGA_DRAW_LINE4D2,
     VGA_DRAW_LINE8D2,
     VGA_DRAW_LINE8,
-    VGA_DRAW_LINE15,
-    VGA_DRAW_LINE16,
-    VGA_DRAW_LINE24,
-    VGA_DRAW_LINE32,
+    VGA_DRAW_LINE15_LE,
+    VGA_DRAW_LINE16_LE,
+    VGA_DRAW_LINE24_LE,
+    VGA_DRAW_LINE32_LE,
+    VGA_DRAW_LINE15_BE,
+    VGA_DRAW_LINE16_BE,
+    VGA_DRAW_LINE24_BE,
+    VGA_DRAW_LINE32_BE,
     VGA_DRAW_LINE_NB,
 };
 
@@ -1388,10 +1392,14 @@ static vga_draw_line_func * const vga_draw_line_table[VGA_DRAW_LINE_NB] = {
     vga_draw_line4d2,
     vga_draw_line8d2,
     vga_draw_line8,
-    vga_draw_line15,
-    vga_draw_line16,
-    vga_draw_line24,
-    vga_draw_line32,
+    vga_draw_line15_le,
+    vga_draw_line16_le,
+    vga_draw_line24_le,
+    vga_draw_line32_le,
+    vga_draw_line15_be,
+    vga_draw_line16_be,
+    vga_draw_line24_be,
+    vga_draw_line32_be,
 };
 
 static int vga_get_bpp(VGACommonState *s)
@@ -1464,10 +1472,15 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
     uint8_t *d;
     uint32_t v, addr1, addr;
     vga_draw_line_func *vga_draw_line;
-#if defined(HOST_WORDS_BIGENDIAN) == defined(TARGET_WORDS_BIGENDIAN)
-    static const bool byteswap = false;
+#if defined(TARGET_WORDS_BIGENDIAN)
+    static const bool big_endian_fb = true;
 #else
-    static const bool byteswap = true;
+    static const bool big_endian_fb = false;
+#endif
+#if defined(HOST_WORDS_BIGENDIAN)
+    static const bool byteswap = !big_endian_fb;
+#else
+    static const bool byteswap = big_endian_fb;
 #endif
 
     full_update |= update_basic_params(s);
@@ -1572,19 +1585,19 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
             bits = 8;
             break;
         case 15:
-            v = VGA_DRAW_LINE15;
+            v = big_endian_fb ? VGA_DRAW_LINE15_BE : VGA_DRAW_LINE15_LE;
             bits = 16;
             break;
         case 16:
-            v = VGA_DRAW_LINE16;
+            v = big_endian_fb ? VGA_DRAW_LINE16_BE : VGA_DRAW_LINE16_LE;
             bits = 16;
             break;
         case 24:
-            v = VGA_DRAW_LINE24;
+            v = big_endian_fb ? VGA_DRAW_LINE24_BE : VGA_DRAW_LINE24_LE;
             bits = 24;
             break;
         case 32:
-            v = VGA_DRAW_LINE32;
+            v = big_endian_fb ? VGA_DRAW_LINE32_BE : VGA_DRAW_LINE32_LE;
             bits = 32;
             break;
         }
diff --git a/hw/display/vga_template.h b/hw/display/vga_template.h
index 0660b520fb7e91328f1a19c3f86f4f554fe2d1ac..94f6de2046ff33445c07aa04060c83970fe778e6 100644
--- a/hw/display/vga_template.h
+++ b/hw/display/vga_template.h
@@ -278,21 +278,36 @@ static void vga_draw_line8(VGACommonState *s1, uint8_t *d,
     }
 }
 
-
-/* XXX: optimize */
-
 /*
  * 15 bit color
  */
-static void vga_draw_line15(VGACommonState *s1, uint8_t *d,
-                            const uint8_t *s, int width)
+static void vga_draw_line15_le(VGACommonState *s1, uint8_t *d,
+                               const uint8_t *s, int width)
 {
     int w;
     uint32_t v, r, g, b;
 
     w = width;
     do {
-        v = lduw_p((void *)s);
+        v = lduw_le_p((void *)s);
+        r = (v >> 7) & 0xf8;
+        g = (v >> 2) & 0xf8;
+        b = (v << 3) & 0xf8;
+        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
+        s += 2;
+        d += 4;
+    } while (--w != 0);
+}
+
+static void vga_draw_line15_be(VGACommonState *s1, uint8_t *d,
+                               const uint8_t *s, int width)
+{
+    int w;
+    uint32_t v, r, g, b;
+
+    w = width;
+    do {
+        v = lduw_be_p((void *)s);
         r = (v >> 7) & 0xf8;
         g = (v >> 2) & 0xf8;
         b = (v << 3) & 0xf8;
@@ -305,15 +320,33 @@ static void vga_draw_line15(VGACommonState *s1, uint8_t *d,
 /*
  * 16 bit color
  */
-static void vga_draw_line16(VGACommonState *s1, uint8_t *d,
-                            const uint8_t *s, int width)
+static void vga_draw_line16_le(VGACommonState *s1, uint8_t *d,
+                               const uint8_t *s, int width)
 {
     int w;
     uint32_t v, r, g, b;
 
     w = width;
     do {
-        v = lduw_p((void *)s);
+        v = lduw_le_p((void *)s);
+        r = (v >> 8) & 0xf8;
+        g = (v >> 3) & 0xfc;
+        b = (v << 3) & 0xf8;
+        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
+        s += 2;
+        d += 4;
+    } while (--w != 0);
+}
+
+static void vga_draw_line16_be(VGACommonState *s1, uint8_t *d,
+                               const uint8_t *s, int width)
+{
+    int w;
+    uint32_t v, r, g, b;
+
+    w = width;
+    do {
+        v = lduw_be_p((void *)s);
         r = (v >> 8) & 0xf8;
         g = (v >> 3) & 0xfc;
         b = (v << 3) & 0xf8;
@@ -326,23 +359,34 @@ static void vga_draw_line16(VGACommonState *s1, uint8_t *d,
 /*
  * 24 bit color
  */
-static void vga_draw_line24(VGACommonState *s1, uint8_t *d,
-                            const uint8_t *s, int width)
+static void vga_draw_line24_le(VGACommonState *s1, uint8_t *d,
+                               const uint8_t *s, int width)
 {
     int w;
     uint32_t r, g, b;
 
     w = width;
     do {
-#if defined(TARGET_WORDS_BIGENDIAN)
-        r = s[0];
-        g = s[1];
-        b = s[2];
-#else
         b = s[0];
         g = s[1];
         r = s[2];
-#endif
+        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
+        s += 3;
+        d += 4;
+    } while (--w != 0);
+}
+
+static void vga_draw_line24_be(VGACommonState *s1, uint8_t *d,
+                               const uint8_t *s, int width)
+{
+    int w;
+    uint32_t r, g, b;
+
+    w = width;
+    do {
+        r = s[0];
+        g = s[1];
+        b = s[2];
         ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
         s += 3;
         d += 4;
@@ -352,10 +396,10 @@ static void vga_draw_line24(VGACommonState *s1, uint8_t *d,
 /*
  * 32 bit color
  */
-static void vga_draw_line32(VGACommonState *s1, uint8_t *d,
-                            const uint8_t *s, int width)
+static void vga_draw_line32_le(VGACommonState *s1, uint8_t *d,
+                               const uint8_t *s, int width)
 {
-#if defined(HOST_WORDS_BIGENDIAN) == defined(TARGET_WORDS_BIGENDIAN)
+#ifndef HOST_WORDS_BIGENDIAN
     memcpy(d, s, width * 4);
 #else
     int w;
@@ -363,15 +407,30 @@ static void vga_draw_line32(VGACommonState *s1, uint8_t *d,
 
     w = width;
     do {
-#if defined(TARGET_WORDS_BIGENDIAN)
-        r = s[1];
-        g = s[2];
-        b = s[3];
-#else
         b = s[0];
         g = s[1];
         r = s[2];
+        ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
+        s += 4;
+        d += 4;
+    } while (--w != 0);
 #endif
+}
+
+static void vga_draw_line32_be(VGACommonState *s1, uint8_t *d,
+                               const uint8_t *s, int width)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+    memcpy(d, s, width * 4);
+#else
+    int w;
+    uint32_t r, g, b;
+
+    w = width;
+    do {
+        r = s[1];
+        g = s[2];
+        b = s[3];
         ((uint32_t *)d)[0] = rgb_to_pixel32(r, g, b);
         s += 4;
         d += 4;