diff --git a/cpu-all.h b/cpu-all.h
index fadf89cfc19eea64152a71857678984d40beceef..7102014c7b8d4a963f28e8eafe5d0a502e0f5a1e 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -973,6 +973,15 @@ void cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_a
 void dump_exec_info(FILE *f,
                     int (*cpu_fprintf)(FILE *f, const char *fmt, ...));
 
+/* Coalesced MMIO regions are areas where write operations can be reordered.
+ * This usually implies that the writes are free of side effects, which allows
+ * them to be batched; batching can have a major impact on performance when
+ * using virtualization.
+ */
+void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
+
+void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
+
 /*******************************************/
 /* host CPU ticks (if available) */
 
diff --git a/exec.c b/exec.c
index 105812f7bacd9442bc8227734a249e352ceb0393..44f6a425565f05780b9b78c16e8d7cc5c503416c 100644
--- a/exec.c
+++ b/exec.c
@@ -2344,6 +2344,18 @@ ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
     return p->phys_offset;
 }
 
+void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
+{   /* Forward the region to KVM; does nothing unless KVM is enabled. */
+    if (kvm_enabled())
+        kvm_coalesce_mmio_region(addr, size); /* result is not checked */
+}
+
+void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
+{   /* Forward the removal to KVM; does nothing unless KVM is enabled. */
+    if (kvm_enabled())
+        kvm_uncoalesce_mmio_region(addr, size); /* result is not checked */
+}
+
 /* XXX: better than nothing */
 ram_addr_t qemu_ram_alloc(ram_addr_t size)
 {
diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c
index 56907193d57eab91030483e5cfdff2229b8f57a0..83c5f40d0f25bab980c46697bbacd70995130911 100644
--- a/hw/cirrus_vga.c
+++ b/hw/cirrus_vga.c
@@ -3220,6 +3220,7 @@ static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci)
                                            cirrus_vga_mem_write, s);
     cpu_register_physical_memory(isa_mem_base + 0x000a0000, 0x20000,
                                  s->vga_io_memory);
+    qemu_register_coalesced_mmio(isa_mem_base + 0x000a0000, 0x20000);
 
     s->sr[0x06] = 0x0f;
     if (device_id == CIRRUS_ID_CLGD5446) {
diff --git a/hw/e1000.c b/hw/e1000.c
index f07936fdc1d04b8687b101e1bd2d71d007a5b845..67a062a318431f2dc1b33af0e0160d621d6d00af 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -1001,10 +1001,22 @@ e1000_mmio_map(PCIDevice *pci_dev, int region_num,
                 uint32_t addr, uint32_t size, int type)
 {
     E1000State *d = (E1000State *)pci_dev;
+    int i;
+    const uint32_t excluded_regs[] = {
+        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
+        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
+    };
+
 
     DBGOUT(MMIO, "e1000_mmio_map addr=0x%08x 0x%08x\n", addr, size);
 
     cpu_register_physical_memory(addr, PNPMMIO_SIZE, d->mmio_index);
+    qemu_register_coalesced_mmio(addr, excluded_regs[0]);
+
+    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
+        qemu_register_coalesced_mmio(addr + excluded_regs[i] + 4,
+                                     excluded_regs[i + 1] -
+                                     excluded_regs[i] - 4);
 }
 
 void
diff --git a/hw/pci.c b/hw/pci.c
index a0f91a8c72bba42ae80da305234a4cda66c764e3..c48a75e9ec88618d248949778b7973bee2cd7ff5 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -279,6 +279,7 @@ static void pci_update_mappings(PCIDevice *d)
                         cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
                                                      r->size,
                                                      IO_MEM_UNASSIGNED);
+                        qemu_unregister_coalesced_mmio(r->addr, r->size);
                     }
                 }
                 r->addr = new_addr;
diff --git a/hw/vga.c b/hw/vga.c
index 132dd0bd1a22e762345e091b32140ad8e786926b..29354f495773024a7b0a81696bee692a3543a12e 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -2256,6 +2256,7 @@ void vga_init(VGAState *s)
     vga_io_memory = cpu_register_io_memory(0, vga_mem_read, vga_mem_write, s);
     cpu_register_physical_memory(isa_mem_base + 0x000a0000, 0x20000,
                                  vga_io_memory);
+    qemu_register_coalesced_mmio(isa_mem_base + 0x000a0000, 0x20000);
 }
 
 /* Memory mapped interface */
@@ -2330,6 +2331,7 @@ static void vga_mm_init(VGAState *s, target_phys_addr_t vram_base,
     cpu_register_physical_memory(ctrl_base, 0x100000, s_ioport_ctrl);
     s->bank_offset = 0;
     cpu_register_physical_memory(vram_base + 0x000a0000, 0x20000, vga_io_memory);
+    qemu_register_coalesced_mmio(vram_base + 0x000a0000, 0x20000);
 }
 
 int isa_vga_init(DisplayState *ds, uint8_t *vga_ram_base,
diff --git a/kvm-all.c b/kvm-all.c
index 8615bf6b27c9aeffec42988c753397000a9dd0c6..dad80df581d5724f8e575cbbf5f5e4c48b20fe47 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -24,6 +24,9 @@
 #include "sysemu.h"
 #include "kvm.h"
 
+/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
+#define PAGE_SIZE TARGET_PAGE_SIZE
+
 //#define DEBUG_KVM
 
 #ifdef DEBUG_KVM
@@ -52,6 +55,7 @@ struct KVMState
     KVMSlot slots[32];
     int fd;
     int vmfd;
+    int coalesced_mmio;
 };
 
 static KVMState *kvm_state;
@@ -228,6 +232,44 @@ out:
     qemu_free(d.dirty_bitmap);
 }
 
+/* Register [start, start + size) with KVM as a coalesced MMIO zone.  Returns
+ * the ioctl result, or -ENOSYS when coalesced MMIO support is unavailable. */
+int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
+{
+    int ret = -ENOSYS;
+#ifdef KVM_CAP_COALESCED_MMIO
+    KVMState *s = kvm_state;
+    if (s->coalesced_mmio) {
+        /* Zero-fill first so no indeterminate member reaches the kernel. */
+        struct kvm_coalesced_mmio_zone zone = { 0 };
+
+        zone.addr = start;
+        zone.size = size;
+        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
+    }
+#endif
+    return ret;
+}
+
+/* Remove the coalesced MMIO zone [start, start + size) from KVM.  Returns
+ * the ioctl result, or -ENOSYS when coalesced MMIO support is unavailable. */
+int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
+{
+    int ret = -ENOSYS;
+#ifdef KVM_CAP_COALESCED_MMIO
+    KVMState *s = kvm_state;
+    if (s->coalesced_mmio) {
+        /* Zero-fill first so no indeterminate member reaches the kernel. */
+        struct kvm_coalesced_mmio_zone zone = { 0 };
+
+        zone.addr = start;
+        zone.size = size;
+        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
+    }
+#endif
+    return ret;
+}
+
 int kvm_init(int smp_cpus)
 {
     KVMState *s;
@@ -298,6 +340,13 @@ int kvm_init(int smp_cpus)
         goto err;
     }
 
+    s->coalesced_mmio = 0;
+#ifdef KVM_CAP_COALESCED_MMIO
+    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
+    if (ret > 0)
+        s->coalesced_mmio = ret;
+#endif
+
     ret = kvm_arch_init(s, smp_cpus);
     if (ret < 0)
         goto err;
@@ -357,6 +406,27 @@ static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
     return 1;
 }
 
+/* Drain the coalesced MMIO ring located s->coalesced_mmio pages past @run:
+ * replay each entry as a physical memory write, starting at ring->first. */
+static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
+{
+#ifdef KVM_CAP_COALESCED_MMIO
+    KVMState *s = kvm_state;
+    struct kvm_coalesced_mmio_ring *ring;
+
+    if (!s->coalesced_mmio)
+        return;
+
+    ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
+    while (ring->first != ring->last) {
+        struct kvm_coalesced_mmio *ent = &ring->coalesced_mmio[ring->first];
+        cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
+        /* FIXME smp_wmb() */
+        ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
+    }
+#endif
+}
+
 int kvm_cpu_exec(CPUState *env)
 {
     struct kvm_run *run = env->kvm_run;
@@ -387,6 +457,8 @@ int kvm_cpu_exec(CPUState *env)
             abort();
         }
 
+        kvm_run_coalesced_mmio(env, run);
+
         ret = 0; /* exit loop */
         switch (run->exit_reason) {
         case KVM_EXIT_IO:
diff --git a/kvm.h b/kvm.h
index ec0083f7e8b1657abeba1454116d8dc79d4a4d82..ac464ab1bf9c4db7f9abc17bd63f5b7dde0bae8a 100644
--- a/kvm.h
+++ b/kvm.h
@@ -45,6 +45,9 @@ int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len);
 
 int kvm_has_sync_mmu(void);
 
+int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
+int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
+
 /* internal API */
 
 struct KVMState;