diff --git a/.gitignore b/.gitignore
index e32a58417aa441de7a518f0a6d15ce5b7c5c9d5b..090f974cb91c7e7b26616b09174af186624dd112 100644
--- a/.gitignore
+++ b/.gitignore
@@ -109,3 +109,4 @@ cscope.*
 tags
 TAGS
 *~
+/tests/qemu-iotests/common.env
diff --git a/MAINTAINERS b/MAINTAINERS
index 29c6834852d3226cd20dc8fc9d7e8911631036d1..430688dcab573d701cd0423cb54c29a730f8cacc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -757,6 +757,7 @@ F: aio-*.c
 F: block*
 F: block/
 F: hw/block/
+F: migration/block*
 F: qemu-img*
 F: qemu-io*
 F: tests/image-fuzzer/
@@ -1105,7 +1106,6 @@ S: Supported
 F: block/ssh.c
 
 ARCHIPELAGO
-M: Chrysostomos Nanakos <cnanakos@grnet.gr>
 M: Chrysostomos Nanakos <chris@include.gr>
 S: Maintained
 F: block/archipelago.c
diff --git a/async.c b/async.c
index 3939b795e59bda9dfc488348382a91bd71162f90..2be88cc9e9a28093937423fc7e39474c0cd825bb 100644
--- a/async.c
+++ b/async.c
@@ -44,10 +44,12 @@ struct QEMUBH {
 QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
 {
     QEMUBH *bh;
-    bh = g_new0(QEMUBH, 1);
-    bh->ctx = ctx;
-    bh->cb = cb;
-    bh->opaque = opaque;
+    bh = g_new(QEMUBH, 1);
+    *bh = (QEMUBH){
+        .ctx = ctx,
+        .cb = cb,
+        .opaque = opaque,
+    };
     qemu_mutex_lock(&ctx->bh_lock);
     bh->next = ctx->first_bh;
     /* Make sure that the members are ready before putting bh into list */
@@ -300,6 +302,7 @@ AioContext *aio_context_new(Error **errp)
         error_setg_errno(errp, -ret, "Failed to initialize event notifier");
         return NULL;
     }
+    g_source_set_can_recurse(&ctx->source, true);
     aio_set_event_notifier(ctx, &ctx->notifier,
                            (EventNotifierHandler *)
                            event_notifier_test_and_clear);
diff --git a/block.c b/block.c
index 4165d4265cd3ac6ac009f6af101d82d5de4440db..cbe4a32a5a32b65f846078e6457906f91592719b 100644
--- a/block.c
+++ b/block.c
@@ -97,6 +97,10 @@ static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
     QLIST_HEAD_INITIALIZER(bdrv_drivers);
 
+static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+                           int nr_sectors);
+static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
+                             int nr_sectors);
 /* If non-zero, use only whitelisted block drivers */
 static int use_bdrv_whitelist;
 
@@ -303,15 +307,32 @@ void path_combine(char *dest, int dest_size,
     }
 }
 
-void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
+void bdrv_get_full_backing_filename_from_filename(const char *backed,
+                                                  const char *backing,
+                                                  char *dest, size_t sz,
+                                                  Error **errp)
 {
-    if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
-        pstrcpy(dest, sz, bs->backing_file);
+    if (backing[0] == '\0' || path_has_protocol(backing) ||
+        path_is_absolute(backing))
+    {
+        pstrcpy(dest, sz, backing);
+    } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
+        error_setg(errp, "Cannot use relative backing file names for '%s'",
+                   backed);
     } else {
-        path_combine(dest, sz, bs->filename, bs->backing_file);
+        path_combine(dest, sz, backed, backing);
     }
 }
 
+void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
+                                    Error **errp)
+{
+    char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
+
+    bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
+                                                 dest, sz, errp);
+}
+
 void bdrv_register(BlockDriver *bdrv)
 {
     /* Block drivers without coroutine functions need emulation */
@@ -1179,7 +1200,7 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
 
     bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
     /* Otherwise we won't be able to commit due to check in bdrv_commit */
-    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
+    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
                     bs->backing_blocker);
 out:
     bdrv_refresh_limits(bs, NULL);
@@ -1217,7 +1238,14 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
         QDECREF(options);
         goto free_exit;
     } else {
-        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
+        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
+                                       &local_err);
+        if (local_err) {
+            ret = -EINVAL;
+            error_propagate(errp, local_err);
+            QDECREF(options);
+            goto free_exit;
+        }
     }
 
     if (!bs->drv || !bs->drv->supports_backing) {
@@ -2188,8 +2216,8 @@ int bdrv_commit(BlockDriverState *bs)
         return -ENOTSUP;
     }
 
-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) ||
-        bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) {
+    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
+        bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
         return -EBUSY;
     }
 
@@ -3034,18 +3062,16 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
 
         max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
                                   align >> BDRV_SECTOR_BITS);
-        if (max_nb_sectors > 0) {
+        if (nb_sectors < max_nb_sectors) {
+            ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+        } else if (max_nb_sectors > 0) {
             QEMUIOVector local_qiov;
-            size_t local_sectors;
-
-            max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS);
-            local_sectors = MIN(max_nb_sectors, nb_sectors);
 
             qemu_iovec_init(&local_qiov, qiov->niov);
             qemu_iovec_concat(&local_qiov, qiov, 0,
-                              local_sectors * BDRV_SECTOR_SIZE);
+                              max_nb_sectors * BDRV_SECTOR_SIZE);
 
-            ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
+            ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
                                      &local_qiov);
 
             qemu_iovec_destroy(&local_qiov);
@@ -3218,6 +3244,9 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
 
         if (ret == -ENOTSUP) {
             /* Fall back to bounce buffer if write zeroes is unsupported */
+            int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
+                                            MAX_WRITE_ZEROES_DEFAULT);
+            num = MIN(num, max_xfer_len);
             iov.iov_len = num * BDRV_SECTOR_SIZE;
             if (iov.iov_base == NULL) {
                 iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
@@ -3234,7 +3263,7 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
             /* Keep bounce buffer around if it is big enough for all
              * all future requests.
              */
-            if (num < max_write_zeroes) {
+            if (num < max_xfer_len) {
                 qemu_vfree(iov.iov_base);
                 iov.iov_base = NULL;
             }
@@ -5389,8 +5418,20 @@ void bdrv_dirty_iter_init(BlockDriverState *bs,
     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
 }
 
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
-                    int nr_sectors)
+void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                           int64_t cur_sector, int nr_sectors)
+{
+    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                             int64_t cur_sector, int nr_sectors)
+{
+    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+                           int nr_sectors)
 {
     BdrvDirtyBitmap *bitmap;
     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
@@ -5398,7 +5439,8 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
     }
 }
 
-void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
+static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
+                             int nr_sectors)
 {
     BdrvDirtyBitmap *bitmap;
     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
@@ -5637,16 +5679,26 @@ void bdrv_img_create(const char *filename, const char *fmt,
     if (size == -1) {
         if (backing_file) {
             BlockDriverState *bs;
+            char *full_backing = g_new0(char, PATH_MAX);
             int64_t size;
             int back_flags;
 
+            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
+                                                         full_backing, PATH_MAX,
+                                                         &local_err);
+            if (local_err) {
+                g_free(full_backing);
+                goto out;
+            }
+
             /* backing files always opened read-only */
             back_flags =
                 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
 
             bs = NULL;
-            ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags,
+            ret = bdrv_open(&bs, full_backing, NULL, NULL, back_flags,
                             backing_drv, &local_err);
+            g_free(full_backing);
             if (ret < 0) {
                 goto out;
             }
diff --git a/block/backup.c b/block/backup.c
index 792e65514bfdaa75e95500cc711aee60c437ffd9..1c535b1ab986fc753d545a52afba76167162ae5f 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -360,6 +360,7 @@ static void coroutine_fn backup_run(void *opaque)
     hbitmap_free(job->bitmap);
 
     bdrv_iostatus_disable(target);
+    bdrv_op_unblock_all(target, job->common.blocker);
 
     data = g_malloc(sizeof(*data));
     data->ret = ret;
@@ -379,6 +380,11 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
     assert(target);
     assert(cb);
 
+    if (bs == target) {
+        error_setg(errp, "Source and target cannot be the same");
+        return;
+    }
+
     if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
          on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
         !bdrv_iostatus_is_enabled(bs)) {
@@ -386,6 +392,26 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
 
+    if (!bdrv_is_inserted(bs)) {
+        error_setg(errp, "Device is not inserted: %s",
+                   bdrv_get_device_name(bs));
+        return;
+    }
+
+    if (!bdrv_is_inserted(target)) {
+        error_setg(errp, "Device is not inserted: %s",
+                   bdrv_get_device_name(target));
+        return;
+    }
+
+    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
+        return;
+    }
+
+    if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
+        return;
+    }
+
     len = bdrv_getlength(bs);
     if (len < 0) {
         error_setg_errno(errp, -len, "unable to get length for '%s'",
@@ -399,6 +425,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
 
+    bdrv_op_block_all(target, job->common.blocker);
+
     job->on_source_error = on_source_error;
     job->on_target_error = on_target_error;
     job->target = target;
diff --git a/block/block-backend.c b/block/block-backend.c
index ef16d7356a87fb433390d2e9f539ebda637782a4..d00c129f15c848febf9f6ac7c54420e763efd3ef 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -260,9 +260,6 @@ int blk_attach_dev(BlockBackend *blk, void *dev)
     blk_ref(blk);
     blk->dev = dev;
     bdrv_iostatus_reset(blk->bs);
-
-    /* We're expecting I/O from the device so bump up coroutine pool size */
-    qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION);
     return 0;
 }
 
@@ -290,7 +287,6 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
     blk->dev_ops = NULL;
     blk->dev_opaque = NULL;
     bdrv_set_guest_block_size(blk->bs, 512);
-    qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION);
     blk_unref(blk);
 }
 
diff --git a/block/mirror.c b/block/mirror.c
index 2c6dd2a4c178bb01a427bd2a73e36032d78ea8d2..9019d1ba56c58cb54e06eb9db8b81e80545069e1 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -128,7 +128,8 @@ static void mirror_write_complete(void *opaque, int ret)
         BlockDriverState *source = s->common.bs;
         BlockErrorAction action;
 
-        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
+        bdrv_set_dirty_bitmap(source, s->dirty_bitmap, op->sector_num,
+                              op->nb_sectors);
         action = mirror_error_action(s, false, -ret);
         if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
             s->ret = ret;
@@ -145,7 +146,8 @@ static void mirror_read_complete(void *opaque, int ret)
         BlockDriverState *source = s->common.bs;
         BlockErrorAction action;
 
-        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
+        bdrv_set_dirty_bitmap(source, s->dirty_bitmap, op->sector_num,
+                              op->nb_sectors);
         action = mirror_error_action(s, true, -ret);
         if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
             s->ret = ret;
@@ -286,7 +288,8 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         next_sector += sectors_per_chunk;
     }
 
-    bdrv_reset_dirty(source, sector_num, nb_sectors);
+    bdrv_reset_dirty_bitmap(source, s->dirty_bitmap, sector_num,
+                            nb_sectors);
 
     /* Copy the dirty cluster.  */
     s->in_flight++;
@@ -442,7 +445,7 @@ static void coroutine_fn mirror_run(void *opaque)
 
             assert(n > 0);
             if (ret == 1) {
-                bdrv_set_dirty(bs, sector_num, n);
+                bdrv_set_dirty_bitmap(bs, s->dirty_bitmap, sector_num, n);
                 sector_num = next;
             } else {
                 sector_num += n;
diff --git a/block/qapi.c b/block/qapi.c
index fa68ba731f39b4e36ac7339202b31a87e60a0b57..a6fd6f7ab225b7fa09f42038c45ea69365104121 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -214,7 +214,12 @@ void bdrv_query_image_info(BlockDriverState *bs,
         info->backing_filename = g_strdup(backing_filename);
         info->has_backing_filename = true;
         bdrv_get_full_backing_filename(bs, backing_filename2,
-                                       sizeof(backing_filename2));
+                                       sizeof(backing_filename2), &err);
+        if (err) {
+            error_propagate(errp, err);
+            qapi_free_ImageInfo(info);
+            return;
+        }
 
         if (strcmp(backing_filename, backing_filename2) != 0) {
             info->full_backing_filename =
diff --git a/block/vmdk.c b/block/vmdk.c
index bfff900ba6c893d7ce62b5470250c17103bc9b2e..52cb8888e55390b00b0e2fef208db6d137dddcd2 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1891,8 +1891,19 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
     }
     if (backing_file) {
         BlockDriverState *bs = NULL;
-        ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_NO_BACKING, NULL,
+        char *full_backing = g_new0(char, PATH_MAX);
+        bdrv_get_full_backing_filename_from_filename(filename, backing_file,
+                                                     full_backing, PATH_MAX,
+                                                     &local_err);
+        if (local_err) {
+            g_free(full_backing);
+            error_propagate(errp, local_err);
+            ret = -ENOENT;
+            goto exit;
+        }
+        ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, NULL,
                         errp);
+        g_free(full_backing);
         if (ret != 0) {
             goto exit;
         }
diff --git a/blockdev.c b/blockdev.c
index 5651a8e140b9da2880261049dfbe692058ac43b5..d59efd3f157e85f557c2043b27b943963ae855b6 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1559,6 +1559,79 @@ static void drive_backup_clean(BlkTransactionState *common)
     }
 }
 
+typedef struct BlockdevBackupState {
+    BlkTransactionState common;
+    BlockDriverState *bs;
+    BlockJob *job;
+    AioContext *aio_context;
+} BlockdevBackupState;
+
+static void blockdev_backup_prepare(BlkTransactionState *common, Error **errp)
+{
+    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+    BlockdevBackup *backup;
+    BlockDriverState *bs, *target;
+    Error *local_err = NULL;
+
+    assert(common->action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
+    backup = common->action->blockdev_backup;
+
+    bs = bdrv_find(backup->device);
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, backup->device);
+        return;
+    }
+
+    target = bdrv_find(backup->target);
+    if (!target) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, backup->target);
+        return;
+    }
+
+    /* AioContext is released in .clean() */
+    state->aio_context = bdrv_get_aio_context(bs);
+    if (state->aio_context != bdrv_get_aio_context(target)) {
+        state->aio_context = NULL;
+        error_setg(errp, "Backup between two IO threads is not implemented");
+        return;
+    }
+    aio_context_acquire(state->aio_context);
+
+    qmp_blockdev_backup(backup->device, backup->target,
+                        backup->sync,
+                        backup->has_speed, backup->speed,
+                        backup->has_on_source_error, backup->on_source_error,
+                        backup->has_on_target_error, backup->on_target_error,
+                        &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    state->bs = bs;
+    state->job = state->bs->job;
+}
+
+static void blockdev_backup_abort(BlkTransactionState *common)
+{
+    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+    BlockDriverState *bs = state->bs;
+
+    /* Only cancel if it's the job we started */
+    if (bs && bs->job && bs->job == state->job) {
+        block_job_cancel_sync(bs->job);
+    }
+}
+
+static void blockdev_backup_clean(BlkTransactionState *common)
+{
+    BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+
+    if (state->aio_context) {
+        aio_context_release(state->aio_context);
+    }
+}
+
 static void abort_prepare(BlkTransactionState *common, Error **errp)
 {
     error_setg(errp, "Transaction aborted using Abort action");
@@ -1582,6 +1655,12 @@ static const BdrvActionOps actions[] = {
         .abort = drive_backup_abort,
         .clean = drive_backup_clean,
     },
+    [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
+        .instance_size = sizeof(BlockdevBackupState),
+        .prepare = blockdev_backup_prepare,
+        .abort = blockdev_backup_abort,
+        .clean = blockdev_backup_clean,
+    },
     [TRANSACTION_ACTION_KIND_ABORT] = {
         .instance_size = sizeof(BlkTransactionState),
         .prepare = abort_prepare,
@@ -2139,7 +2218,7 @@ void qmp_block_commit(const char *device,
     /* drain all i/o before commits */
     bdrv_drain_all();
 
-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, errp)) {
+    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) {
         goto out;
     }
 
@@ -2172,6 +2251,10 @@ void qmp_block_commit(const char *device,
 
     assert(bdrv_get_aio_context(base_bs) == aio_context);
 
+    if (bdrv_op_is_blocked(base_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
+        goto out;
+    }
+
     /* Do not allow attempts to commit an image into itself */
     if (top_bs == base_bs) {
         error_setg(errp, "cannot commit an image into itself");
@@ -2240,6 +2323,8 @@ void qmp_drive_backup(const char *device, const char *target,
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
+    /* Although backup_run has this check too, we need to use bs->drv below, so
+     * do an early check redundantly. */
     if (!bdrv_is_inserted(bs)) {
         error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
         goto out;
@@ -2256,6 +2341,7 @@ void qmp_drive_backup(const char *device, const char *target,
         }
     }
 
+    /* Early check to avoid creating target */
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
         goto out;
     }
@@ -2323,6 +2409,57 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
     return bdrv_named_nodes_list();
 }
 
+void qmp_blockdev_backup(const char *device, const char *target,
+                         enum MirrorSyncMode sync,
+                         bool has_speed, int64_t speed,
+                         bool has_on_source_error,
+                         BlockdevOnError on_source_error,
+                         bool has_on_target_error,
+                         BlockdevOnError on_target_error,
+                         Error **errp)
+{
+    BlockDriverState *bs;
+    BlockDriverState *target_bs;
+    Error *local_err = NULL;
+    AioContext *aio_context;
+
+    if (!has_speed) {
+        speed = 0;
+    }
+    if (!has_on_source_error) {
+        on_source_error = BLOCKDEV_ON_ERROR_REPORT;
+    }
+    if (!has_on_target_error) {
+        on_target_error = BLOCKDEV_ON_ERROR_REPORT;
+    }
+
+    bs = bdrv_find(device);
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return;
+    }
+
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
+    target_bs = bdrv_find(target);
+    if (!target_bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, target);
+        goto out;
+    }
+
+    bdrv_ref(target_bs);
+    bdrv_set_aio_context(target_bs, aio_context);
+    backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
+                 block_job_cb, bs, &local_err);
+    if (local_err != NULL) {
+        bdrv_unref(target_bs);
+        error_propagate(errp, local_err);
+    }
+out:
+    aio_context_release(aio_context);
+}
+
 #define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
 
 void qmp_drive_mirror(const char *device, const char *target,
diff --git a/coroutine-ucontext.c b/coroutine-ucontext.c
index 4bf2cde279b9ccab0d4b5e8b7b18948402a39749..259fcb48a42ab7346afac0708cfc6506add58675 100644
--- a/coroutine-ucontext.c
+++ b/coroutine-ucontext.c
@@ -25,7 +25,6 @@
 #include <stdlib.h>
 #include <setjmp.h>
 #include <stdint.h>
-#include <pthread.h>
 #include <ucontext.h>
 #include "qemu-common.h"
 #include "block/coroutine_int.h"
@@ -48,15 +47,8 @@ typedef struct {
 /**
  * Per-thread coroutine bookkeeping
  */
-typedef struct {
-    /** Currently executing coroutine */
-    Coroutine *current;
-
-    /** The default coroutine */
-    CoroutineUContext leader;
-} CoroutineThreadState;
-
-static pthread_key_t thread_state_key;
+static __thread CoroutineUContext leader;
+static __thread Coroutine *current;
 
 /*
  * va_args to makecontext() must be type 'int', so passing
@@ -68,36 +60,6 @@ union cc_arg {
     int i[2];
 };
 
-static CoroutineThreadState *coroutine_get_thread_state(void)
-{
-    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
-
-    if (!s) {
-        s = g_malloc0(sizeof(*s));
-        s->current = &s->leader.base;
-        pthread_setspecific(thread_state_key, s);
-    }
-    return s;
-}
-
-static void qemu_coroutine_thread_cleanup(void *opaque)
-{
-    CoroutineThreadState *s = opaque;
-
-    g_free(s);
-}
-
-static void __attribute__((constructor)) coroutine_init(void)
-{
-    int ret;
-
-    ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
-    if (ret != 0) {
-        fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
-        abort();
-    }
-}
-
 static void coroutine_trampoline(int i0, int i1)
 {
     union cc_arg arg;
@@ -193,15 +155,23 @@ void qemu_coroutine_delete(Coroutine *co_)
     g_free(co);
 }
 
-CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
-                                      CoroutineAction action)
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the sigsetjmp() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                      CoroutineAction action)
 {
     CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
     CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
-    CoroutineThreadState *s = coroutine_get_thread_state();
     int ret;
 
-    s->current = to_;
+    current = to_;
 
     ret = sigsetjmp(from->env, 0);
     if (ret == 0) {
@@ -212,14 +182,13 @@ CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
 
 Coroutine *qemu_coroutine_self(void)
 {
-    CoroutineThreadState *s = coroutine_get_thread_state();
-
-    return s->current;
+    if (!current) {
+        current = &leader.base;
+    }
+    return current;
 }
 
 bool qemu_in_coroutine(void)
 {
-    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
-
-    return s && s->current->caller;
+    return current && current->caller;
 }
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 2a28978cba418f1b3228174d2ec6f4fa23b56d7d..39c5d7103c093b1e46637a014ccd19ddab16dc41 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -198,7 +198,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
     blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_DRIVE_DEL, s->blocker);
     blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_BACKUP_SOURCE, s->blocker);
     blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_CHANGE, s->blocker);
-    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT, s->blocker);
+    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT_SOURCE, s->blocker);
+    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT_TARGET, s->blocker);
     blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_EJECT, s->blocker);
     blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, s->blocker);
     blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, s->blocker);
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index aa1ed986d2cef1dd3b875170f3172a97f4ab1970..ce079aefdd615ac1e0e6db3e970c38706c91307c 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -476,7 +476,8 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 
     switch (dw10) {
     case NVME_NUMBER_OF_QUEUES:
-        req->cqe.result = cpu_to_le32(n->num_queues);
+        req->cqe.result =
+            cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
         break;
     default:
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -490,7 +491,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 
     switch (dw10) {
     case NVME_NUMBER_OF_QUEUES:
-        req->cqe.result = cpu_to_le32(n->num_queues);
+        req->cqe.result =
+            cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
         break;
     default:
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -813,7 +815,7 @@ static int nvme_init(PCIDevice *pci_dev)
     NVME_CAP_SET_CSS(n->bar.cap, 1);
     NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
 
-    n->bar.vs = 0x00010001;
+    n->bar.vs = 0x00010100;
     n->bar.intmc = n->bar.intms = 0;
 
     for (i = 0; i < n->num_namespaces; i++) {
diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index c63b7e556e0fb2af35da089cb024b118ee937a6d..a71e6e014faba42ba3fbab503820aadf1f117bec 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -621,20 +621,107 @@ static void cmd_request_sense(IDEState *s, uint8_t *buf)
 
 static void cmd_inquiry(IDEState *s, uint8_t *buf)
 {
+    uint8_t page_code = buf[2];
     int max_len = buf[4];
 
-    buf[0] = 0x05; /* CD-ROM */
-    buf[1] = 0x80; /* removable */
-    buf[2] = 0x00; /* ISO */
-    buf[3] = 0x21; /* ATAPI-2 (XXX: put ATAPI-4 ?) */
-    buf[4] = 31; /* additional length */
-    buf[5] = 0; /* reserved */
-    buf[6] = 0; /* reserved */
-    buf[7] = 0; /* reserved */
-    padstr8(buf + 8, 8, "QEMU");
-    padstr8(buf + 16, 16, "QEMU DVD-ROM");
-    padstr8(buf + 32, 4, s->version);
-    ide_atapi_cmd_reply(s, 36, max_len);
+    unsigned idx = 0;
+    unsigned size_idx;
+    unsigned preamble_len;
+
+    /* If the EVPD (Enable Vital Product Data) bit is set in byte 1,
+     * we are being asked for a specific page of info indicated by byte 2. */
+    if (buf[1] & 0x01) {
+        preamble_len = 4;
+        size_idx = 3;
+
+        buf[idx++] = 0x05;      /* CD-ROM */
+        buf[idx++] = page_code; /* Page Code */
+        buf[idx++] = 0x00;      /* reserved */
+        idx++;                  /* length (set later) */
+
+        switch (page_code) {
+        case 0x00:
+            /* Supported Pages: List of supported VPD responses. */
+            buf[idx++] = 0x00; /* 0x00: Supported Pages, and: */
+            buf[idx++] = 0x83; /* 0x83: Device Identification. */
+            break;
+
+        case 0x83:
+            /* Device Identification. Each entry is optional, but the entries
+             * included here are modeled after libata's VPD responses.
+             * If the response is given, at least one entry must be present. */
+
+            /* Entry 1: Serial */
+            if (idx + 24 > max_len) {
+                /* Not enough room for even the first entry: */
+                /* 4 byte header + 20 byte string */
+                ide_atapi_cmd_error(s, ILLEGAL_REQUEST,
+                                    ASC_DATA_PHASE_ERROR);
+                return;
+            }
+            buf[idx++] = 0x02; /* Ascii */
+            buf[idx++] = 0x00; /* Vendor Specific */
+            buf[idx++] = 0x00;
+            buf[idx++] = 20;   /* Remaining length */
+            padstr8(buf + idx, 20, s->drive_serial_str);
+            idx += 20;
+
+            /* Entry 2: Drive Model and Serial */
+            if (idx + 72 > max_len) {
+                /* 4 (header) + 8 (vendor) + 60 (model & serial) */
+                goto out;
+            }
+            buf[idx++] = 0x02; /* Ascii */
+            buf[idx++] = 0x01; /* T10 Vendor */
+            buf[idx++] = 0x00;
+            buf[idx++] = 68;
+            padstr8(buf + idx, 8, "ATA"); /* Generic T10 vendor */
+            idx += 8;
+            padstr8(buf + idx, 40, s->drive_model_str);
+            idx += 40;
+            padstr8(buf + idx, 20, s->drive_serial_str);
+            idx += 20;
+
+            /* Entry 3: WWN */
+            if (s->wwn && (idx + 12 <= max_len)) {
+                /* 4 byte header + 8 byte wwn */
+                buf[idx++] = 0x01; /* Binary */
+                buf[idx++] = 0x03; /* NAA */
+                buf[idx++] = 0x00;
+                buf[idx++] = 0x08;
+                stq_be_p(&buf[idx], s->wwn);
+                idx += 8;
+            }
+            break;
+
+        default:
+            /* SPC-3, revision 23 sec. 6.4 */
+            ide_atapi_cmd_error(s, ILLEGAL_REQUEST,
+                                ASC_INV_FIELD_IN_CMD_PACKET);
+            return;
+        }
+    } else {
+        preamble_len = 5;
+        size_idx = 4;
+
+        buf[0] = 0x05; /* CD-ROM */
+        buf[1] = 0x80; /* removable */
+        buf[2] = 0x00; /* ISO */
+        buf[3] = 0x21; /* ATAPI-2 (XXX: put ATAPI-4 ?) */
+        /* buf[size_idx] set below. */
+        buf[5] = 0;    /* reserved */
+        buf[6] = 0;    /* reserved */
+        buf[7] = 0;    /* reserved */
+        padstr8(buf + 8, 8, "QEMU");
+        padstr8(buf + 16, 16, "QEMU DVD-ROM");
+        padstr8(buf + 32, 4, s->version);
+        idx = 36;
+    }
+
+ out:
+    buf[size_idx] = idx - preamble_len;
+    ide_atapi_cmd_reply(s, idx, max_len);
+    return;
 }
 
 static void cmd_get_configuration(IDEState *s, uint8_t *buf)
diff --git a/hw/ide/internal.h b/hw/ide/internal.h
index 8a3eca40d27a6586a9879ffce49e3ad1f4375529..c998003bf3b66f59bda7d5b36924f3a95a94baab 100644
--- a/hw/ide/internal.h
+++ b/hw/ide/internal.h
@@ -296,6 +296,7 @@ typedef struct IDEDMAOps IDEDMAOps;
 #define ASC_INCOMPATIBLE_FORMAT              0x30
 #define ASC_MEDIUM_NOT_PRESENT               0x3a
 #define ASC_SAVING_PARAMETERS_NOT_SUPPORTED  0x39
+#define ASC_DATA_PHASE_ERROR                 0x4b
 #define ASC_MEDIA_REMOVAL_PREVENTED          0x53
 
 #define CFA_NO_ERROR            0x00
diff --git a/include/block/block.h b/include/block/block.h
index 6e7275d95b5eeff1f5fd6b62cd1c9949e6cc2968..3082d2b80e55fd765989fc1de08f1b1bfc0d1a59 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -133,7 +133,8 @@ typedef enum BlockOpType {
     BLOCK_OP_TYPE_BACKUP_SOURCE,
     BLOCK_OP_TYPE_BACKUP_TARGET,
     BLOCK_OP_TYPE_CHANGE,
-    BLOCK_OP_TYPE_COMMIT,
+    BLOCK_OP_TYPE_COMMIT_SOURCE,
+    BLOCK_OP_TYPE_COMMIT_TARGET,
     BLOCK_OP_TYPE_DATAPLANE,
     BLOCK_OP_TYPE_DRIVE_DEL,
     BLOCK_OP_TYPE_EJECT,
@@ -396,7 +397,11 @@ const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
 void bdrv_get_backing_filename(BlockDriverState *bs,
                                char *filename, int filename_size);
 void bdrv_get_full_backing_filename(BlockDriverState *bs,
-                                    char *dest, size_t sz);
+                                    char *dest, size_t sz, Error **errp);
+void bdrv_get_full_backing_filename_from_filename(const char *backed,
+                                                  const char *backing,
+                                                  char *dest, size_t sz,
+                                                  Error **errp);
 int bdrv_is_snapshot(BlockDriverState *bs);
 
 int path_has_protocol(const char *path);
@@ -434,8 +439,10 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector);
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
-void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                           int64_t cur_sector, int nr_sectors);
+void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                             int64_t cur_sector, int nr_sectors);
 void bdrv_dirty_iter_init(BlockDriverState *bs,
                           BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
 int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
diff --git a/include/block/coroutine.h b/include/block/coroutine.h
index 793df0ef8bdffe20e262df3a3b04f328bfaaac9e..20c027a7fd0f7c60765276a525b417bc133c4ba6 100644
--- a/include/block/coroutine.h
+++ b/include/block/coroutine.h
@@ -216,14 +216,4 @@ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
  */
 void coroutine_fn yield_until_fd_readable(int fd);
 
-/**
- * Add or subtract from the coroutine pool size
- *
- * The coroutine implementation keeps a pool of coroutines to be reused by
- * qemu_coroutine_create().  This makes coroutine creation cheap.  Heavy
- * coroutine users should call this to reserve pool space.  Call it again with
- * a negative number to release pool space.
- */
-void qemu_coroutine_adjust_pool_size(int n);
-
 #endif /* QEMU_COROUTINE_H */
diff --git a/include/qemu/queue.h b/include/qemu/queue.h
index 42bcadfbb12c1ff48a3a0c680ec72efeafe9205a..a98eb3ad795537b31568d5881eecd7f7089abde9 100644
--- a/include/qemu/queue.h
+++ b/include/qemu/queue.h
@@ -191,8 +191,19 @@ struct {                                                                \
 } while (/*CONSTCOND*/0)
 
 #define QSLIST_INSERT_HEAD(head, elm, field) do {                        \
-        (elm)->field.sle_next = (head)->slh_first;                      \
-        (head)->slh_first = (elm);                                      \
+        (elm)->field.sle_next = (head)->slh_first;                       \
+        (head)->slh_first = (elm);                                       \
+} while (/*CONSTCOND*/0)
+
+#define QSLIST_INSERT_HEAD_ATOMIC(head, elm, field) do {                   \
+        do {                                                               \
+            (elm)->field.sle_next = (head)->slh_first;                     \
+        } while (atomic_cmpxchg(&(head)->slh_first, (elm)->field.sle_next, \
+                               (elm)) != (elm)->field.sle_next);           \
+} while (/*CONSTCOND*/0)
+
+#define QSLIST_MOVE_ATOMIC(dest, src) do {                               \
+        (dest)->slh_first = atomic_xchg(&(src)->slh_first, NULL);        \
 } while (/*CONSTCOND*/0)
 
 #define QSLIST_REMOVE_HEAD(head, field) do {                             \
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index f7e3b9b290247ed52f3c59dc47fbabfe44bafea5..e89fdc9785033b0bd00e494f091bc6c06074d986 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -61,4 +61,8 @@ bool qemu_thread_is_self(QemuThread *thread);
 void qemu_thread_exit(void *retval);
 void qemu_thread_naming(bool enable);
 
+struct Notifier;
+void qemu_thread_atexit_add(struct Notifier *notifier);
+void qemu_thread_atexit_remove(struct Notifier *notifier);
+
 #endif
diff --git a/migration/block.c b/migration/block.c
index 74d9eb125c7585bf10eae7060e98567d942323b6..0c7610600be4953a077f8ec2069db39a576478a2 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -303,7 +303,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
     blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                 nr_sectors, blk_mig_read_cb, blk);
 
-    bdrv_reset_dirty(bs, cur_sector, nr_sectors);
+    bdrv_reset_dirty_bitmap(bs, bmds->dirty_bitmap, cur_sector, nr_sectors);
     qemu_mutex_unlock_iothread();
 
     bmds->cur_sector = cur_sector + nr_sectors;
@@ -496,7 +496,8 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                 g_free(blk);
             }
 
-            bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
+            bdrv_reset_dirty_bitmap(bmds->bs, bmds->dirty_bitmap, sector,
+                                    nr_sectors);
             break;
         }
         sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
@@ -765,8 +766,8 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
                        block_mig_state.read_done * BLOCK_SIZE;
 
     /* Report at least one block pending during bulk phase */
-    if (pending == 0 && !block_mig_state.bulk_completed) {
-        pending = BLOCK_SIZE;
+    if (pending <= max_size && !block_mig_state.bulk_completed) {
+        pending = max_size + BLOCK_SIZE;
     }
     blk_mig_unlock();
     qemu_mutex_unlock_iothread();
diff --git a/qapi-schema.json b/qapi-schema.json
index 563b4ad98a813a395e830b647bb1723baf88a6fb..fbfc52f94dec7e0197c527603c53a58688d14aff 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1254,11 +1254,19 @@
 #
 # A discriminated record of operations that can be performed with
 # @transaction.
+#
+# Since 1.1
+#
+# drive-backup since 1.6
+# abort since 1.6
+# blockdev-snapshot-internal-sync since 1.7
+# blockdev-backup since 2.3
 ##
 { 'union': 'TransactionAction',
   'data': {
        'blockdev-snapshot-sync': 'BlockdevSnapshot',
        'drive-backup': 'DriveBackup',
+       'blockdev-backup': 'BlockdevBackup',
        'abort': 'Abort',
        'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternal'
    } }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 6e8db15861dae24c4df7f98c709b389e7c69e128..80984d1660af56d46b41d53c6b01c35116945cb6 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -423,7 +423,7 @@
 # @device: #optional If the stats are for a virtual block device, the name
 #          corresponding to the virtual block device.
 #
-# @device: #optional The node name of the device. (Since 2.3)
+# @node-name: #optional The node name of the device. (Since 2.3)
 #
 # @stats:  A @BlockDeviceStats for the device.
 #
@@ -702,6 +702,41 @@
             '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError' } }
 
+##
+# @BlockdevBackup
+#
+# @device: the name of the device which should be copied.
+#
+# @target: the name of the backup target device.
+#
+# @sync: what parts of the disk image should be copied to the destination
+#        (all the disk, only the sectors allocated in the topmost image, or
+#        only new I/O).
+#
+# @speed: #optional the maximum speed, in bytes per second. The default is 0,
+#         for unlimited.
+#
+# @on-source-error: #optional the action to take on an error on the source,
+#                   default 'report'.  'stop' and 'enospc' can only be used
+#                   if the block device supports io-status (see BlockInfo).
+#
+# @on-target-error: #optional the action to take on an error on the target,
+#                   default 'report' (no limitations, since this applies to
+#                   a different block device than @device).
+#
+# Note that @on-source-error and @on-target-error only affect background I/O.
+# If an error occurs during a guest write request, the device's rerror/werror
+# actions will be used.
+#
+# Since: 2.3
+##
+{ 'type': 'BlockdevBackup',
+  'data': { 'device': 'str', 'target': 'str',
+            'sync': 'MirrorSyncMode',
+            '*speed': 'int',
+            '*on-source-error': 'BlockdevOnError',
+            '*on-target-error': 'BlockdevOnError' } }
+
 ##
 # @blockdev-snapshot-sync
 #
@@ -821,6 +856,25 @@
 ##
 { 'command': 'drive-backup', 'data': 'DriveBackup' }
 
+##
+# @blockdev-backup
+#
+# Start a point-in-time copy of a block device to a new destination.  The
+# status of ongoing blockdev-backup operations can be checked with
+# query-block-jobs where the BlockJobInfo.type field has the value 'backup'.
+# The operation can be stopped before it has completed using the
+# block-job-cancel command.
+#
+# For the arguments, see the documentation of BlockdevBackup.
+#
+# Returns: Nothing on success.
+#          If @device or @target is not a valid block device, DeviceNotFound.
+#
+# Since 2.3
+##
+{ 'command': 'blockdev-backup', 'data': 'BlockdevBackup' }
+
+
 ##
 # @query-named-block-nodes
 #
diff --git a/qemu-coroutine.c b/qemu-coroutine.c
index bd574aa1b57123acbc2b32300af8cb9d0248aa94..525247b0501dd2eff6a2185ac9fff08e57c2c216 100644
--- a/qemu-coroutine.c
+++ b/qemu-coroutine.c
@@ -15,31 +15,59 @@
 #include "trace.h"
 #include "qemu-common.h"
 #include "qemu/thread.h"
+#include "qemu/atomic.h"
 #include "block/coroutine.h"
 #include "block/coroutine_int.h"
 
 enum {
-    POOL_DEFAULT_SIZE = 64,
+    POOL_BATCH_SIZE = 64,
 };
 
 /** Free list to speed up creation */
-static QemuMutex pool_lock;
-static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool);
-static unsigned int pool_size;
-static unsigned int pool_max_size = POOL_DEFAULT_SIZE;
+static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
+static unsigned int release_pool_size;
+static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
+static __thread unsigned int alloc_pool_size;
+static __thread Notifier coroutine_pool_cleanup_notifier;
+
+static void coroutine_pool_cleanup(Notifier *n, void *value)
+{
+    Coroutine *co;
+    Coroutine *tmp;
+
+    QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) {
+        QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+        qemu_coroutine_delete(co);
+    }
+}
 
 Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
 {
     Coroutine *co = NULL;
 
     if (CONFIG_COROUTINE_POOL) {
-        qemu_mutex_lock(&pool_lock);
-        co = QSLIST_FIRST(&pool);
+        co = QSLIST_FIRST(&alloc_pool);
+        if (!co) {
+            if (release_pool_size > POOL_BATCH_SIZE) {
+                /* Slow path; a good place to register the destructor, too.  */
+                if (!coroutine_pool_cleanup_notifier.notify) {
+                    coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
+                    qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
+                }
+
+                /* This is not exact; there could be a little skew between
+                 * release_pool_size and the actual size of release_pool.  But
+                 * it is just a heuristic, it does not need to be perfect.
+                 */
+                alloc_pool_size = atomic_xchg(&release_pool_size, 0);
+                QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
+                co = QSLIST_FIRST(&alloc_pool);
+            }
+        }
         if (co) {
-            QSLIST_REMOVE_HEAD(&pool, pool_next);
-            pool_size--;
+            QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+            alloc_pool_size--;
         }
-        qemu_mutex_unlock(&pool_lock);
     }
 
     if (!co) {
@@ -53,39 +81,24 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
 
 static void coroutine_delete(Coroutine *co)
 {
+    co->caller = NULL;
+
     if (CONFIG_COROUTINE_POOL) {
-        qemu_mutex_lock(&pool_lock);
-        if (pool_size < pool_max_size) {
-            QSLIST_INSERT_HEAD(&pool, co, pool_next);
-            co->caller = NULL;
-            pool_size++;
-            qemu_mutex_unlock(&pool_lock);
+        if (release_pool_size < POOL_BATCH_SIZE * 2) {
+            QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
+            atomic_inc(&release_pool_size);
+            return;
+        }
+        if (alloc_pool_size < POOL_BATCH_SIZE) {
+            QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
+            alloc_pool_size++;
             return;
         }
-        qemu_mutex_unlock(&pool_lock);
     }
 
     qemu_coroutine_delete(co);
 }
 
-static void __attribute__((constructor)) coroutine_pool_init(void)
-{
-    qemu_mutex_init(&pool_lock);
-}
-
-static void __attribute__((destructor)) coroutine_pool_cleanup(void)
-{
-    Coroutine *co;
-    Coroutine *tmp;
-
-    QSLIST_FOREACH_SAFE(co, &pool, pool_next, tmp) {
-        QSLIST_REMOVE_HEAD(&pool, pool_next);
-        qemu_coroutine_delete(co);
-    }
-
-    qemu_mutex_destroy(&pool_lock);
-}
-
 static void coroutine_swap(Coroutine *from, Coroutine *to)
 {
     CoroutineAction ret;
@@ -137,23 +150,3 @@ void coroutine_fn qemu_coroutine_yield(void)
     self->caller = NULL;
     coroutine_swap(self, to);
 }
-
-void qemu_coroutine_adjust_pool_size(int n)
-{
-    qemu_mutex_lock(&pool_lock);
-
-    pool_max_size += n;
-
-    /* Callers should never take away more than they added */
-    assert(pool_max_size >= POOL_DEFAULT_SIZE);
-
-    /* Trim oversized pool down to new max */
-    while (pool_size > pool_max_size) {
-        Coroutine *co = QSLIST_FIRST(&pool);
-        QSLIST_REMOVE_HEAD(&pool, pool_next);
-        pool_size--;
-        qemu_coroutine_delete(co);
-    }
-
-    qemu_mutex_unlock(&pool_lock);
-}
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 6945d30198c99d374fe3dce42fe25ab1e2c0fff3..8957201f73ae14148652085f7c8b1e7bd1124699 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1094,6 +1094,48 @@ Example:
                                                "sync": "full",
                                                "target": "backup.img" } }
 <- { "return": {} }
+
+EQMP
+
+    {
+        .name       = "blockdev-backup",
+        .args_type  = "sync:s,device:B,target:B,speed:i?,"
+                      "on-source-error:s?,on-target-error:s?",
+        .mhandler.cmd_new = qmp_marshal_input_blockdev_backup,
+    },
+
+SQMP
+blockdev-backup
+---------------
+
+The device version of drive-backup: this command takes an existing named device
+as backup target.
+
+Arguments:
+
+- "device": the name of the device which should be copied.
+            (json-string)
+- "target": the name of the backup target device. (json-string)
+- "sync": what parts of the disk image should be copied to the destination;
+          possibilities include "full" for all the disk, "top" for only the
+          sectors allocated in the topmost image, or "none" to only replicate
+          new I/O (MirrorSyncMode).
+- "speed": the maximum speed, in bytes per second (json-int, optional)
+- "on-source-error": the action to take on an error on the source, default
+                     'report'.  'stop' and 'enospc' can only be used
+                     if the block device supports io-status.
+                     (BlockdevOnError, optional)
+- "on-target-error": the action to take on an error on the target, default
+                     'report' (no limitations, since this applies to
+                     a different block device than device).
+                     (BlockdevOnError, optional)
+
+Example:
+-> { "execute": "blockdev-backup", "arguments": { "device": "src-id",
+                                                  "sync": "full",
+                                                  "target": "tgt-id" } }
+<- { "return": {} }
+
 EQMP
 
     {
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 053e4320fc4969219dade2cbe96ba0f7ac69a2e7..5df61f9aa97b8e0a05a33199ccd80dfe473a21ba 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1639,7 +1639,13 @@ sub process {
 			#print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n";
 			#print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n";
 
-			if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
+			# The length of the "previous line" is checked against 80 because it
+			# includes the + at the beginning of the line (if the actual line has
+			# 79 or 80 characters, it is no longer possible to add a space and an
+			# opening brace there)
+			if ($#ctx == 0 && $ctx !~ /{\s*/ &&
+			    defined($lines[$ctx_ln - 1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/ &&
+			    defined($lines[$ctx_ln - 2]) && length($lines[$ctx_ln - 2]) < 80) {
 				ERROR("that open brace { should be on the previous line\n" .
 					"$here\n$ctx\n$rawlines[$ctx_ln - 1]\n");
 			}
@@ -2542,7 +2548,10 @@ sub process {
 
 					substr($block, 0, length($cond), '');
 
-					$seen++ if ($block =~ /^\s*{/);
+					my $spaced_block = $block;
+					$spaced_block =~ s/\n\+/ /g;
+
+					$seen++ if ($spaced_block =~ /^\s*{/);
 
                                         print "APW: cond<$cond> block<$block> allowed<$allowed>\n"
                                             if $dbg_adv_apw;
diff --git a/tests/Makefile b/tests/Makefile
index e4ddb6a8c14bc2e00ad1a3dcea7f6e8cdc4a5dae..c2e2e52f22a53a9d1d099c3f6643d9f69b399330 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -298,7 +298,7 @@ tests/test-opts-visitor$(EXESUF): tests/test-opts-visitor.o $(test-qapi-obj-y) l
 tests/test-mul64$(EXESUF): tests/test-mul64.o libqemuutil.a
 tests/test-bitops$(EXESUF): tests/test-bitops.o libqemuutil.a
 
-libqos-obj-y = tests/libqos/pci.o tests/libqos/fw_cfg.o
+libqos-obj-y = tests/libqos/pci.o tests/libqos/fw_cfg.o tests/libqos/malloc.o
 libqos-obj-y += tests/libqos/i2c.o
 libqos-pc-obj-y = $(libqos-obj-y) tests/libqos/pci-pc.o
 libqos-pc-obj-y += tests/libqos/malloc-pc.o
diff --git a/tests/libqos/malloc-pc.c b/tests/libqos/malloc-pc.c
index f4218c64515223368afe229ee69f2c68c23cad21..c9c48fddc9893bf7da408114e31e2e2f7e46a65e 100644
--- a/tests/libqos/malloc-pc.c
+++ b/tests/libqos/malloc-pc.c
@@ -17,296 +17,28 @@
 #include "hw/nvram/fw_cfg.h"
 
 #include "qemu-common.h"
-#include "qemu/queue.h"
 #include <glib.h>
 
 #define PAGE_SIZE (4096)
 
-#define MLIST_ENTNAME entries
-typedef QTAILQ_HEAD(MemList, MemBlock) MemList;
-typedef struct MemBlock {
-    QTAILQ_ENTRY(MemBlock) MLIST_ENTNAME;
-    uint64_t size;
-    uint64_t addr;
-} MemBlock;
-
-typedef struct PCAlloc
-{
-    QGuestAllocator alloc;
-    PCAllocOpts opts;
-    uint64_t start;
-    uint64_t end;
-
-    MemList used;
-    MemList free;
-} PCAlloc;
-
-static MemBlock *mlist_new(uint64_t addr, uint64_t size)
-{
-    MemBlock *block;
-
-    if (!size) {
-        return NULL;
-    }
-    block = g_malloc0(sizeof(MemBlock));
-
-    block->addr = addr;
-    block->size = size;
-
-    return block;
-}
-
-static void mlist_delete(MemList *list, MemBlock *node)
-{
-    g_assert(list && node);
-    QTAILQ_REMOVE(list, node, MLIST_ENTNAME);
-    g_free(node);
-}
-
-static MemBlock *mlist_find_key(MemList *head, uint64_t addr)
-{
-    MemBlock *node;
-    QTAILQ_FOREACH(node, head, MLIST_ENTNAME) {
-        if (node->addr == addr) {
-            return node;
-        }
-    }
-    return NULL;
-}
-
-static MemBlock *mlist_find_space(MemList *head, uint64_t size)
-{
-    MemBlock *node;
-
-    QTAILQ_FOREACH(node, head, MLIST_ENTNAME) {
-        if (node->size >= size) {
-            return node;
-        }
-    }
-    return NULL;
-}
-
-static MemBlock *mlist_sort_insert(MemList *head, MemBlock *insr)
-{
-    MemBlock *node;
-    g_assert(head && insr);
-
-    QTAILQ_FOREACH(node, head, MLIST_ENTNAME) {
-        if (insr->addr < node->addr) {
-            QTAILQ_INSERT_BEFORE(node, insr, MLIST_ENTNAME);
-            return insr;
-        }
-    }
-
-    QTAILQ_INSERT_TAIL(head, insr, MLIST_ENTNAME);
-    return insr;
-}
-
-static inline uint64_t mlist_boundary(MemBlock *node)
-{
-    return node->size + node->addr;
-}
-
-static MemBlock *mlist_join(MemList *head, MemBlock *left, MemBlock *right)
-{
-    g_assert(head && left && right);
-
-    left->size += right->size;
-    mlist_delete(head, right);
-    return left;
-}
-
-static void mlist_coalesce(MemList *head, MemBlock *node)
-{
-    g_assert(node);
-    MemBlock *left;
-    MemBlock *right;
-    char merge;
-
-    do {
-        merge = 0;
-        left = QTAILQ_PREV(node, MemList, MLIST_ENTNAME);
-        right = QTAILQ_NEXT(node, MLIST_ENTNAME);
-
-        /* clowns to the left of me */
-        if (left && mlist_boundary(left) == node->addr) {
-            node = mlist_join(head, left, node);
-            merge = 1;
-        }
-
-        /* jokers to the right */
-        if (right && mlist_boundary(node) == right->addr) {
-            node = mlist_join(head, node, right);
-            merge = 1;
-        }
-
-    } while (merge);
-}
-
-static uint64_t pc_mlist_fulfill(PCAlloc *s, MemBlock *freenode, uint64_t size)
-{
-    uint64_t addr;
-    MemBlock *usednode;
-
-    g_assert(freenode);
-    g_assert_cmpint(freenode->size, >=, size);
-
-    addr = freenode->addr;
-    if (freenode->size == size) {
-        /* re-use this freenode as our used node */
-        QTAILQ_REMOVE(&s->free, freenode, MLIST_ENTNAME);
-        usednode = freenode;
-    } else {
-        /* adjust the free node and create a new used node */
-        freenode->addr += size;
-        freenode->size -= size;
-        usednode = mlist_new(addr, size);
-    }
-
-    mlist_sort_insert(&s->used, usednode);
-    return addr;
-}
-
-/* To assert the correctness of the list.
- * Used only if PC_ALLOC_PARANOID is set. */
-static void pc_mlist_check(PCAlloc *s)
-{
-    MemBlock *node;
-    uint64_t addr = s->start > 0 ? s->start - 1 : 0;
-    uint64_t next = s->start;
-
-    QTAILQ_FOREACH(node, &s->free, MLIST_ENTNAME) {
-        g_assert_cmpint(node->addr, >, addr);
-        g_assert_cmpint(node->addr, >=, next);
-        addr = node->addr;
-        next = node->addr + node->size;
-    }
-
-    addr = s->start > 0 ? s->start - 1 : 0;
-    next = s->start;
-    QTAILQ_FOREACH(node, &s->used, MLIST_ENTNAME) {
-        g_assert_cmpint(node->addr, >, addr);
-        g_assert_cmpint(node->addr, >=, next);
-        addr = node->addr;
-        next = node->addr + node->size;
-    }
-}
-
-static uint64_t pc_mlist_alloc(PCAlloc *s, uint64_t size)
-{
-    MemBlock *node;
-
-    node = mlist_find_space(&s->free, size);
-    if (!node) {
-        fprintf(stderr, "Out of guest memory.\n");
-        g_assert_not_reached();
-    }
-    return pc_mlist_fulfill(s, node, size);
-}
-
-static void pc_mlist_free(PCAlloc *s, uint64_t addr)
-{
-    MemBlock *node;
-
-    if (addr == 0) {
-        return;
-    }
-
-    node = mlist_find_key(&s->used, addr);
-    if (!node) {
-        fprintf(stderr, "Error: no record found for an allocation at "
-                "0x%016" PRIx64 ".\n",
-                addr);
-        g_assert_not_reached();
-    }
-
-    /* Rip it out of the used list and re-insert back into the free list. */
-    QTAILQ_REMOVE(&s->used, node, MLIST_ENTNAME);
-    mlist_sort_insert(&s->free, node);
-    mlist_coalesce(&s->free, node);
-}
-
-static uint64_t pc_alloc(QGuestAllocator *allocator, size_t size)
-{
-    PCAlloc *s = container_of(allocator, PCAlloc, alloc);
-    uint64_t rsize = size;
-    uint64_t naddr;
-
-    rsize += (PAGE_SIZE - 1);
-    rsize &= -PAGE_SIZE;
-    g_assert_cmpint((s->start + rsize), <=, s->end);
-    g_assert_cmpint(rsize, >=, size);
-
-    naddr = pc_mlist_alloc(s, rsize);
-    if (s->opts & PC_ALLOC_PARANOID) {
-        pc_mlist_check(s);
-    }
-
-    return naddr;
-}
-
-static void pc_free(QGuestAllocator *allocator, uint64_t addr)
-{
-    PCAlloc *s = container_of(allocator, PCAlloc, alloc);
-
-    pc_mlist_free(s, addr);
-    if (s->opts & PC_ALLOC_PARANOID) {
-        pc_mlist_check(s);
-    }
-}
-
 /*
  * Mostly for valgrind happiness, but it does offer
  * a chokepoint for debugging guest memory leaks, too.
  */
 void pc_alloc_uninit(QGuestAllocator *allocator)
 {
-    PCAlloc *s = container_of(allocator, PCAlloc, alloc);
-    MemBlock *node;
-    MemBlock *tmp;
-    PCAllocOpts mask;
-
-    /* Check for guest leaks, and destroy the list. */
-    QTAILQ_FOREACH_SAFE(node, &s->used, MLIST_ENTNAME, tmp) {
-        if (s->opts & (PC_ALLOC_LEAK_WARN | PC_ALLOC_LEAK_ASSERT)) {
-            fprintf(stderr, "guest malloc leak @ 0x%016" PRIx64 "; "
-                    "size 0x%016" PRIx64 ".\n",
-                    node->addr, node->size);
-        }
-        if (s->opts & (PC_ALLOC_LEAK_ASSERT)) {
-            g_assert_not_reached();
-        }
-        g_free(node);
-    }
-
-    /* If we have previously asserted that there are no leaks, then there
-     * should be only one node here with a specific address and size. */
-    mask = PC_ALLOC_LEAK_ASSERT | PC_ALLOC_PARANOID;
-    QTAILQ_FOREACH_SAFE(node, &s->free, MLIST_ENTNAME, tmp) {
-        if ((s->opts & mask) == mask) {
-            if ((node->addr != s->start) ||
-                (node->size != s->end - s->start)) {
-                fprintf(stderr, "Free list is corrupted.\n");
-                g_assert_not_reached();
-            }
-        }
-
-        g_free(node);
-    }
-
-    g_free(s);
+    alloc_uninit(allocator);
 }
 
-QGuestAllocator *pc_alloc_init_flags(PCAllocOpts flags)
+QGuestAllocator *pc_alloc_init_flags(QAllocOpts flags)
 {
-    PCAlloc *s = g_malloc0(sizeof(*s));
+    QGuestAllocator *s = g_malloc0(sizeof(*s));
     uint64_t ram_size;
     QFWCFG *fw_cfg = pc_fw_cfg_init();
     MemBlock *node;
 
     s->opts = flags;
-    s->alloc.alloc = pc_alloc;
-    s->alloc.free = pc_free;
+    s->page_size = PAGE_SIZE;
 
     ram_size = qfw_cfg_get_u64(fw_cfg, FW_CFG_RAM_SIZE);
 
@@ -325,10 +57,10 @@ QGuestAllocator *pc_alloc_init_flags(PCAllocOpts flags)
     node = mlist_new(s->start, s->end - s->start);
     QTAILQ_INSERT_HEAD(&s->free, node, MLIST_ENTNAME);
 
-    return &s->alloc;
+    return s;
 }
 
 inline QGuestAllocator *pc_alloc_init(void)
 {
-    return pc_alloc_init_flags(PC_ALLOC_NO_FLAGS);
+    return pc_alloc_init_flags(ALLOC_NO_FLAGS);
 }
diff --git a/tests/libqos/malloc-pc.h b/tests/libqos/malloc-pc.h
index 9f525e3b999399e3acfe638392da57b0b868a020..86ab9f0429b02fdfd4a4696903addb5d6389393c 100644
--- a/tests/libqos/malloc-pc.h
+++ b/tests/libqos/malloc-pc.h
@@ -15,15 +15,8 @@
 
 #include "libqos/malloc.h"
 
-typedef enum {
-    PC_ALLOC_NO_FLAGS    = 0x00,
-    PC_ALLOC_LEAK_WARN   = 0x01,
-    PC_ALLOC_LEAK_ASSERT = 0x02,
-    PC_ALLOC_PARANOID    = 0x04
-} PCAllocOpts;
-
 QGuestAllocator *pc_alloc_init(void);
-QGuestAllocator *pc_alloc_init_flags(PCAllocOpts flags);
-void             pc_alloc_uninit(QGuestAllocator *allocator);
+QGuestAllocator *pc_alloc_init_flags(QAllocOpts flags);
+void pc_alloc_uninit(QGuestAllocator *allocator);
 
 #endif
diff --git a/tests/libqos/malloc.c b/tests/libqos/malloc.c
new file mode 100644
index 0000000000000000000000000000000000000000..5debf184976eb76fd9c73dc6be5f7a9effa3dcf8
--- /dev/null
+++ b/tests/libqos/malloc.c
@@ -0,0 +1,270 @@
+/*
+ * libqos malloc support
+ *
+ * Copyright (c) 2014
+ *
+ * Author:
+ *  John Snow <jsnow@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "libqos/malloc.h"
+#include "qemu-common.h"
+#include <stdio.h>
+#include <inttypes.h>
+#include <glib.h>
+
+static void mlist_delete(MemList *list, MemBlock *node)
+{
+    g_assert(list && node);
+    QTAILQ_REMOVE(list, node, MLIST_ENTNAME);
+    g_free(node);
+}
+
+static MemBlock *mlist_find_key(MemList *head, uint64_t addr)
+{
+    MemBlock *node;
+    QTAILQ_FOREACH(node, head, MLIST_ENTNAME) {
+        if (node->addr == addr) {
+            return node;
+        }
+    }
+    return NULL;
+}
+
+static MemBlock *mlist_find_space(MemList *head, uint64_t size)
+{
+    MemBlock *node;
+
+    QTAILQ_FOREACH(node, head, MLIST_ENTNAME) {
+        if (node->size >= size) {
+            return node;
+        }
+    }
+    return NULL;
+}
+
+static MemBlock *mlist_sort_insert(MemList *head, MemBlock *insr)
+{
+    MemBlock *node;
+    g_assert(head && insr);
+
+    QTAILQ_FOREACH(node, head, MLIST_ENTNAME) {
+        if (insr->addr < node->addr) {
+            QTAILQ_INSERT_BEFORE(node, insr, MLIST_ENTNAME);
+            return insr;
+        }
+    }
+
+    QTAILQ_INSERT_TAIL(head, insr, MLIST_ENTNAME);
+    return insr;
+}
+
+static inline uint64_t mlist_boundary(MemBlock *node)
+{
+    return node->size + node->addr;
+}
+
+static MemBlock *mlist_join(MemList *head, MemBlock *left, MemBlock *right)
+{
+    g_assert(head && left && right);
+
+    left->size += right->size;
+    mlist_delete(head, right);
+    return left;
+}
+
+static void mlist_coalesce(MemList *head, MemBlock *node)
+{
+    g_assert(node);
+    MemBlock *left;
+    MemBlock *right;
+    char merge;
+
+    do {
+        merge = 0;
+        left = QTAILQ_PREV(node, MemList, MLIST_ENTNAME);
+        right = QTAILQ_NEXT(node, MLIST_ENTNAME);
+
+        /* clowns to the left of me */
+        if (left && mlist_boundary(left) == node->addr) {
+            node = mlist_join(head, left, node);
+            merge = 1;
+        }
+
+        /* jokers to the right */
+        if (right && mlist_boundary(node) == right->addr) {
+            node = mlist_join(head, node, right);
+            merge = 1;
+        }
+
+    } while (merge);
+}
+
+static uint64_t mlist_fulfill(QGuestAllocator *s, MemBlock *freenode,
+                                                                uint64_t size)
+{
+    uint64_t addr;
+    MemBlock *usednode;
+
+    g_assert(freenode);
+    g_assert_cmpint(freenode->size, >=, size);
+
+    addr = freenode->addr;
+    if (freenode->size == size) {
+        /* re-use this freenode as our used node */
+        QTAILQ_REMOVE(&s->free, freenode, MLIST_ENTNAME);
+        usednode = freenode;
+    } else {
+        /* adjust the free node and create a new used node */
+        freenode->addr += size;
+        freenode->size -= size;
+        usednode = mlist_new(addr, size);
+    }
+
+    mlist_sort_insert(&s->used, usednode);
+    return addr;
+}
+
+/* To assert the correctness of the list.
+ * Used only if ALLOC_PARANOID is set. */
+static void mlist_check(QGuestAllocator *s)
+{
+    MemBlock *node;
+    uint64_t addr = s->start > 0 ? s->start - 1 : 0;
+    uint64_t next = s->start;
+
+    QTAILQ_FOREACH(node, &s->free, MLIST_ENTNAME) {
+        g_assert_cmpint(node->addr, >, addr);
+        g_assert_cmpint(node->addr, >=, next);
+        addr = node->addr;
+        next = node->addr + node->size;
+    }
+
+    addr = s->start > 0 ? s->start - 1 : 0;
+    next = s->start;
+    QTAILQ_FOREACH(node, &s->used, MLIST_ENTNAME) {
+        g_assert_cmpint(node->addr, >, addr);
+        g_assert_cmpint(node->addr, >=, next);
+        addr = node->addr;
+        next = node->addr + node->size;
+    }
+}
+
+static uint64_t mlist_alloc(QGuestAllocator *s, uint64_t size)
+{
+    MemBlock *node;
+
+    node = mlist_find_space(&s->free, size);
+    if (!node) {
+        fprintf(stderr, "Out of guest memory.\n");
+        g_assert_not_reached();
+    }
+    return mlist_fulfill(s, node, size);
+}
+
+static void mlist_free(QGuestAllocator *s, uint64_t addr)
+{
+    MemBlock *node;
+
+    if (addr == 0) {
+        return;
+    }
+
+    node = mlist_find_key(&s->used, addr);
+    if (!node) {
+        fprintf(stderr, "Error: no record found for an allocation at "
+                "0x%016" PRIx64 ".\n",
+                addr);
+        g_assert_not_reached();
+    }
+
+    /* Rip it out of the used list and re-insert back into the free list. */
+    QTAILQ_REMOVE(&s->used, node, MLIST_ENTNAME);
+    mlist_sort_insert(&s->free, node);
+    mlist_coalesce(&s->free, node);
+}
+
+MemBlock *mlist_new(uint64_t addr, uint64_t size)
+{
+    MemBlock *block;
+
+    if (!size) {
+        return NULL;
+    }
+    block = g_malloc0(sizeof(MemBlock));
+
+    block->addr = addr;
+    block->size = size;
+
+    return block;
+}
+
+/*
+ * Mostly for valgrind happiness, but it does offer
+ * a chokepoint for debugging guest memory leaks, too.
+ */
+void alloc_uninit(QGuestAllocator *allocator)
+{
+    MemBlock *node;
+    MemBlock *tmp;
+    QAllocOpts mask;
+
+    /* Check for guest leaks, and destroy the list. */
+    QTAILQ_FOREACH_SAFE(node, &allocator->used, MLIST_ENTNAME, tmp) {
+        if (allocator->opts & (ALLOC_LEAK_WARN | ALLOC_LEAK_ASSERT)) {
+            fprintf(stderr, "guest malloc leak @ 0x%016" PRIx64 "; "
+                    "size 0x%016" PRIx64 ".\n",
+                    node->addr, node->size);
+        }
+        if (allocator->opts & (ALLOC_LEAK_ASSERT)) {
+            g_assert_not_reached();
+        }
+        g_free(node);
+    }
+
+    /* If we have previously asserted that there are no leaks, then there
+     * should be only one node here with a specific address and size. */
+    mask = ALLOC_LEAK_ASSERT | ALLOC_PARANOID;
+    QTAILQ_FOREACH_SAFE(node, &allocator->free, MLIST_ENTNAME, tmp) {
+        if ((allocator->opts & mask) == mask) {
+            if ((node->addr != allocator->start) ||
+                (node->size != allocator->end - allocator->start)) {
+                fprintf(stderr, "Free list is corrupted.\n");
+                g_assert_not_reached();
+            }
+        }
+
+        g_free(node);
+    }
+
+    g_free(allocator);
+}
+
+uint64_t guest_alloc(QGuestAllocator *allocator, size_t size)
+{
+    uint64_t rsize = size;
+    uint64_t naddr;
+
+    rsize += (allocator->page_size - 1);
+    rsize &= -allocator->page_size;
+    g_assert_cmpint((allocator->start + rsize), <=, allocator->end);
+    g_assert_cmpint(rsize, >=, size);
+
+    naddr = mlist_alloc(allocator, rsize);
+    if (allocator->opts & ALLOC_PARANOID) {
+        mlist_check(allocator);
+    }
+
+    return naddr;
+}
+
+void guest_free(QGuestAllocator *allocator, uint64_t addr)
+{
+    mlist_free(allocator, addr);
+    if (allocator->opts & ALLOC_PARANOID) {
+        mlist_check(allocator);
+    }
+}
diff --git a/tests/libqos/malloc.h b/tests/libqos/malloc.h
index 556538121e0eaa30c7717b31535629846781f8d2..465efeb8fb9f4237a9ac748e28ebf6cf3b572f21 100644
--- a/tests/libqos/malloc.h
+++ b/tests/libqos/malloc.h
@@ -15,24 +15,39 @@
 
 #include <stdint.h>
 #include <sys/types.h>
+#include "qemu/queue.h"
 
-typedef struct QGuestAllocator QGuestAllocator;
+#define MLIST_ENTNAME entries
 
-struct QGuestAllocator
-{
-    uint64_t (*alloc)(QGuestAllocator *allocator, size_t size);
-    void (*free)(QGuestAllocator *allocator, uint64_t addr);
-};
+typedef enum {
+    ALLOC_NO_FLAGS    = 0x00,
+    ALLOC_LEAK_WARN   = 0x01,
+    ALLOC_LEAK_ASSERT = 0x02,
+    ALLOC_PARANOID    = 0x04
+} QAllocOpts;
+
+typedef QTAILQ_HEAD(MemList, MemBlock) MemList;
+typedef struct MemBlock {
+    QTAILQ_ENTRY(MemBlock) MLIST_ENTNAME;
+    uint64_t size;
+    uint64_t addr;
+} MemBlock;
+
+typedef struct QGuestAllocator {
+    QAllocOpts opts;
+    uint64_t start;
+    uint64_t end;
+    uint32_t page_size;
+
+    MemList used;
+    MemList free;
+} QGuestAllocator;
+
+MemBlock *mlist_new(uint64_t addr, uint64_t size);
+void alloc_uninit(QGuestAllocator *allocator);
 
 /* Always returns page aligned values */
-static inline uint64_t guest_alloc(QGuestAllocator *allocator, size_t size)
-{
-    return allocator->alloc(allocator, size);
-}
-
-static inline void guest_free(QGuestAllocator *allocator, uint64_t addr)
-{
-    allocator->free(allocator, addr);
-}
+uint64_t guest_alloc(QGuestAllocator *allocator, size_t size);
+void guest_free(QGuestAllocator *allocator, uint64_t addr);
 
 #endif
diff --git a/tests/qemu-iotests-quick.sh b/tests/qemu-iotests-quick.sh
index 12af731c682f65b6cd7d6a5b907e4734d6caa289..0e554bb97240de643945fa784c26d6cbf50aece7 100755
--- a/tests/qemu-iotests-quick.sh
+++ b/tests/qemu-iotests-quick.sh
@@ -3,6 +3,6 @@
 cd tests/qemu-iotests
 
 ret=0
-./check -T -qcow2 -g quick || ret=1
+TEST_DIR=${TEST_DIR:-/tmp/qemu-iotests-quick-$$} ./check -T -qcow2 -g quick || ret=1
 
 exit $ret
diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055
index 0872444811fff4c30d82cf2c673df9a5d12226bd..e81d4d0d83ee0961aec759eeab26abde63e24e87 100755
--- a/tests/qemu-iotests/055
+++ b/tests/qemu-iotests/055
@@ -1,8 +1,8 @@
 #!/usr/bin/env python
 #
-# Tests for drive-backup
+# Tests for drive-backup and blockdev-backup
 #
-# Copyright (C) 2013 Red Hat, Inc.
+# Copyright (C) 2013, 2014 Red Hat, Inc.
 #
 # Based on 041.
 #
@@ -27,6 +27,7 @@ from iotests import qemu_img, qemu_io
 
 test_img = os.path.join(iotests.test_dir, 'test.img')
 target_img = os.path.join(iotests.test_dir, 'target.img')
+blockdev_target_img = os.path.join(iotests.test_dir, 'blockdev-target.img')
 
 class TestSingleDrive(iotests.QMPTestCase):
     image_len = 64 * 1024 * 1024 # MB
@@ -38,34 +39,41 @@ class TestSingleDrive(iotests.QMPTestCase):
         qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xd5 1M 32k', test_img)
         qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 32M 124k', test_img)
         qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 67043328 64k', test_img)
+        qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len))
 
-        self.vm = iotests.VM().add_drive(test_img)
+        self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
         self.vm.launch()
 
     def tearDown(self):
         self.vm.shutdown()
         os.remove(test_img)
+        os.remove(blockdev_target_img)
         try:
             os.remove(target_img)
         except OSError:
             pass
 
-    def test_cancel(self):
+    def do_test_cancel(self, cmd, target):
         self.assert_no_active_block_jobs()
 
-        result = self.vm.qmp('drive-backup', device='drive0',
-                             target=target_img, sync='full')
+        result = self.vm.qmp(cmd, device='drive0', target=target, sync='full')
         self.assert_qmp(result, 'return', {})
 
         event = self.cancel_and_wait()
         self.assert_qmp(event, 'data/type', 'backup')
 
-    def test_pause(self):
+    def test_cancel_drive_backup(self):
+        self.do_test_cancel('drive-backup', target_img)
+
+    def test_cancel_blockdev_backup(self):
+        self.do_test_cancel('blockdev-backup', 'drive1')
+
+    def do_test_pause(self, cmd, target, image):
         self.assert_no_active_block_jobs()
 
         self.vm.pause_drive('drive0')
-        result = self.vm.qmp('drive-backup', device='drive0',
-                             target=target_img, sync='full')
+        result = self.vm.qmp(cmd, device='drive0',
+                             target=target, sync='full')
         self.assert_qmp(result, 'return', {})
 
         result = self.vm.qmp('block-job-pause', device='drive0')
@@ -86,14 +94,25 @@ class TestSingleDrive(iotests.QMPTestCase):
         self.wait_until_completed()
 
         self.vm.shutdown()
-        self.assertTrue(iotests.compare_images(test_img, target_img),
+        self.assertTrue(iotests.compare_images(test_img, image),
                         'target image does not match source after backup')
 
+    def test_pause_drive_backup(self):
+        self.do_test_pause('drive-backup', target_img, target_img)
+
+    def test_pause_blockdev_backup(self):
+        self.do_test_pause('blockdev-backup', 'drive1', blockdev_target_img)
+
     def test_medium_not_found(self):
         result = self.vm.qmp('drive-backup', device='ide1-cd0',
                              target=target_img, sync='full')
         self.assert_qmp(result, 'error/class', 'GenericError')
 
+    def test_medium_not_found_blockdev_backup(self):
+        result = self.vm.qmp('blockdev-backup', device='ide1-cd0',
+                             target='drive1', sync='full')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
     def test_image_not_found(self):
         result = self.vm.qmp('drive-backup', device='drive0',
                              target=target_img, sync='full', mode='existing')
@@ -105,31 +124,53 @@ class TestSingleDrive(iotests.QMPTestCase):
                              format='spaghetti-noodles')
         self.assert_qmp(result, 'error/class', 'GenericError')
 
-    def test_device_not_found(self):
-        result = self.vm.qmp('drive-backup', device='nonexistent',
-                             target=target_img, sync='full')
+    def do_test_device_not_found(self, cmd, **args):
+        result = self.vm.qmp(cmd, **args)
         self.assert_qmp(result, 'error/class', 'DeviceNotFound')
 
+    def test_device_not_found(self):
+        self.do_test_device_not_found('drive-backup', device='nonexistent',
+                                      target=target_img, sync='full')
+
+        self.do_test_device_not_found('blockdev-backup', device='nonexistent',
+                                      target='drive0', sync='full')
+
+        self.do_test_device_not_found('blockdev-backup', device='drive0',
+                                      target='nonexistent', sync='full')
+
+        self.do_test_device_not_found('blockdev-backup', device='nonexistent',
+                                      target='nonexistent', sync='full')
+
+    def test_target_is_source(self):
+        result = self.vm.qmp('blockdev-backup', device='drive0',
+                             target='drive0', sync='full')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
 class TestSetSpeed(iotests.QMPTestCase):
     image_len = 80 * 1024 * 1024 # MB
 
     def setUp(self):
         qemu_img('create', '-f', iotests.imgfmt, test_img, str(TestSetSpeed.image_len))
         qemu_io('-f', iotests.imgfmt, '-c', 'write -P1 0 512', test_img)
-        self.vm = iotests.VM().add_drive(test_img)
+        qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len))
+
+        self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
         self.vm.launch()
 
     def tearDown(self):
         self.vm.shutdown()
         os.remove(test_img)
-        os.remove(target_img)
+        os.remove(blockdev_target_img)
+        try:
+            os.remove(target_img)
+        except OSError:
+            pass
 
-    def test_set_speed(self):
+    def do_test_set_speed(self, cmd, target):
         self.assert_no_active_block_jobs()
 
         self.vm.pause_drive('drive0')
-        result = self.vm.qmp('drive-backup', device='drive0',
-                             target=target_img, sync='full')
+        result = self.vm.qmp(cmd, device='drive0', target=target, sync='full')
         self.assert_qmp(result, 'return', {})
 
         # Default speed is 0
@@ -148,10 +189,10 @@ class TestSetSpeed(iotests.QMPTestCase):
         event = self.cancel_and_wait(resume=True)
         self.assert_qmp(event, 'data/type', 'backup')
 
-        # Check setting speed in drive-backup works
+        # Check setting speed option works
         self.vm.pause_drive('drive0')
-        result = self.vm.qmp('drive-backup', device='drive0',
-                             target=target_img, sync='full', speed=4*1024*1024)
+        result = self.vm.qmp(cmd, device='drive0',
+                             target=target, sync='full', speed=4*1024*1024)
         self.assert_qmp(result, 'return', {})
 
         result = self.vm.qmp('query-block-jobs')
@@ -161,18 +202,24 @@ class TestSetSpeed(iotests.QMPTestCase):
         event = self.cancel_and_wait(resume=True)
         self.assert_qmp(event, 'data/type', 'backup')
 
-    def test_set_speed_invalid(self):
+    def test_set_speed_drive_backup(self):
+        self.do_test_set_speed('drive-backup', target_img)
+
+    def test_set_speed_blockdev_backup(self):
+        self.do_test_set_speed('blockdev-backup', 'drive1')
+
+    def do_test_set_speed_invalid(self, cmd, target):
         self.assert_no_active_block_jobs()
 
-        result = self.vm.qmp('drive-backup', device='drive0',
-                             target=target_img, sync='full', speed=-1)
+        result = self.vm.qmp(cmd, device='drive0',
+                             target=target, sync='full', speed=-1)
         self.assert_qmp(result, 'error/class', 'GenericError')
 
         self.assert_no_active_block_jobs()
 
         self.vm.pause_drive('drive0')
-        result = self.vm.qmp('drive-backup', device='drive0',
-                             target=target_img, sync='full')
+        result = self.vm.qmp(cmd, device='drive0',
+                             target=target, sync='full')
         self.assert_qmp(result, 'return', {})
 
         result = self.vm.qmp('block-job-set-speed', device='drive0', speed=-1)
@@ -181,6 +228,12 @@ class TestSetSpeed(iotests.QMPTestCase):
         event = self.cancel_and_wait(resume=True)
         self.assert_qmp(event, 'data/type', 'backup')
 
+    def test_set_speed_invalid_drive_backup(self):
+        self.do_test_set_speed_invalid('drive-backup', target_img)
+
+    def test_set_speed_invalid_blockdev_backup(self):
+        self.do_test_set_speed_invalid('blockdev-backup',  'drive1')
+
 class TestSingleTransaction(iotests.QMPTestCase):
     image_len = 64 * 1024 * 1024 # MB
 
@@ -190,41 +243,50 @@ class TestSingleTransaction(iotests.QMPTestCase):
         qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xd5 1M 32k', test_img)
         qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 32M 124k', test_img)
         qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 67043328 64k', test_img)
+        qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len))
 
-        self.vm = iotests.VM().add_drive(test_img)
+        self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img)
         self.vm.launch()
 
     def tearDown(self):
         self.vm.shutdown()
         os.remove(test_img)
+        os.remove(blockdev_target_img)
         try:
             os.remove(target_img)
         except OSError:
             pass
 
-    def test_cancel(self):
+    def do_test_cancel(self, cmd, target):
         self.assert_no_active_block_jobs()
 
         result = self.vm.qmp('transaction', actions=[{
-                'type': 'drive-backup',
+                'type': cmd,
                 'data': { 'device': 'drive0',
-                          'target': target_img,
+                          'target': target,
                           'sync': 'full' },
             }
         ])
+
         self.assert_qmp(result, 'return', {})
 
         event = self.cancel_and_wait()
         self.assert_qmp(event, 'data/type', 'backup')
 
-    def test_pause(self):
+    def test_cancel_drive_backup(self):
+        self.do_test_cancel('drive-backup', target_img)
+
+    def test_cancel_blockdev_backup(self):
+        self.do_test_cancel('blockdev-backup', 'drive1')
+
+    def do_test_pause(self, cmd, target, image):
         self.assert_no_active_block_jobs()
 
         self.vm.pause_drive('drive0')
         result = self.vm.qmp('transaction', actions=[{
-                'type': 'drive-backup',
+                'type': cmd,
                 'data': { 'device': 'drive0',
-                          'target': target_img,
+                          'target': target,
                           'sync': 'full' },
             }
         ])
@@ -248,19 +310,31 @@ class TestSingleTransaction(iotests.QMPTestCase):
         self.wait_until_completed()
 
         self.vm.shutdown()
-        self.assertTrue(iotests.compare_images(test_img, target_img),
+        self.assertTrue(iotests.compare_images(test_img, image),
                         'target image does not match source after backup')
 
-    def test_medium_not_found(self):
+    def test_pause_drive_backup(self):
+        self.do_test_pause('drive-backup', target_img, target_img)
+
+    def test_pause_blockdev_backup(self):
+        self.do_test_pause('blockdev-backup', 'drive1', blockdev_target_img)
+
+    def do_test_medium_not_found(self, cmd, target):
         result = self.vm.qmp('transaction', actions=[{
-                'type': 'drive-backup',
+                'type': cmd,
                 'data': { 'device': 'ide1-cd0',
-                          'target': target_img,
+                          'target': target,
                           'sync': 'full' },
             }
         ])
         self.assert_qmp(result, 'error/class', 'GenericError')
 
+    def test_medium_not_found_drive_backup(self):
+        self.do_test_medium_not_found('drive-backup', target_img)
+
+    def test_medium_not_found_blockdev_backup(self):
+        self.do_test_medium_not_found('blockdev-backup', 'drive1')
+
     def test_image_not_found(self):
         result = self.vm.qmp('transaction', actions=[{
                 'type': 'drive-backup',
@@ -283,6 +357,43 @@ class TestSingleTransaction(iotests.QMPTestCase):
         ])
         self.assert_qmp(result, 'error/class', 'DeviceNotFound')
 
+        result = self.vm.qmp('transaction', actions=[{
+                'type': 'blockdev-backup',
+                'data': { 'device': 'nonexistent',
+                          'target': 'drive1',
+                          'sync': 'full' },
+            }
+        ])
+        self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+        result = self.vm.qmp('transaction', actions=[{
+                'type': 'blockdev-backup',
+                'data': { 'device': 'drive0',
+                          'target': 'nonexistent',
+                          'sync': 'full' },
+            }
+        ])
+        self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+        result = self.vm.qmp('transaction', actions=[{
+                'type': 'blockdev-backup',
+                'data': { 'device': 'nonexistent',
+                          'target': 'nonexistent',
+                          'sync': 'full' },
+            }
+        ])
+        self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+    def test_target_is_source(self):
+        result = self.vm.qmp('transaction', actions=[{
+                'type': 'blockdev-backup',
+                'data': { 'device': 'drive0',
+                          'target': 'drive0',
+                          'sync': 'full' },
+            }
+        ])
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
     def test_abort(self):
         result = self.vm.qmp('transaction', actions=[{
                 'type': 'drive-backup',
@@ -298,5 +409,31 @@ class TestSingleTransaction(iotests.QMPTestCase):
         self.assert_qmp(result, 'error/class', 'GenericError')
         self.assert_no_active_block_jobs()
 
+        result = self.vm.qmp('transaction', actions=[{
+                'type': 'blockdev-backup',
+                'data': { 'device': 'nonexistent',
+                          'target': 'drive1',
+                          'sync': 'full' },
+            }, {
+                'type': 'Abort',
+                'data': {},
+            }
+        ])
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('transaction', actions=[{
+                'type': 'blockdev-backup',
+                'data': { 'device': 'drive0',
+                          'target': 'nonexistent',
+                          'sync': 'full' },
+            }, {
+                'type': 'Abort',
+                'data': {},
+            }
+        ])
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_no_active_block_jobs()
+
 if __name__ == '__main__':
     iotests.main(supported_fmts=['raw', 'qcow2'])
diff --git a/tests/qemu-iotests/055.out b/tests/qemu-iotests/055.out
index 6323079e0872c19741a56cdab8d333b299addd6c..42314e9c009b1af9a60d9726696a644aa2650143 100644
--- a/tests/qemu-iotests/055.out
+++ b/tests/qemu-iotests/055.out
@@ -1,5 +1,5 @@
-..............
+........................
 ----------------------------------------------------------------------
-Ran 14 tests
+Ran 24 tests
 
 OK
diff --git a/tests/qemu-iotests/058 b/tests/qemu-iotests/058
index 2d5ca85ddc760671836513fe57e342711a4c3411..a60b34b46cacdb97eb2b6d0f7008a8fcc2e276a9 100755
--- a/tests/qemu-iotests/058
+++ b/tests/qemu-iotests/058
@@ -87,6 +87,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 
 _supported_fmt qcow2
 _supported_proto file
+_supported_os Linux
 _require_command QEMU_NBD
 
 # Use -f raw instead of -f $IMGFMT for the NBD connection
diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067
index 29cd6b5affc4baece0c97e37cab8ff319e7dc219..0508c696a895e3036ce118b92d7e41dc18afc1ac 100755
--- a/tests/qemu-iotests/067
+++ b/tests/qemu-iotests/067
@@ -45,7 +45,8 @@ function do_run_qemu()
 
 function run_qemu()
 {
-    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
+    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu \
+                          | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
 }
 
 size=128M
diff --git a/tests/qemu-iotests/071 b/tests/qemu-iotests/071
index 5d61ef6d817f58ccc9c902accce8ef7e9424b962..9eaa49b419992014809d10828c5ccaf32cd117d1 100755
--- a/tests/qemu-iotests/071
+++ b/tests/qemu-iotests/071
@@ -51,7 +51,7 @@ function do_run_qemu()
 
 function run_qemu()
 {
-    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu_io
+    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp | _filter_qemu_io
 }
 
 IMG_SIZE=64M
diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out
index 46484ff69c050f03dfea46ff0fae077c32003aed..9205ce2512fa6210cf1ef14ff2d5641d98216ea5 100644
--- a/tests/qemu-iotests/071.out
+++ b/tests/qemu-iotests/071.out
@@ -52,8 +52,8 @@ read failed: Input/output error
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN"}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}}
-qemu-system-x86_64: Failed to flush the L2 table cache: Input/output error
-qemu-system-x86_64: Failed to flush the refcount block cache: Input/output error
+QEMU_PROG: Failed to flush the L2 table cache: Input/output error
+QEMU_PROG: Failed to flush the refcount block cache: Input/output error
 
 
 === Testing blkverify on existing block device ===
@@ -92,7 +92,7 @@ read failed: Input/output error
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN"}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}}
-qemu-system-x86_64: Failed to flush the L2 table cache: Input/output error
-qemu-system-x86_64: Failed to flush the refcount block cache: Input/output error
+QEMU_PROG: Failed to flush the L2 table cache: Input/output error
+QEMU_PROG: Failed to flush the refcount block cache: Input/output error
 
 *** done
diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
index 9ab93ff89e89ead022cb2d3e455a9e2b6c5682f0..d9b042cfc71b4fe18b7477681d4757a6738dedf3 100755
--- a/tests/qemu-iotests/081
+++ b/tests/qemu-iotests/081
@@ -53,7 +53,7 @@ function do_run_qemu()
 
 function run_qemu()
 {
-    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu_io
+    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp | _filter_qemu_io
 }
 
 test_quorum=$($QEMU_IMG --help|grep quorum)
diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087
index d7454d13da53cfd6221058697aaafe65fe69e512..8694749947780c9a21368d5a13e42cfdce6a1237 100755
--- a/tests/qemu-iotests/087
+++ b/tests/qemu-iotests/087
@@ -45,7 +45,8 @@ function do_run_qemu()
 
 function run_qemu()
 {
-    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
+    do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu \
+                          | sed -e 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
 }
 
 size=128M
diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out
index 7d38cc26c5c358e1dee4a7f290893dc1274952fc..91f4ea1a8bad358bbd76a905df8ed0415df16c20 100644
--- a/tests/qemu-iotests/087.out
+++ b/tests/qemu-iotests/087.out
@@ -21,7 +21,6 @@ QMP_VERSION
 {"return": {}}
 {"error": {"class": "GenericError", "desc": "Device with id 'disk' already exists"}}
 {"error": {"class": "GenericError", "desc": "Device name 'test-node' conflicts with an existing node name"}}
-main-loop: WARNING: I/O thread spun for 1000 iterations
 {"error": {"class": "GenericError", "desc": "could not open disk image disk2: node-name=disk is conflicting with a device id"}}
 {"error": {"class": "GenericError", "desc": "could not open disk image disk2: Duplicate node name"}}
 {"error": {"class": "GenericError", "desc": "could not open disk image disk3: node-name=disk3 is conflicting with a device id"}}
diff --git a/tests/qemu-iotests/099 b/tests/qemu-iotests/099
index 948afff28b7c70b9376132a518a017409f7b3b32..80f3d9aaf328966ddc6c829e58e6c898be1d75ed 100755
--- a/tests/qemu-iotests/099
+++ b/tests/qemu-iotests/099
@@ -57,7 +57,7 @@ function run_qemu()
     # Get the "file": "foo" entry ($foo may only contain escaped double quotes,
     # which is how we can extract it)
     do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_imgfmt | _filter_qmp \
-        | grep "drv0" \
+        | _filter_qemu | grep "drv0" \
         | sed -e 's/^.*"file": "\(\(\\"\|[^"]\)*\)".*$/\1/' -e 's/\\"/"/g'
 }
 
diff --git a/tests/qemu-iotests/110 b/tests/qemu-iotests/110
new file mode 100755
index 0000000000000000000000000000000000000000..a687f9567ddc340463b261e0c2d0af2fd9de722b
--- /dev/null
+++ b/tests/qemu-iotests/110
@@ -0,0 +1,94 @@
+#!/bin/bash
+#
+# Test case for relative backing file names in complex BDS trees
+#
+# Copyright (C) 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=mreitz@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+# Any format supporting backing files
+_supported_fmt qed qcow qcow2 vmdk
+_supported_proto file
+_supported_os Linux
+_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat"
+
+TEST_IMG_REL=$(basename "$TEST_IMG")
+
+echo
+echo '=== Reconstructable filename ==='
+echo
+
+TEST_IMG="$TEST_IMG.base" _make_test_img 64M
+_make_test_img -b "$TEST_IMG_REL.base" 64M
+# qemu should be able to reconstruct the filename, so relative backing names
+# should work
+TEST_IMG="json:{'driver':'$IMGFMT','file':{'driver':'file','filename':'$TEST_IMG'}}" \
+    _img_info | _filter_img_info
+
+echo
+echo '=== Non-reconstructable filename ==='
+echo
+
+# Across blkdebug without a config file, you cannot reconstruct filenames, so
+# qemu is incapable of knowing the directory of the top image
+TEST_IMG="json:{
+    'driver': '$IMGFMT',
+    'file': {
+        'driver': 'blkdebug',
+        'image': {
+            'driver': 'file',
+            'filename': '$TEST_IMG'
+        },
+        'set-state': [
+            {
+                'event': 'read_aio',
+                'new_state': 42
+            }
+        ]
+    }
+}" _img_info | _filter_img_info
+
+echo
+echo '=== Backing name is always relative to the backed image ==='
+echo
+
+# omit the image size; it should work anyway
+_make_test_img -b "$TEST_IMG_REL.base"
+
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/110.out b/tests/qemu-iotests/110.out
new file mode 100644
index 0000000000000000000000000000000000000000..152bacf41eb898f43e3957f49998d1ca28196c0c
--- /dev/null
+++ b/tests/qemu-iotests/110.out
@@ -0,0 +1,19 @@
+QA output created by 110
+
+=== Reconstructable filename ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='t.IMGFMT.base'
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+backing file: t.IMGFMT.base (actual path: TEST_DIR/t.IMGFMT.base)
+
+=== Non-reconstructable filename ===
+
+qemu-img: Cannot use relative backing file names for 'json:{"driver": "IMGFMT", "file": {"set-state.0.event": "read_aio", "image": {"driver": "file", "filename": "TEST_DIR/t.IMGFMT"}, "driver": "blkdebug", "set-state.0.new_state": 42}}'
+
+=== Backing name is always relative to the backed image ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='t.IMGFMT.base'
+*** done
diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index 8ca40116d745bba9955372955fb185a6c1d4ca67..baeae80f96bb8b5c1cb20754fde6923f4baf4091 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -238,6 +238,7 @@ QEMU_NBD      -- $QEMU_NBD
 IMGFMT        -- $FULL_IMGFMT_DETAILS
 IMGPROTO      -- $FULL_IMGPROTO_DETAILS
 PLATFORM      -- $FULL_HOST_DETAILS
+TEST_DIR      -- $TEST_DIR
 SOCKET_SCM_HELPER -- $SOCKET_SCM_HELPER
 
 EOF
diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config
index 91a5ef696bcffa98191e67faf6ffbdcb90330f66..a1973ad9d01bcaa7d20e1db0e0c2aac0410a7b0c 100644
--- a/tests/qemu-iotests/common.config
+++ b/tests/qemu-iotests/common.config
@@ -155,4 +155,4 @@ _readlink()
 }
 
 # make sure this script returns success
-/bin/true
+true
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index 6c14590594a5cf5b2c4c81b084292a5941072dc8..b73c70be954f53c565e3b20f7159259553bdb0d6 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -159,6 +159,7 @@ _filter_qemu()
 {
     sed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \
         -e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \
+        -e '/main-loop: WARNING: I\/O thread spun for [0-9]\+ iterations/d' \
         -e $'s#\r##' # QEMU monitor uses \r\n line endings
 }
 
@@ -223,4 +224,4 @@ _filter_qemu_img_map()
 }
 
 # make sure this script returns success
-/bin/true
+true
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 3b14053790ec83d15abc689b55e2dabecca7fd3c..aa093d9d8426afb9e197a2625cbf08f4fe515086 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -490,4 +490,4 @@ _die()
 }
 
 # make sure this script returns success
-/bin/true
+true
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index a4742c6d019c6c0f822660306adad4a8f6c463d9..f8bf354156b540f59c4fd3c7b1217902828f5ea6 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -97,7 +97,7 @@
 088 rw auto quick
 089 rw auto quick
 090 rw auto quick
-091 rw auto quick
+091 rw auto
 092 rw auto quick
 095 rw auto quick
 097 rw auto backing
@@ -112,6 +112,7 @@
 107 rw auto quick
 108 rw auto quick
 109 rw auto
+110 rw auto backing quick
 111 rw auto quick
 113 rw auto quick
 114 rw auto quick
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index f57f1548accdbce7aa5a8b72b50af8926795746b..87002e0e2cef0817bac4855ee41a7b7e2bfe1d60 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -282,12 +282,15 @@ def notrun(reason):
     print '%s not run: %s' % (seq, reason)
     sys.exit(0)
 
-def main(supported_fmts=[]):
+def main(supported_fmts=[], supported_oses=['linux']):
     '''Run tests'''
 
     if supported_fmts and (imgfmt not in supported_fmts):
         notrun('not suitable for this image format: %s' % imgfmt)
 
+    if sys.platform not in supported_oses:
+        notrun('not suitable for this OS: %s' % sys.platform)
+
     # We need to filter out the time taken from the output so that qemu-iotest
     # can reliably diff the results against master output.
     import StringIO
diff --git a/tests/test-coroutine.c b/tests/test-coroutine.c
index e22fae170abb579e9db5ceede25b889baa162252..27d1b6f8e851f8ee8860e4c3d7d46049f2b75c94 100644
--- a/tests/test-coroutine.c
+++ b/tests/test-coroutine.c
@@ -337,7 +337,7 @@ static void perf_cost(void)
                    "%luns per coroutine",
                    maxcycles,
                    duration, ops,
-                   (unsigned long)(1000000000 * duration) / maxcycles);
+                   (unsigned long)(1000000000.0 * duration / maxcycles));
 }
 
 int main(int argc, char **argv)
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index d05a6497e147dbb223df2249aefc21d01f356e95..41cb23df0c3995053086b415e10b726acbd914ac 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -26,6 +26,7 @@
 #endif
 #include "qemu/thread.h"
 #include "qemu/atomic.h"
+#include "qemu/notify.h"
 
 static bool name_threads;
 
@@ -401,6 +402,42 @@ void qemu_event_wait(QemuEvent *ev)
     }
 }
 
+static pthread_key_t exit_key;
+
+union NotifierThreadData {
+    void *ptr;
+    NotifierList list;
+};
+QEMU_BUILD_BUG_ON(sizeof(union NotifierThreadData) != sizeof(void *));
+
+void qemu_thread_atexit_add(Notifier *notifier)
+{
+    union NotifierThreadData ntd;
+    ntd.ptr = pthread_getspecific(exit_key);
+    notifier_list_add(&ntd.list, notifier);
+    pthread_setspecific(exit_key, ntd.ptr);
+}
+
+void qemu_thread_atexit_remove(Notifier *notifier)
+{
+    union NotifierThreadData ntd;
+    ntd.ptr = pthread_getspecific(exit_key);
+    notifier_remove(notifier);
+    pthread_setspecific(exit_key, ntd.ptr);
+}
+
+static void qemu_thread_atexit_run(void *arg)
+{
+    union NotifierThreadData ntd = { .ptr = arg };
+    notifier_list_notify(&ntd.list, NULL);
+}
+
+static void __attribute__((constructor)) qemu_thread_atexit_init(void)
+{
+    pthread_key_create(&exit_key, qemu_thread_atexit_run);
+}
+
+
 /* Attempt to set the threads name; note that this is for debug, so
  * we're not going to fail if we can't set it.
  */
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index c405c9bef62d7a0ea31bb907191b6dd60c4460de..406b52f91d6c2d442d4431b35e48b7db66d05f0d 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -12,6 +12,7 @@
  */
 #include "qemu-common.h"
 #include "qemu/thread.h"
+#include "qemu/notify.h"
 #include <process.h>
 #include <assert.h>
 #include <limits.h>
@@ -268,6 +269,7 @@ struct QemuThreadData {
     void             *(*start_routine)(void *);
     void             *arg;
     short             mode;
+    NotifierList      exit;
 
     /* Only used for joinable threads. */
     bool              exited;
@@ -275,18 +277,40 @@ struct QemuThreadData {
     CRITICAL_SECTION  cs;
 };
 
+static bool atexit_registered;
+static NotifierList main_thread_exit;
+
 static __thread QemuThreadData *qemu_thread_data;
 
+static void run_main_thread_exit(void)
+{
+    notifier_list_notify(&main_thread_exit, NULL);
+}
+
+void qemu_thread_atexit_add(Notifier *notifier)
+{
+    if (!qemu_thread_data) {
+        if (!atexit_registered) {
+            atexit_registered = true;
+            atexit(run_main_thread_exit);
+        }
+        notifier_list_add(&main_thread_exit, notifier);
+    } else {
+        notifier_list_add(&qemu_thread_data->exit, notifier);
+    }
+}
+
+void qemu_thread_atexit_remove(Notifier *notifier)
+{
+    notifier_remove(notifier);
+}
+
 static unsigned __stdcall win32_start_routine(void *arg)
 {
     QemuThreadData *data = (QemuThreadData *) arg;
     void *(*start_routine)(void *) = data->start_routine;
     void *thread_arg = data->arg;
 
-    if (data->mode == QEMU_THREAD_DETACHED) {
-        g_free(data);
-        data = NULL;
-    }
     qemu_thread_data = data;
     qemu_thread_exit(start_routine(thread_arg));
     abort();
@@ -296,12 +320,14 @@ void qemu_thread_exit(void *arg)
 {
     QemuThreadData *data = qemu_thread_data;
 
-    if (data) {
-        assert(data->mode != QEMU_THREAD_DETACHED);
+    notifier_list_notify(&data->exit, NULL);
+    if (data->mode == QEMU_THREAD_JOINABLE) {
         data->ret = arg;
         EnterCriticalSection(&data->cs);
         data->exited = true;
         LeaveCriticalSection(&data->cs);
+    } else {
+        g_free(data);
     }
     _endthreadex(0);
 }
@@ -313,9 +339,10 @@ void *qemu_thread_join(QemuThread *thread)
     HANDLE handle;
 
     data = thread->data;
-    if (!data) {
+    if (data->mode == QEMU_THREAD_DETACHED) {
         return NULL;
     }
+
     /*
      * Because multiple copies of the QemuThread can exist via
      * qemu_thread_get_self, we need to store a value that cannot
@@ -329,7 +356,6 @@ void *qemu_thread_join(QemuThread *thread)
         CloseHandle(handle);
     }
     ret = data->ret;
-    assert(data->mode != QEMU_THREAD_DETACHED);
     DeleteCriticalSection(&data->cs);
     g_free(data);
     return ret;
@@ -347,6 +373,7 @@ void qemu_thread_create(QemuThread *thread, const char *name,
     data->arg = arg;
     data->mode = mode;
     data->exited = false;
+    notifier_list_init(&data->exit);
 
     if (data->mode != QEMU_THREAD_DETACHED) {
         InitializeCriticalSection(&data->cs);
@@ -358,7 +385,7 @@ void qemu_thread_create(QemuThread *thread, const char *name,
         error_exit(GetLastError(), __func__);
     }
     CloseHandle(hThread);
-    thread->data = (mode == QEMU_THREAD_DETACHED) ? NULL : data;
+    thread->data = data;
 }
 
 void qemu_thread_get_self(QemuThread *thread)
@@ -373,11 +400,10 @@ HANDLE qemu_thread_get_handle(QemuThread *thread)
     HANDLE handle;
 
     data = thread->data;
-    if (!data) {
+    if (data->mode == QEMU_THREAD_DETACHED) {
         return NULL;
     }
 
-    assert(data->mode != QEMU_THREAD_DETACHED);
     EnterCriticalSection(&data->cs);
     if (!data->exited) {
         handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME, FALSE,