diff --git a/block.c b/block.c
index 9a1a0d1e73c44209b5d8993c1e9aa5bee456a1ad..1c37ce4554b03ed2e914159013118531431cbfbd 100644
--- a/block.c
+++ b/block.c
@@ -4320,9 +4320,15 @@ int bdrv_inactivate_all(void)
     BdrvNextIterator it;
     int ret = 0;
     int pass;
+    GSList *aio_ctxs = NULL, *ctx;
 
     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
-        aio_context_acquire(bdrv_get_aio_context(bs));
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        if (!g_slist_find(aio_ctxs, aio_context)) {
+            aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
+            aio_context_acquire(aio_context);
+        }
     }
 
     /* We do two passes of inactivation. The first pass calls to drivers'
@@ -4340,9 +4346,11 @@ int bdrv_inactivate_all(void)
     }
 
 out:
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
-        aio_context_release(bdrv_get_aio_context(bs));
+    for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
+        AioContext *aio_context = ctx->data;
+        aio_context_release(aio_context);
     }
+    g_slist_free(aio_ctxs);
 
     return ret;
 }
diff --git a/block/null.c b/block/null.c
index dd9c13f9ba834d1f17784612e9ad5a5f6bd9769f..0cdabaa4402c81eab29aa4a9200164c2e5da3bc2 100644
--- a/block/null.c
+++ b/block/null.c
@@ -110,8 +110,7 @@ static coroutine_fn int null_co_common(BlockDriverState *bs)
     BDRVNullState *s = bs->opaque;
 
     if (s->latency_ns) {
-        co_aio_sleep_ns(bdrv_get_aio_context(bs), QEMU_CLOCK_REALTIME,
-                        s->latency_ns);
+        qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, s->latency_ns);
     }
     return 0;
 }
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 488bad333b48c474cc5740f59d60bd6e427930f8..f684477328a8adf51dda023fec21f3cb5094c6b5 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -776,8 +776,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
         if (s->fd < 0) {
             DPRINTF("Wait for connection to be established\n");
             error_report_err(local_err);
-            co_aio_sleep_ns(bdrv_get_aio_context(s->bs), QEMU_CLOCK_REALTIME,
-                            1000000000ULL);
+            qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000000ULL);
         }
     };
 
diff --git a/blockdev.c b/blockdev.c
index 56a6b24a0be3dd78c397ab95f92fcc7a7e113909..9c3a430cfbb4c2794545749bd668c0a856d994cd 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -45,6 +45,7 @@
 #include "qapi/qmp/qerror.h"
 #include "qapi/qobject-output-visitor.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/iothread.h"
 #include "block/block_int.h"
 #include "qmp-commands.h"
 #include "block/trace.h"
@@ -1454,7 +1455,6 @@ struct BlkActionState {
 typedef struct InternalSnapshotState {
     BlkActionState common;
     BlockDriverState *bs;
-    AioContext *aio_context;
     QEMUSnapshotInfo sn;
     bool created;
 } InternalSnapshotState;
@@ -1485,6 +1485,7 @@ static void internal_snapshot_prepare(BlkActionState *common,
     qemu_timeval tv;
     BlockdevSnapshotInternal *internal;
     InternalSnapshotState *state;
+    AioContext *aio_context;
     int ret1;
 
     g_assert(common->action->type ==
@@ -1506,32 +1507,33 @@ static void internal_snapshot_prepare(BlkActionState *common,
         return;
     }
 
-    /* AioContext is released in .clean() */
-    state->aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(state->aio_context);
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
 
     state->bs = bs;
+
+    /* Paired with .clean() */
     bdrv_drained_begin(bs);
 
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) {
-        return;
+        goto out;
     }
 
     if (bdrv_is_read_only(bs)) {
         error_setg(errp, "Device '%s' is read only", device);
-        return;
+        goto out;
     }
 
     if (!bdrv_can_snapshot(bs)) {
         error_setg(errp, "Block format '%s' used by device '%s' "
                    "does not support internal snapshots",
                    bs->drv->format_name, device);
-        return;
+        goto out;
     }
 
     if (!strlen(name)) {
         error_setg(errp, "Name is empty");
-        return;
+        goto out;
     }
 
     /* check whether a snapshot with name exist */
@@ -1539,12 +1541,12 @@ static void internal_snapshot_prepare(BlkActionState *common,
                                             &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
-        return;
+        goto out;
     } else if (ret) {
         error_setg(errp,
                    "Snapshot with name '%s' already exists on device '%s'",
                    name, device);
-        return;
+        goto out;
     }
 
     /* 3. take the snapshot */
@@ -1560,11 +1562,14 @@ static void internal_snapshot_prepare(BlkActionState *common,
         error_setg_errno(errp, -ret1,
                          "Failed to create snapshot '%s' on device '%s'",
                          name, device);
-        return;
+        goto out;
     }
 
     /* 4. succeed, mark a snapshot is created */
     state->created = true;
+
+out:
+    aio_context_release(aio_context);
 }
 
 static void internal_snapshot_abort(BlkActionState *common)
@@ -1573,12 +1578,16 @@ static void internal_snapshot_abort(BlkActionState *common)
                              DO_UPCAST(InternalSnapshotState, common, common);
     BlockDriverState *bs = state->bs;
     QEMUSnapshotInfo *sn = &state->sn;
+    AioContext *aio_context;
     Error *local_error = NULL;
 
     if (!state->created) {
         return;
     }
 
+    aio_context = bdrv_get_aio_context(state->bs);
+    aio_context_acquire(aio_context);
+
     if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) {
         error_reportf_err(local_error,
                           "Failed to delete snapshot with id '%s' and "
@@ -1586,19 +1595,26 @@ static void internal_snapshot_abort(BlkActionState *common)
                           sn->id_str, sn->name,
                           bdrv_get_device_name(bs));
     }
+
+    aio_context_release(aio_context);
 }
 
 static void internal_snapshot_clean(BlkActionState *common)
 {
     InternalSnapshotState *state = DO_UPCAST(InternalSnapshotState,
                                              common, common);
+    AioContext *aio_context;
 
-    if (state->aio_context) {
-        if (state->bs) {
-            bdrv_drained_end(state->bs);
-        }
-        aio_context_release(state->aio_context);
+    if (!state->bs) {
+        return;
     }
+
+    aio_context = bdrv_get_aio_context(state->bs);
+    aio_context_acquire(aio_context);
+
+    bdrv_drained_end(state->bs);
+
+    aio_context_release(aio_context);
 }
 
 /* external snapshot private data */
@@ -1606,7 +1622,6 @@ typedef struct ExternalSnapshotState {
     BlkActionState common;
     BlockDriverState *old_bs;
     BlockDriverState *new_bs;
-    AioContext *aio_context;
     bool overlay_appended;
 } ExternalSnapshotState;
 
@@ -1626,6 +1641,7 @@ static void external_snapshot_prepare(BlkActionState *common,
     ExternalSnapshotState *state =
                              DO_UPCAST(ExternalSnapshotState, common, common);
     TransactionAction *action = common->action;
+    AioContext *aio_context;
 
     /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar
      * purpose but a different set of parameters */
@@ -1662,31 +1678,32 @@ static void external_snapshot_prepare(BlkActionState *common,
         return;
     }
 
-    /* Acquire AioContext now so any threads operating on old_bs stop */
-    state->aio_context = bdrv_get_aio_context(state->old_bs);
-    aio_context_acquire(state->aio_context);
+    aio_context = bdrv_get_aio_context(state->old_bs);
+    aio_context_acquire(aio_context);
+
+    /* Paired with .clean() */
     bdrv_drained_begin(state->old_bs);
 
     if (!bdrv_is_inserted(state->old_bs)) {
         error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
-        return;
+        goto out;
     }
 
     if (bdrv_op_is_blocked(state->old_bs,
                            BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, errp)) {
-        return;
+        goto out;
     }
 
     if (!bdrv_is_read_only(state->old_bs)) {
         if (bdrv_flush(state->old_bs)) {
             error_setg(errp, QERR_IO_ERROR);
-            return;
+            goto out;
         }
     }
 
     if (!bdrv_is_first_non_filter(state->old_bs)) {
         error_setg(errp, QERR_FEATURE_DISABLED, "snapshot");
-        return;
+        goto out;
     }
 
     if (action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC) {
@@ -1698,13 +1715,13 @@ static void external_snapshot_prepare(BlkActionState *common,
 
         if (node_name && !snapshot_node_name) {
             error_setg(errp, "New snapshot node name missing");
-            return;
+            goto out;
         }
 
         if (snapshot_node_name &&
             bdrv_lookup_bs(snapshot_node_name, snapshot_node_name, NULL)) {
             error_setg(errp, "New snapshot node name already in use");
-            return;
+            goto out;
         }
 
         flags = state->old_bs->open_flags;
@@ -1717,7 +1734,7 @@ static void external_snapshot_prepare(BlkActionState *common,
             int64_t size = bdrv_getlength(state->old_bs);
             if (size < 0) {
                 error_setg_errno(errp, -size, "bdrv_getlength failed");
-                return;
+                goto out;
             }
             bdrv_img_create(new_image_file, format,
                             state->old_bs->filename,
@@ -1725,7 +1742,7 @@ static void external_snapshot_prepare(BlkActionState *common,
                             NULL, size, flags, false, &local_err);
             if (local_err) {
                 error_propagate(errp, local_err);
-                return;
+                goto out;
             }
         }
 
@@ -1740,30 +1757,30 @@ static void external_snapshot_prepare(BlkActionState *common,
                               errp);
     /* We will manually add the backing_hd field to the bs later */
     if (!state->new_bs) {
-        return;
+        goto out;
     }
 
     if (bdrv_has_blk(state->new_bs)) {
         error_setg(errp, "The snapshot is already in use");
-        return;
+        goto out;
     }
 
     if (bdrv_op_is_blocked(state->new_bs, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
                            errp)) {
-        return;
+        goto out;
     }
 
     if (state->new_bs->backing != NULL) {
         error_setg(errp, "The snapshot already has a backing image");
-        return;
+        goto out;
     }
 
     if (!state->new_bs->drv->supports_backing) {
         error_setg(errp, "The snapshot does not support backing images");
-        return;
+        goto out;
     }
 
-    bdrv_set_aio_context(state->new_bs, state->aio_context);
+    bdrv_set_aio_context(state->new_bs, aio_context);
 
     /* This removes our old bs and adds the new bs. This is an operation that
      * can fail, so we need to do it in .prepare; undoing it for abort is
@@ -1772,15 +1789,22 @@ static void external_snapshot_prepare(BlkActionState *common,
     bdrv_append(state->new_bs, state->old_bs, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
-        return;
+        goto out;
     }
     state->overlay_appended = true;
+
+out:
+    aio_context_release(aio_context);
 }
 
 static void external_snapshot_commit(BlkActionState *common)
 {
     ExternalSnapshotState *state =
                              DO_UPCAST(ExternalSnapshotState, common, common);
+    AioContext *aio_context;
+
+    aio_context = bdrv_get_aio_context(state->old_bs);
+    aio_context_acquire(aio_context);
 
     /* We don't need (or want) to use the transactional
      * bdrv_reopen_multiple() across all the entries at once, because we
@@ -1789,6 +1813,8 @@ static void external_snapshot_commit(BlkActionState *common)
         bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
                     NULL);
     }
+
+    aio_context_release(aio_context);
 }
 
 static void external_snapshot_abort(BlkActionState *common)
@@ -1797,11 +1823,18 @@ static void external_snapshot_abort(BlkActionState *common)
                              DO_UPCAST(ExternalSnapshotState, common, common);
     if (state->new_bs) {
         if (state->overlay_appended) {
+            AioContext *aio_context;
+
+            aio_context = bdrv_get_aio_context(state->old_bs);
+            aio_context_acquire(aio_context);
+
             bdrv_ref(state->old_bs);   /* we can't let bdrv_set_backind_hd()
                                           close state->old_bs; we need it */
             bdrv_set_backing_hd(state->new_bs, NULL, &error_abort);
             bdrv_replace_node(state->new_bs, state->old_bs, &error_abort);
             bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */
+
+            aio_context_release(aio_context);
         }
     }
 }
@@ -1810,17 +1843,24 @@ static void external_snapshot_clean(BlkActionState *common)
 {
     ExternalSnapshotState *state =
                              DO_UPCAST(ExternalSnapshotState, common, common);
-    if (state->aio_context) {
-        bdrv_drained_end(state->old_bs);
-        aio_context_release(state->aio_context);
-        bdrv_unref(state->new_bs);
+    AioContext *aio_context;
+
+    if (!state->old_bs) {
+        return;
     }
+
+    aio_context = bdrv_get_aio_context(state->old_bs);
+    aio_context_acquire(aio_context);
+
+    bdrv_drained_end(state->old_bs);
+    bdrv_unref(state->new_bs);
+
+    aio_context_release(aio_context);
 }
 
 typedef struct DriveBackupState {
     BlkActionState common;
     BlockDriverState *bs;
-    AioContext *aio_context;
     BlockJob *job;
 } DriveBackupState;
 
@@ -1832,6 +1872,7 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
     DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
     BlockDriverState *bs;
     DriveBackup *backup;
+    AioContext *aio_context;
     Error *local_err = NULL;
 
     assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
@@ -1842,24 +1883,36 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
         return;
     }
 
-    /* AioContext is released in .clean() */
-    state->aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(state->aio_context);
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
+    /* Paired with .clean() */
     bdrv_drained_begin(bs);
+
     state->bs = bs;
 
     state->job = do_drive_backup(backup, common->block_job_txn, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
-        return;
+        goto out;
     }
+
+out:
+    aio_context_release(aio_context);
 }
 
 static void drive_backup_commit(BlkActionState *common)
 {
     DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+    AioContext *aio_context;
+
+    aio_context = bdrv_get_aio_context(state->bs);
+    aio_context_acquire(aio_context);
+
     assert(state->job);
     block_job_start(state->job);
+
+    aio_context_release(aio_context);
 }
 
 static void drive_backup_abort(BlkActionState *common)
@@ -1867,25 +1920,38 @@ static void drive_backup_abort(BlkActionState *common)
     DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
 
     if (state->job) {
+        AioContext *aio_context;
+
+        aio_context = bdrv_get_aio_context(state->bs);
+        aio_context_acquire(aio_context);
+
         block_job_cancel_sync(state->job);
+
+        aio_context_release(aio_context);
     }
 }
 
 static void drive_backup_clean(BlkActionState *common)
 {
     DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+    AioContext *aio_context;
 
-    if (state->aio_context) {
-        bdrv_drained_end(state->bs);
-        aio_context_release(state->aio_context);
+    if (!state->bs) {
+        return;
     }
+
+    aio_context = bdrv_get_aio_context(state->bs);
+    aio_context_acquire(aio_context);
+
+    bdrv_drained_end(state->bs);
+
+    aio_context_release(aio_context);
 }
 
 typedef struct BlockdevBackupState {
     BlkActionState common;
     BlockDriverState *bs;
     BlockJob *job;
-    AioContext *aio_context;
 } BlockdevBackupState;
 
 static BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
@@ -1896,6 +1962,7 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
     BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
     BlockdevBackup *backup;
     BlockDriverState *bs, *target;
+    AioContext *aio_context;
     Error *local_err = NULL;
 
     assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
@@ -1911,29 +1978,39 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
         return;
     }
 
-    /* AioContext is released in .clean() */
-    state->aio_context = bdrv_get_aio_context(bs);
-    if (state->aio_context != bdrv_get_aio_context(target)) {
-        state->aio_context = NULL;
+    aio_context = bdrv_get_aio_context(bs);
+    if (aio_context != bdrv_get_aio_context(target)) {
         error_setg(errp, "Backup between two IO threads is not implemented");
         return;
     }
-    aio_context_acquire(state->aio_context);
+    aio_context_acquire(aio_context);
     state->bs = bs;
+
+    /* Paired with .clean() */
     bdrv_drained_begin(state->bs);
 
     state->job = do_blockdev_backup(backup, common->block_job_txn, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
-        return;
+        goto out;
     }
+
+out:
+    aio_context_release(aio_context);
 }
 
 static void blockdev_backup_commit(BlkActionState *common)
 {
     BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+    AioContext *aio_context;
+
+    aio_context = bdrv_get_aio_context(state->bs);
+    aio_context_acquire(aio_context);
+
     assert(state->job);
     block_job_start(state->job);
+
+    aio_context_release(aio_context);
 }
 
 static void blockdev_backup_abort(BlkActionState *common)
@@ -1941,25 +2018,38 @@ static void blockdev_backup_abort(BlkActionState *common)
     BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
 
     if (state->job) {
+        AioContext *aio_context;
+
+        aio_context = bdrv_get_aio_context(state->bs);
+        aio_context_acquire(aio_context);
+
         block_job_cancel_sync(state->job);
+
+        aio_context_release(aio_context);
     }
 }
 
 static void blockdev_backup_clean(BlkActionState *common)
 {
     BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+    AioContext *aio_context;
 
-    if (state->aio_context) {
-        bdrv_drained_end(state->bs);
-        aio_context_release(state->aio_context);
+    if (!state->bs) {
+        return;
     }
+
+    aio_context = bdrv_get_aio_context(state->bs);
+    aio_context_acquire(aio_context);
+
+    bdrv_drained_end(state->bs);
+
+    aio_context_release(aio_context);
 }
 
 typedef struct BlockDirtyBitmapState {
     BlkActionState common;
     BdrvDirtyBitmap *bitmap;
     BlockDriverState *bs;
-    AioContext *aio_context;
     HBitmap *backup;
     bool prepared;
 } BlockDirtyBitmapState;
@@ -2038,7 +2128,6 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
     }
 
     bdrv_clear_dirty_bitmap(state->bitmap, &state->backup);
-    /* AioContext is released in .clean() */
 }
 
 static void block_dirty_bitmap_clear_abort(BlkActionState *common)
@@ -2059,16 +2148,6 @@ static void block_dirty_bitmap_clear_commit(BlkActionState *common)
     hbitmap_free(state->backup);
 }
 
-static void block_dirty_bitmap_clear_clean(BlkActionState *common)
-{
-    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
-                                             common, common);
-
-    if (state->aio_context) {
-        aio_context_release(state->aio_context);
-    }
-}
-
 static void abort_prepare(BlkActionState *common, Error **errp)
 {
     error_setg(errp, "Transaction aborted using Abort action");
@@ -2129,7 +2208,6 @@ static const BlkActionOps actions[] = {
         .prepare = block_dirty_bitmap_clear_prepare,
         .commit = block_dirty_bitmap_clear_commit,
         .abort = block_dirty_bitmap_clear_abort,
-        .clean = block_dirty_bitmap_clear_clean,
     }
 };
 
@@ -4052,6 +4130,47 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp)
     return head;
 }
 
+void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread,
+                                 bool has_force, bool force, Error **errp)
+{
+    AioContext *old_context;
+    AioContext *new_context;
+    BlockDriverState *bs;
+
+    bs = bdrv_find_node(node_name);
+    if (!bs) {
+        error_setg(errp, "Cannot find node %s", node_name);
+        return;
+    }
+
+    /* Protects against accidents. */
+    if (!(has_force && force) && bdrv_has_blk(bs)) {
+        error_setg(errp, "Node %s is associated with a BlockBackend and could "
+                         "be in use (use force=true to override this check)",
+                         node_name);
+        return;
+    }
+
+    if (iothread->type == QTYPE_QSTRING) {
+        IOThread *obj = iothread_by_id(iothread->u.s);
+        if (!obj) {
+            error_setg(errp, "Cannot find iothread %s", iothread->u.s);
+            return;
+        }
+
+        new_context = iothread_get_aio_context(obj);
+    } else {
+        new_context = qemu_get_aio_context();
+    }
+
+    old_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(old_context);
+
+    bdrv_set_aio_context(bs, new_context);
+
+    aio_context_release(old_context);
+}
+
 QemuOptsList qemu_common_drive_opts = {
     .name = "drive",
     .head = QTAILQ_HEAD_INITIALIZER(qemu_common_drive_opts.head),
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
index e4d340bbb7fe72656d742087238c5d5ab6d7a313..4f9012d154b94c9726483408c5a2c5aa79a2fc92 100644
--- a/docs/devel/multiple-iothreads.txt
+++ b/docs/devel/multiple-iothreads.txt
@@ -1,4 +1,4 @@
-Copyright (c) 2014 Red Hat Inc.
+Copyright (c) 2014-2017 Red Hat Inc.
 
 This work is licensed under the terms of the GNU GPL, version 2 or later.  See
 the COPYING file in the top-level directory.
@@ -92,8 +92,9 @@ aio_context_acquire()/aio_context_release() for mutual exclusion.  Once the
 context is acquired no other thread can access it or run event loop iterations
 in this AioContext.
 
-aio_context_acquire()/aio_context_release() calls may be nested.  This
-means you can call them if you're not sure whether #2 applies.
+Legacy code sometimes nests aio_context_acquire()/aio_context_release() calls.
+Do not use nesting anymore, it is incompatible with the BDRV_POLL_WHILE() macro
+used in the block layer and can lead to hangs.
 
 There is currently no lock ordering rule if a thread needs to acquire multiple
 AioContexts simultaneously.  Therefore, it is only safe for code holding the
diff --git a/hw/block/block.c b/hw/block/block.c
index 27878d0087a37f3dd12bbb8474dfad30e432005f..b0269c857fdfccd8e11f5e36fba106be4951e926 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -51,7 +51,7 @@ void blkconf_blocksizes(BlockConf *conf)
     }
 }
 
-void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
+bool blkconf_apply_backend_options(BlockConf *conf, bool readonly,
                                    bool resizable, Error **errp)
 {
     BlockBackend *blk = conf->blk;
@@ -76,7 +76,7 @@ void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
 
     ret = blk_set_perm(blk, perm, shared_perm, errp);
     if (ret < 0) {
-        return;
+        return false;
     }
 
     switch (conf->wce) {
@@ -99,9 +99,11 @@ void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
 
     blk_set_enable_write_cache(blk, wce);
     blk_set_on_error(blk, rerror, werror);
+
+    return true;
 }
 
-void blkconf_geometry(BlockConf *conf, int *ptrans,
+bool blkconf_geometry(BlockConf *conf, int *ptrans,
                       unsigned cyls_max, unsigned heads_max, unsigned secs_max,
                       Error **errp)
 {
@@ -129,15 +131,16 @@ void blkconf_geometry(BlockConf *conf, int *ptrans,
     if (conf->cyls || conf->heads || conf->secs) {
         if (conf->cyls < 1 || conf->cyls > cyls_max) {
             error_setg(errp, "cyls must be between 1 and %u", cyls_max);
-            return;
+            return false;
         }
         if (conf->heads < 1 || conf->heads > heads_max) {
             error_setg(errp, "heads must be between 1 and %u", heads_max);
-            return;
+            return false;
         }
         if (conf->secs < 1 || conf->secs > secs_max) {
             error_setg(errp, "secs must be between 1 and %u", secs_max);
-            return;
+            return false;
         }
     }
+    return true;
 }
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 5556f0e64e55019debdb33b89309d65e53b5a57a..f6fc639e88d7c65aed2f6768e7aee3c7ff86f484 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -76,7 +76,7 @@ static void notify_guest_bh(void *opaque)
 }
 
 /* Context: QEMU global mutex held */
-void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
+bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
                                   VirtIOBlockDataPlane **dataplane,
                                   Error **errp)
 {
@@ -91,11 +91,11 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
             error_setg(errp,
                        "device is incompatible with iothread "
                        "(transport does not support notifiers)");
-            return;
+            return false;
         }
         if (!virtio_device_ioeventfd_enabled(vdev)) {
             error_setg(errp, "ioeventfd is required for iothread");
-            return;
+            return false;
         }
 
         /* If dataplane is (re-)enabled while the guest is running there could
@@ -103,12 +103,12 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
          */
         if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) {
             error_prepend(errp, "cannot start virtio-blk dataplane: ");
-            return;
+            return false;
         }
     }
     /* Don't try if transport does not support notifiers. */
     if (!virtio_device_ioeventfd_enabled(vdev)) {
-        return;
+        return false;
     }
 
     s = g_new0(VirtIOBlockDataPlane, 1);
@@ -126,6 +126,8 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
     s->batch_notify_vqs = bitmap_new(conf->num_queues);
 
     *dataplane = s;
+
+    return true;
 }
 
 /* Context: QEMU global mutex held */
diff --git a/hw/block/dataplane/virtio-blk.h b/hw/block/dataplane/virtio-blk.h
index db3f47b173adef8e185603c0bb997a29db6c4e3d..5e18bb99aeb8ea8b535178c746fde2c14988dd74 100644
--- a/hw/block/dataplane/virtio-blk.h
+++ b/hw/block/dataplane/virtio-blk.h
@@ -19,7 +19,7 @@
 
 typedef struct VirtIOBlockDataPlane VirtIOBlockDataPlane;
 
-void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
+bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
                                   VirtIOBlockDataPlane **dataplane,
                                   Error **errp);
 void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s);
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 67f78ac7021cfa94838559fb2c523181ce8cc01e..7b7dd41296d006657e5429d9a0b0b8ab495d86dd 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -473,16 +473,13 @@ static void fd_revalidate(FDrive *drv)
 static void fd_change_cb(void *opaque, bool load, Error **errp)
 {
     FDrive *drive = opaque;
-    Error *local_err = NULL;
 
     if (!load) {
         blk_set_perm(drive->blk, 0, BLK_PERM_ALL, &error_abort);
     } else {
-        blkconf_apply_backend_options(drive->conf,
-                                      blk_is_read_only(drive->blk), false,
-                                      &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
+        if (!blkconf_apply_backend_options(drive->conf,
+                                           blk_is_read_only(drive->blk), false,
+                                           errp)) {
             return;
         }
     }
@@ -522,7 +519,6 @@ static void floppy_drive_realize(DeviceState *qdev, Error **errp)
     FloppyDrive *dev = FLOPPY_DRIVE(qdev);
     FloppyBus *bus = FLOPPY_BUS(qdev->parent_bus);
     FDrive *drive;
-    Error *local_err = NULL;
     int ret;
 
     if (dev->unit == -1) {
@@ -568,10 +564,9 @@ static void floppy_drive_realize(DeviceState *qdev, Error **errp)
     dev->conf.rerror = BLOCKDEV_ON_ERROR_AUTO;
     dev->conf.werror = BLOCKDEV_ON_ERROR_AUTO;
 
-    blkconf_apply_backend_options(&dev->conf, blk_is_read_only(dev->conf.blk),
-                                  false, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    if (!blkconf_apply_backend_options(&dev->conf,
+                                       blk_is_read_only(dev->conf.blk),
+                                       false, errp)) {
         return;
     }
 
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 441e21ed1f2e1966c3ff7a6c17cad41a6ace89ad..e529e88e4e2da3bf1603d5ad91459e58a66537a4 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -920,7 +920,7 @@ static const MemoryRegionOps nvme_cmb_ops = {
     },
 };
 
-static int nvme_init(PCIDevice *pci_dev)
+static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 {
     NvmeCtrl *n = NVME(pci_dev);
     NvmeIdCtrl *id = &n->id_ctrl;
@@ -928,27 +928,27 @@ static int nvme_init(PCIDevice *pci_dev)
     int i;
     int64_t bs_size;
     uint8_t *pci_conf;
-    Error *local_err = NULL;
 
     if (!n->conf.blk) {
-        return -1;
+        error_setg(errp, "drive property not set");
+        return;
     }
 
     bs_size = blk_getlength(n->conf.blk);
     if (bs_size < 0) {
-        return -1;
+        error_setg(errp, "could not get backing file size");
+        return;
     }
 
     blkconf_serial(&n->conf, &n->serial);
     if (!n->serial) {
-        return -1;
+        error_setg(errp, "serial property not set");
+        return;
     }
     blkconf_blocksizes(&n->conf);
-    blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
-                                  false, &local_err);
-    if (local_err) {
-        error_report_err(local_err);
-        return -1;
+    if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
+                                       false, errp)) {
+        return;
     }
 
     pci_conf = pci_dev->config;
@@ -1046,7 +1046,6 @@ static int nvme_init(PCIDevice *pci_dev)
             cpu_to_le64(n->ns_size >>
                 id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
     }
-    return 0;
 }
 
 static void nvme_exit(PCIDevice *pci_dev)
@@ -1081,7 +1080,7 @@ static void nvme_class_init(ObjectClass *oc, void *data)
     DeviceClass *dc = DEVICE_CLASS(oc);
     PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
 
-    pc->init = nvme_init;
+    pc->realize = nvme_realize;
     pc->exit = nvme_exit;
     pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
     pc->vendor_id = PCI_VENDOR_ID_INTEL;
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 05d1440786e29771eea98f0f9fb97f19f7955cfb..b1532e4e9177bc00838f8641b8cb9106fa9df648 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -928,23 +928,34 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
         error_setg(errp, "num-queues property must be larger than 0");
         return;
     }
+    if (!is_power_of_2(conf->queue_size) ||
+        conf->queue_size > VIRTQUEUE_MAX_SIZE) {
+        error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
+                   "must be a power of 2 (max %d)",
+                   conf->queue_size, VIRTQUEUE_MAX_SIZE);
+        return;
+    }
 
     blkconf_serial(&conf->conf, &conf->serial);
-    blkconf_apply_backend_options(&conf->conf,
-                                  blk_is_read_only(conf->conf.blk), true,
-                                  &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!blkconf_apply_backend_options(&conf->conf,
+                                       blk_is_read_only(conf->conf.blk), true,
+                                       errp)) {
         return;
     }
     s->original_wce = blk_enable_write_cache(conf->conf.blk);
-    blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, errp)) {
         return;
     }
+
     blkconf_blocksizes(&conf->conf);
 
+    if (conf->conf.logical_block_size >
+        conf->conf.physical_block_size) {
+        error_setg(errp,
+                   "logical_block_size > physical_block_size not supported");
+        return;
+    }
+
     virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
                 sizeof(struct virtio_blk_config));
 
@@ -953,7 +964,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
     s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1;
 
     for (i = 0; i < conf->num_queues; i++) {
-        virtio_add_queue(vdev, 128, virtio_blk_handle_output);
+        virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output);
     }
     virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err);
     if (err != NULL) {
@@ -1012,6 +1023,7 @@ static Property virtio_blk_properties[] = {
     DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0,
                     true),
     DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, 1),
+    DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 128),
     DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD,
                      IOThread *),
     DEFINE_PROP_END_OF_LIST(),
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index ec10da7424c801d72927e70713a16c146b4b4945..1d3ba722faaf285f415a9a5c005f8ca338c65ae5 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -22,6 +22,7 @@
 #include "qapi/visitor.h"
 #include "chardev/char-fe.h"
 #include "sysemu/iothread.h"
+#include "sysemu/tpm_backend.h"
 
 static void get_pointer(Object *obj, Visitor *v, Property *prop,
                         char *(*print)(void *ptr),
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index a5181b444899940684444b9702c208b15433358e..f395d245927501f848a1b022896ebb5720c9ecc3 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -160,7 +160,6 @@ static void ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind, Error **errp)
 {
     IDEBus *bus = DO_UPCAST(IDEBus, qbus, dev->qdev.parent_bus);
     IDEState *s = bus->ifs + dev->unit;
-    Error *err = NULL;
     int ret;
 
     if (!dev->conf.blk) {
@@ -191,16 +190,13 @@ static void ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind, Error **errp)
 
     blkconf_serial(&dev->conf, &dev->serial);
     if (kind != IDE_CD) {
-        blkconf_geometry(&dev->conf, &dev->chs_trans, 65535, 16, 255, &err);
-        if (err) {
-            error_propagate(errp, err);
+        if (!blkconf_geometry(&dev->conf, &dev->chs_trans, 65535, 16, 255,
+                              errp)) {
             return;
         }
     }
-    blkconf_apply_backend_options(&dev->conf, kind == IDE_CD, kind != IDE_CD,
-                                  &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!blkconf_apply_backend_options(&dev->conf, kind == IDE_CD,
+                                       kind != IDE_CD, errp)) {
         return;
     }
 
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 12431177a722445818c0ad7c160b48eb262a665f..870d9ae85a7a556f903456874a5e903a1d258128 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2332,7 +2332,6 @@ static void scsi_disk_unit_attention_reported(SCSIDevice *dev)
 static void scsi_realize(SCSIDevice *dev, Error **errp)
 {
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
-    Error *err = NULL;
 
     if (!s->qdev.conf.blk) {
         error_setg(errp, "drive property not set");
@@ -2356,17 +2355,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
     }
 
     if (dev->type == TYPE_DISK) {
-        blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, &err);
-        if (err) {
-            error_propagate(errp, err);
+        if (!blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, errp)) {
             return;
         }
     }
-    blkconf_apply_backend_options(&dev->conf,
-                                  blk_is_read_only(s->qdev.conf.blk),
-                                  dev->type == TYPE_DISK, &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!blkconf_apply_backend_options(&dev->conf,
+                                       blk_is_read_only(s->qdev.conf.blk),
+                                       dev->type == TYPE_DISK, errp)) {
         return;
     }
 
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index 8a61ec94c80536cdf504310ef9429b5f30a3231d..9722ac854c5de875e1548dfba632a08be937734d 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -596,12 +596,11 @@ static void usb_msd_unrealize_storage(USBDevice *dev, Error **errp)
     object_unref(OBJECT(&s->bus));
 }
 
-static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
+static void usb_msd_storage_realize(USBDevice *dev, Error **errp)
 {
     MSDState *s = USB_STORAGE_DEV(dev);
     BlockBackend *blk = s->conf.blk;
     SCSIDevice *scsi_dev;
-    Error *err = NULL;
 
     if (!blk) {
         error_setg(errp, "drive property not set");
@@ -610,9 +609,8 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
 
     blkconf_serial(&s->conf, &dev->serial);
     blkconf_blocksizes(&s->conf);
-    blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true, &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true,
+                                       errp)) {
         return;
     }
 
@@ -636,24 +634,23 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
                  &usb_msd_scsi_info_storage, NULL);
     scsi_dev = scsi_bus_legacy_add_drive(&s->bus, blk, 0, !!s->removable,
                                          s->conf.bootindex, dev->serial,
-                                         &err);
+                                         errp);
     blk_unref(blk);
     if (!scsi_dev) {
-        error_propagate(errp, err);
         return;
     }
     usb_msd_handle_reset(dev);
     s->scsi_dev = scsi_dev;
 }
 
-static void usb_msd_unrealize_bot(USBDevice *dev, Error **errp)
+static void usb_msd_bot_unrealize(USBDevice *dev, Error **errp)
 {
     MSDState *s = USB_STORAGE_DEV(dev);
 
     object_unref(OBJECT(&s->bus));
 }
 
-static void usb_msd_realize_bot(USBDevice *dev, Error **errp)
+static void usb_msd_bot_realize(USBDevice *dev, Error **errp)
 {
     MSDState *s = USB_STORAGE_DEV(dev);
     DeviceState *d = DEVICE(dev);
@@ -767,12 +764,12 @@ static void usb_msd_class_initfn_common(ObjectClass *klass, void *data)
     dc->vmsd = &vmstate_usb_msd;
 }
 
-static void usb_msd_class_initfn_storage(ObjectClass *klass, void *data)
+static void usb_msd_class_storage_initfn(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
 
-    uc->realize = usb_msd_realize_storage;
+    uc->realize = usb_msd_storage_realize;
     uc->unrealize = usb_msd_unrealize_storage;
     dc->props = msd_properties;
 }
@@ -831,26 +828,26 @@ static void usb_msd_instance_init(Object *obj)
     object_property_set_int(obj, -1, "bootindex", NULL);
 }
 
-static void usb_msd_class_initfn_bot(ObjectClass *klass, void *data)
+static void usb_msd_class_bot_initfn(ObjectClass *klass, void *data)
 {
     USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
 
-    uc->realize = usb_msd_realize_bot;
-    uc->unrealize = usb_msd_unrealize_bot;
+    uc->realize = usb_msd_bot_realize;
+    uc->unrealize = usb_msd_bot_unrealize;
     uc->attached_settable = true;
 }
 
 static const TypeInfo msd_info = {
     .name          = "usb-storage",
     .parent        = TYPE_USB_STORAGE,
-    .class_init    = usb_msd_class_initfn_storage,
+    .class_init    = usb_msd_class_storage_initfn,
     .instance_init = usb_msd_instance_init,
 };
 
 static const TypeInfo bot_info = {
     .name          = "usb-bot",
     .parent        = TYPE_USB_STORAGE,
-    .class_init    = usb_msd_class_initfn_bot,
+    .class_init    = usb_msd_class_bot_initfn,
 };
 
 static void usb_msd_register_types(void)
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index f3f6e8ef02cefdbdd49bfe893142a2356a96a63d..64b929882953f4bd756d6b19d5068fce3d133678 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -72,11 +72,11 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
 /* Configuration helpers */
 
 void blkconf_serial(BlockConf *conf, char **serial);
-void blkconf_geometry(BlockConf *conf, int *trans,
+bool blkconf_geometry(BlockConf *conf, int *trans,
                       unsigned cyls_max, unsigned heads_max, unsigned secs_max,
                       Error **errp);
 void blkconf_blocksizes(BlockConf *conf);
-void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
+bool blkconf_apply_backend_options(BlockConf *conf, bool readonly,
                                    bool resizable, Error **errp);
 
 /* Hard disk geometry */
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index d3c8a6fa8c26aadf75739ad000cff6e611b560cc..5117431d9607b726bd78359687c9022a708d1c91 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -39,6 +39,7 @@ struct VirtIOBlkConf
     uint32_t config_wce;
     uint32_t request_merging;
     uint16_t num_queues;
+    uint16_t queue_size;
 };
 
 struct VirtIOBlockDataPlane;
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index 9aff9a735e9fb9f724b1b422c93a1bb396b3095e..ce2eb736706be9bbd34dad067b153330f902eb16 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -261,12 +261,8 @@ void qemu_co_rwlock_unlock(CoRwlock *lock);
 
 /**
  * Yield the coroutine for a given duration
- *
- * Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be
- * resumed when using aio_poll().
  */
-void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
-                                  int64_t ns);
+void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns);
 
 /**
  * Yield until a file descriptor becomes readable
diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
index 110329b2b417027589f24023c481666c21d18c3c..799614ffd272be4db4667d4f7dbc8f9f7e5ef116 100644
--- a/include/sysemu/iothread.h
+++ b/include/sysemu/iothread.h
@@ -29,7 +29,8 @@ typedef struct {
     GOnce once;
     QemuMutex init_done_lock;
     QemuCond init_done_cond;    /* is thread initialization done? */
-    bool stopping;
+    bool stopping;              /* has iothread_stop() been called? */
+    bool running;               /* should iothread_run() continue? */
     int thread_id;
 
     /* AioContext poll parameters */
@@ -42,6 +43,7 @@ typedef struct {
    OBJECT_CHECK(IOThread, obj, TYPE_IOTHREAD)
 
 char *iothread_get_id(IOThread *iothread);
+IOThread *iothread_by_id(const char *id);
 AioContext *iothread_get_aio_context(IOThread *iothread);
 void iothread_stop_all(void);
 GMainContext *iothread_get_g_main_context(IOThread *iothread);
diff --git a/iothread.c b/iothread.c
index 27a42885781aa7e5bcc251be34fbc9440545d989..d8b6c1fb27b9499176da71229d88d2ad91c43265 100644
--- a/iothread.c
+++ b/iothread.c
@@ -55,7 +55,7 @@ static void *iothread_run(void *opaque)
     qemu_cond_signal(&iothread->init_done_cond);
     qemu_mutex_unlock(&iothread->init_done_lock);
 
-    while (!atomic_read(&iothread->stopping)) {
+    while (iothread->running) {
         aio_poll(iothread->ctx, true);
 
         if (atomic_read(&iothread->worker_context)) {
@@ -78,16 +78,25 @@ static void *iothread_run(void *opaque)
     return NULL;
 }
 
+/* Runs in iothread_run() thread */
+static void iothread_stop_bh(void *opaque)
+{
+    IOThread *iothread = opaque;
+
+    iothread->running = false; /* stop iothread_run() */
+
+    if (iothread->main_loop) {
+        g_main_loop_quit(iothread->main_loop);
+    }
+}
+
 void iothread_stop(IOThread *iothread)
 {
     if (!iothread->ctx || iothread->stopping) {
         return;
     }
     iothread->stopping = true;
-    aio_notify(iothread->ctx);
-    if (atomic_read(&iothread->main_loop)) {
-        g_main_loop_quit(iothread->main_loop);
-    }
+    aio_bh_schedule_oneshot(iothread->ctx, iothread_stop_bh, iothread);
     qemu_thread_join(&iothread->thread);
 }
 
@@ -134,6 +143,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
     char *name, *thread_name;
 
     iothread->stopping = false;
+    iothread->running = true;
     iothread->thread_id = -1;
     iothread->ctx = aio_context_new(&local_error);
     if (!iothread->ctx) {
@@ -380,3 +390,10 @@ void iothread_destroy(IOThread *iothread)
 {
     object_unparent(OBJECT(iothread));
 }
+
+/* Lookup IOThread by its id.  Only finds user-created objects, not internal
+ * iothread_create() objects. */
+IOThread *iothread_by_id(const char *id)
+{
+    return IOTHREAD(object_resolve_path_type(id, TYPE_IOTHREAD, NULL));
+}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index dd763dcf876630db1375eb42b5f2e50da00d28d5..a8cdbc300bde10ff9b514b51ee79d5a3781ec97b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3949,3 +3949,43 @@
   'data' : { 'parent': 'str',
              '*child': 'str',
              '*node': 'str' } }
+
+##
+# @x-blockdev-set-iothread:
+#
+# Move @node and its children into the @iothread.  If @iothread is null then
+# move @node and its children into the main loop.
+#
+# The node must not be attached to a BlockBackend.
+#
+# @node-name: the name of the block driver node
+#
+# @iothread: the name of the IOThread object or null for the main loop
+#
+# @force: true if the node and its children should be moved when a BlockBackend
+#         is already attached
+#
+# Note: this command is experimental and intended for test cases that need
+# control over IOThreads only.
+#
+# Since: 2.12
+#
+# Example:
+#
+# 1. Move a node into an IOThread
+# -> { "execute": "x-blockdev-set-iothread",
+#      "arguments": { "node-name": "disk1",
+#                     "iothread": "iothread0" } }
+# <- { "return": {} }
+#
+# 2. Move a node into the main loop
+# -> { "execute": "x-blockdev-set-iothread",
+#      "arguments": { "node-name": "disk1",
+#                     "iothread": null } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'x-blockdev-set-iothread',
+  'data' : { 'node-name': 'str',
+             'iothread': 'StrOrNull',
+             '*force': 'bool' } }
diff --git a/tests/qemu-iotests/202 b/tests/qemu-iotests/202
new file mode 100755
index 0000000000000000000000000000000000000000..581ca34d79962d84a1e9d259feb1524357ca5a64
--- /dev/null
+++ b/tests/qemu-iotests/202
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2017 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Creator/Owner: Stefan Hajnoczi <stefanha@redhat.com>
+#
+# Check that QMP 'transaction' blockdev-snapshot-sync with multiple drives on a
+# single IOThread completes successfully.  This particular command triggered a
+# hang due to recursive AioContext locking and BDRV_POLL_WHILE().  Protect
+# against regressions.
+
+import iotests
+
+iotests.verify_image_format(supported_fmts=['qcow2'])
+iotests.verify_platform(['linux'])
+
+with iotests.FilePath('disk0.img') as disk0_img_path, \
+     iotests.FilePath('disk1.img') as disk1_img_path, \
+     iotests.FilePath('disk0-snap.img') as disk0_snap_img_path, \
+     iotests.FilePath('disk1-snap.img') as disk1_snap_img_path, \
+     iotests.VM() as vm:
+
+    img_size = '10M'
+    iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, disk0_img_path, img_size)
+    iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, disk1_img_path, img_size)
+
+    iotests.log('Launching VM...')
+    vm.launch()
+
+    iotests.log('Adding IOThread...')
+    iotests.log(vm.qmp('object-add',
+                       qom_type='iothread',
+                       id='iothread0'))
+
+    iotests.log('Adding blockdevs...')
+    iotests.log(vm.qmp('blockdev-add',
+                       driver=iotests.imgfmt,
+                       node_name='disk0',
+                       file={
+                           'driver': 'file',
+                           'filename': disk0_img_path,
+                       }))
+    iotests.log(vm.qmp('blockdev-add',
+                       driver=iotests.imgfmt,
+                       node_name='disk1',
+                       file={
+                           'driver': 'file',
+                           'filename': disk1_img_path,
+                       }))
+
+    iotests.log('Setting iothread...')
+    iotests.log(vm.qmp('x-blockdev-set-iothread',
+                       node_name='disk0',
+                       iothread='iothread0'))
+    iotests.log(vm.qmp('x-blockdev-set-iothread',
+                       node_name='disk1',
+                       iothread='iothread0'))
+
+    iotests.log('Creating external snapshots...')
+    iotests.log(vm.qmp(
+                  'transaction',
+                  actions=[
+                      {
+                          'data': {
+                              'node-name': 'disk0',
+                              'snapshot-file': disk0_snap_img_path,
+                              'snapshot-node-name': 'disk0-snap',
+                              'mode': 'absolute-paths',
+                              'format': iotests.imgfmt,
+                          },
+                          'type': 'blockdev-snapshot-sync'
+                      }, {
+                          'data': {
+                              'node-name': 'disk1',
+                              'snapshot-file': disk1_snap_img_path,
+                              'snapshot-node-name': 'disk1-snap',
+                              'mode': 'absolute-paths',
+                              'format': iotests.imgfmt
+                          },
+                          'type': 'blockdev-snapshot-sync'
+                      }
+                  ]))
diff --git a/tests/qemu-iotests/202.out b/tests/qemu-iotests/202.out
new file mode 100644
index 0000000000000000000000000000000000000000..d5ea374e17eeae3aeccb280573fad79271f15603
--- /dev/null
+++ b/tests/qemu-iotests/202.out
@@ -0,0 +1,11 @@
+Launching VM...
+Adding IOThread...
+{u'return': {}}
+Adding blockdevs...
+{u'return': {}}
+{u'return': {}}
+Setting iothread...
+{u'return': {}}
+{u'return': {}}
+Creating external snapshots...
+{u'return': {}}
diff --git a/tests/qemu-iotests/203 b/tests/qemu-iotests/203
new file mode 100755
index 0000000000000000000000000000000000000000..2c811917d84fa718cbe71c0d1b7960713f0deef0
--- /dev/null
+++ b/tests/qemu-iotests/203
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2017 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Creator/Owner: Stefan Hajnoczi <stefanha@redhat.com>
+#
+# Check that QMP 'migrate' with multiple drives on a single IOThread completes
+# successfully.  This particular command triggered a hang in the source QEMU
+# process due to recursive AioContext locking in bdrv_invalidate_all() and
+# BDRV_POLL_WHILE().
+
+import iotests
+
+iotests.verify_image_format(supported_fmts=['qcow2'])
+iotests.verify_platform(['linux'])
+
+with iotests.FilePath('disk0.img') as disk0_img_path, \
+     iotests.FilePath('disk1.img') as disk1_img_path, \
+     iotests.VM() as vm:
+
+    img_size = '10M'
+    iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, disk0_img_path, img_size)
+    iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, disk1_img_path, img_size)
+
+    iotests.log('Launching VM...')
+    (vm.add_object('iothread,id=iothread0')
+       .add_drive(disk0_img_path, 'node-name=drive0-node', interface='none')
+       .add_drive(disk1_img_path, 'node-name=drive1-node', interface='none')
+       .launch())
+
+    iotests.log('Setting IOThreads...')
+    iotests.log(vm.qmp('x-blockdev-set-iothread',
+                       node_name='drive0-node', iothread='iothread0',
+                       force=True))
+    iotests.log(vm.qmp('x-blockdev-set-iothread',
+                       node_name='drive1-node', iothread='iothread0',
+                       force=True))
+
+    iotests.log('Starting migration...')
+    iotests.log(vm.qmp('migrate', uri='exec:cat >/dev/null'))
+    while True:
+        vm.get_qmp_event(wait=60.0)
+        result = vm.qmp('query-migrate')
+        status = result.get('return', {}).get('status', None)
+        if status == 'completed':
+            break
diff --git a/tests/qemu-iotests/203.out b/tests/qemu-iotests/203.out
new file mode 100644
index 0000000000000000000000000000000000000000..3f1ff900e47f5117241a1f0f2f7e81cfe807b211
--- /dev/null
+++ b/tests/qemu-iotests/203.out
@@ -0,0 +1,6 @@
+Launching VM...
+Setting IOThreads...
+{u'return': {}}
+{u'return': {}}
+Starting migration...
+{u'return': {}}
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 3e688678dd9329a2f568cd6320abe9e14d20b204..93d96fb22fdb974e447350a9c10340f357ffa630 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -197,3 +197,5 @@
 197 rw auto quick
 198 rw auto
 200 rw auto
+202 rw auto quick
+203 rw auto
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 6f057904a9ae79bbe98a196c1321cd76c7106de6..44477e9295895a752a08b5d60ef79cf2acbc7850 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -197,6 +197,11 @@ def __init__(self, path_suffix=''):
                                  socket_scm_helper=socket_scm_helper)
         self._num_drives = 0
 
+    def add_object(self, opts):
+        self._args.append('-object')
+        self._args.append(opts)
+        return self
+
     def add_device(self, opts):
         self._args.append('-device')
         self._args.append(opts)
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
index 254349cdbb619a08112cf06224045327706c85cf..afb678fbe51ccd5787a32ebdd93bbecade0a4355 100644
--- a/util/qemu-coroutine-sleep.c
+++ b/util/qemu-coroutine-sleep.c
@@ -31,9 +31,9 @@ static void co_sleep_cb(void *opaque)
     aio_co_wake(sleep_cb->co);
 }
 
-void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
-                                  int64_t ns)
+void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns)
 {
+    AioContext *ctx = qemu_get_current_aio_context();
     CoSleepCB sleep_cb = {
         .co = qemu_coroutine_self(),
     };