Commit 99a4706a authored by Maciej S. Szmigiero
Add Hyper-V Dynamic Memory Protocol driver (hv-balloon) hot-add support


One of the advantages of using this protocol over ACPI-based PC DIMM hotplug is
that it allows hot-adding memory at a much smaller granularity, because the
ACPI DIMM slot limit does not apply.

In order to enable this functionality, a new memory backend needs to be
created and provided to the driver via the "memdev" parameter.

This can be achieved by, for example, adding
"-object memory-backend-ram,id=mem1,size=32G" to the QEMU command line and
then instantiating the driver with the "memdev=mem1" parameter.
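
Assuming VMBus support is already present in the machine configuration
(hv-balloon is a VMBus device, so a vmbus-bridge device is also required),
a complete pair of options might look roughly like this:

  -object memory-backend-ram,id=mem1,size=32G \
  -device hv-balloon,id=hvb,memdev=mem1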

The device will try to cover the memory backend with multiple memslots in
order to reduce the size of the metadata for the not-yet-hot-added part of
the memory backend.
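
For example, if a hypothetical 32 GiB backend is covered by a few
multi-gigabyte memslots, only the memslots that actually contain hot-added
memory have to be mapped, so the not-yet-hot-added remainder of the backend
costs no per-memslot metadata.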

Co-developed-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
parent 0d9e8c0b
hw/hyperv/hv-balloon-our_range_memslots.c
/*
* QEMU Hyper-V Dynamic Memory Protocol driver
*
* Copyright (C) 2020-2023 Oracle and/or its affiliates.
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "hv-balloon-internal.h"
#include "hv-balloon-our_range_memslots.h"
#include "trace.h"

/* OurRange */
static void our_range_init(OurRange *our_range, uint64_t start, uint64_t count)
{
    assert(count <= UINT64_MAX - start);
    our_range->range.start = start;
    our_range->range.count = count;

    hvb_page_range_tree_init(&our_range->removed_guest);
    hvb_page_range_tree_init(&our_range->removed_both);

    /* mark the whole range as unused but for potential use */
    our_range->added = 0;
    our_range->unusable_tail = 0;
}

static void our_range_destroy(OurRange *our_range)
{
    hvb_page_range_tree_destroy(&our_range->removed_guest);
    hvb_page_range_tree_destroy(&our_range->removed_both);
}

void hvb_our_range_clear_removed_trees(OurRange *our_range)
{
    hvb_page_range_tree_destroy(&our_range->removed_guest);
    hvb_page_range_tree_destroy(&our_range->removed_both);

    hvb_page_range_tree_init(&our_range->removed_guest);
    hvb_page_range_tree_init(&our_range->removed_both);
}

void hvb_our_range_mark_added(OurRange *our_range, uint64_t additional_size)
{
    assert(additional_size <= UINT64_MAX - our_range->added);

    our_range->added += additional_size;

    assert(our_range->added <= UINT64_MAX - our_range->unusable_tail);
    assert(our_range->added + our_range->unusable_tail <=
           our_range->range.count);
}
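
/*
 * Example (illustrative values, assuming the protocol's 4 KiB page size):
 * after hot-adding 256 MiB to a freshly initialized 1 GiB range, a call to
 * hvb_our_range_mark_added(our_range, 0x10000) leaves added == 0x10000
 * pages, with 0x30000 pages still available for future hot adds.
 */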

/* OurRangeMemslots */
static void our_range_memslots_init_slots(OurRangeMemslots *our_range,
                                          MemoryRegion *backing_mr,
                                          Object *memslot_owner)
{
    OurRangeMemslotsSlots *memslots = &our_range->slots;
    unsigned int idx;
    uint64_t memslot_offset;

    assert(memslots->count > 0);
    memslots->slots = g_new0(MemoryRegion, memslots->count);

    /* Initialize our memslots, but don't map them yet. */
    assert(memslots->size_each > 0);
    for (idx = 0, memslot_offset = 0; idx < memslots->count;
         idx++, memslot_offset += memslots->size_each) {
        uint64_t memslot_size;
        g_autofree char *name = NULL;

        /* The size of the last memslot might be smaller. */
        if (idx == memslots->count - 1) {
            uint64_t region_size;

            assert(our_range->mr);
            region_size = memory_region_size(our_range->mr);
            memslot_size = region_size - memslot_offset;
        } else {
            memslot_size = memslots->size_each;
        }

        name = g_strdup_printf("memslot-%u", idx);
        memory_region_init_alias(&memslots->slots[idx], memslot_owner, name,
                                 backing_mr, memslot_offset, memslot_size);
        /*
         * We want to be able to atomically and efficiently activate/deactivate
         * individual memslots without affecting adjacent memslots in memory
         * notifiers.
         */
        memory_region_set_unmergeable(&memslots->slots[idx], true);
    }

    memslots->mapped_count = 0;
}

OurRangeMemslots *hvb_our_range_memslots_new(uint64_t addr,
                                             MemoryRegion *parent_mr,
                                             MemoryRegion *backing_mr,
                                             Object *memslot_owner,
                                             unsigned int memslot_count,
                                             uint64_t memslot_size)
{
    OurRangeMemslots *our_range;

    our_range = g_malloc(sizeof(*our_range));
    our_range_init(&our_range->range,
                   addr / HV_BALLOON_PAGE_SIZE,
                   memory_region_size(parent_mr) / HV_BALLOON_PAGE_SIZE);
    our_range->slots.size_each = memslot_size;
    our_range->slots.count = memslot_count;
    our_range->mr = parent_mr;
    our_range_memslots_init_slots(our_range, backing_mr, memslot_owner);
    return our_range;
}
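
/*
 * Usage sketch (hypothetical values, not part of this commit): a caller
 * owning a 32 GiB container region could cover it with four 8 GiB memslot
 * aliases:
 *
 *   our_range = hvb_our_range_memslots_new(addr, parent_mr, backing_mr,
 *                                          OBJECT(dev), 4, 8 * GiB);
 *
 * No memslot is mapped into parent_mr until
 * hvb_our_range_memslots_ensure_mapped_additional() is called.
 */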

static void our_range_memslots_free_memslots(OurRangeMemslots *our_range)
{
    OurRangeMemslotsSlots *memslots = &our_range->slots;
    unsigned int idx;
    uint64_t offset;

    memory_region_transaction_begin();
    for (idx = 0, offset = 0; idx < memslots->mapped_count;
         idx++, offset += memslots->size_each) {
        trace_hv_balloon_unmap_slot(idx, memslots->count, offset);
        assert(memory_region_is_mapped(&memslots->slots[idx]));
        memory_region_del_subregion(our_range->mr, &memslots->slots[idx]);
    }
    memory_region_transaction_commit();

    for (idx = 0; idx < memslots->count; idx++) {
        object_unparent(OBJECT(&memslots->slots[idx]));
    }

    g_clear_pointer(&our_range->slots.slots, g_free);
}

void hvb_our_range_memslots_free(OurRangeMemslots *our_range)
{
    OurRangeMemslotsSlots *memslots = &our_range->slots;
    MemoryRegion *hostmem_mr;
    RAMBlock *rb;

    assert(our_range->slots.count > 0);
    assert(our_range->slots.slots);

    hostmem_mr = memslots->slots[0].alias;
    rb = hostmem_mr->ram_block;

    /* Discard the backing pages so the host memory is actually released. */
    ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));

    our_range_memslots_free_memslots(our_range);
    our_range_destroy(&our_range->range);
    g_free(our_range);
}

void hvb_our_range_memslots_ensure_mapped_additional(OurRangeMemslots *our_range,
                                                     uint64_t additional_map_size)
{
    OurRangeMemslotsSlots *memslots = &our_range->slots;
    uint64_t total_map_size;
    unsigned int idx;
    uint64_t offset;

    total_map_size = (our_range->range.added + additional_map_size) *
        HV_BALLOON_PAGE_SIZE;
    idx = memslots->mapped_count;
    assert(memslots->size_each > 0);
    offset = idx * memslots->size_each;

    /*
     * Activate all memslots covered by the newly added region in a single
     * transaction.
     */
    memory_region_transaction_begin();
    for ( ; idx < memslots->count;
         idx++, offset += memslots->size_each) {
        /*
         * If this memslot starts at or beyond the end of the range to map,
         * so does every following one.
         */
        if (offset >= total_map_size) {
            break;
        }

        /*
         * Instead of enabling/disabling memslots, we add/remove them. This
         * should make address space updates faster, because we don't have to
         * loop over many disabled subregions.
         */
        trace_hv_balloon_map_slot(idx, memslots->count, offset);
        assert(!memory_region_is_mapped(&memslots->slots[idx]));
        memory_region_add_subregion(our_range->mr, offset,
                                    &memslots->slots[idx]);
        memslots->mapped_count++;
    }
    memory_region_transaction_commit();
}
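
/*
 * Worked example (hypothetical values): with size_each = 8 GiB and
 * mapped_count = 1, a request that brings the total hot-added size to
 * 10 GiB yields total_map_size = 10 GiB, so the loop above maps memslot 1
 * (covering offsets 8 GiB..16 GiB) and stops, leaving mapped_count == 2.
 */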
hw/hyperv/hv-balloon-our_range_memslots.h

/*
 * QEMU Hyper-V Dynamic Memory Protocol driver
 *
 * Copyright (C) 2020-2023 Oracle and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#ifndef HW_HYPERV_HV_BALLOON_OUR_RANGE_MEMSLOTS_H
#define HW_HYPERV_HV_BALLOON_OUR_RANGE_MEMSLOTS_H

#include "qemu/osdep.h"
#include "exec/memory.h"
#include "qom/object.h"
#include "hv-balloon-page_range_tree.h"

/* OurRange */
#define OUR_RANGE(ptr) ((OurRange *)(ptr))

/* "our range" means the memory range owned by this driver (for hot-adding) */
typedef struct OurRange {
    PageRange range;

    /* How many pages were hot-added to the guest */
    uint64_t added;

    /* Pages at the end not currently usable */
    uint64_t unusable_tail;

    /* Memory removed from the guest */
    PageRangeTree removed_guest, removed_both;
} OurRange;

static inline uint64_t our_range_get_remaining_start(OurRange *our_range)
{
    return our_range->range.start + our_range->added;
}

static inline uint64_t our_range_get_remaining_size(OurRange *our_range)
{
    return our_range->range.count - our_range->added - our_range->unusable_tail;
}
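
/*
 * Example (illustrative values): for a range starting at page 0x100000
 * with count = 0x40000, added = 0x10000 and unusable_tail = 0x1000, the
 * remaining (not yet hot-added) area starts at page 0x110000 and spans
 * 0x2f000 pages.
 */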

void hvb_our_range_mark_added(OurRange *our_range, uint64_t additional_size);

static inline void our_range_mark_remaining_unusable(OurRange *our_range)
{
    our_range->unusable_tail = our_range->range.count - our_range->added;
}

static inline PageRangeTree our_range_get_removed_tree(OurRange *our_range,
                                                       bool both)
{
    if (both) {
        return our_range->removed_both;
    } else {
        return our_range->removed_guest;
    }
}

static inline bool our_range_is_removed_tree_empty(OurRange *our_range,
                                                   bool both)
{
    if (both) {
        return page_range_tree_is_empty(our_range->removed_both);
    } else {
        return page_range_tree_is_empty(our_range->removed_guest);
    }
}

void hvb_our_range_clear_removed_trees(OurRange *our_range);

/* OurRangeMemslots */
typedef struct OurRangeMemslotsSlots {
    /* Nominal size of each memslot (the last one might be smaller) */
    uint64_t size_each;

    /* Slots array and its element count */
    MemoryRegion *slots;
    unsigned int count;

    /* How many slots are currently mapped */
    unsigned int mapped_count;
} OurRangeMemslotsSlots;

typedef struct OurRangeMemslots {
    OurRange range;

    /* Memslots covering our range */
    OurRangeMemslotsSlots slots;

    MemoryRegion *mr;
} OurRangeMemslots;

OurRangeMemslots *hvb_our_range_memslots_new(uint64_t addr,
                                             MemoryRegion *parent_mr,
                                             MemoryRegion *backing_mr,
                                             Object *memslot_owner,
                                             unsigned int memslot_count,
                                             uint64_t memslot_size);
void hvb_our_range_memslots_free(OurRangeMemslots *our_range);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(OurRangeMemslots, hvb_our_range_memslots_free)

void hvb_our_range_memslots_ensure_mapped_additional(OurRangeMemslots *our_range,
                                                     uint64_t additional_map_size);

#endif
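
Taken together, a caller might drive these helpers roughly as in the
following sketch (hypothetical sizes and caller context; in this commit the
real caller is the hv-balloon device model, and GiB comes from
"qemu/units.h"):

/* Hypothetical caller sketch; values and context are illustrative only. */
static OurRangeMemslots *example_hot_add_setup(uint64_t addr,
                                               MemoryRegion *parent_mr,
                                               MemoryRegion *backing_mr,
                                               Object *owner)
{
    /* Cover the container region with four 8 GiB memslot aliases. */
    OurRangeMemslots *our_range =
        hvb_our_range_memslots_new(addr, parent_mr, backing_mr, owner,
                                   4, 8 * GiB);

    /* Map enough memslots to back the first 2 GiB of hot-added memory. */
    hvb_our_range_memslots_ensure_mapped_additional(
        our_range, 2 * GiB / HV_BALLOON_PAGE_SIZE);

    /* Account for those pages once the guest acknowledges the hot add. */
    hvb_our_range_mark_added(&our_range->range,
                             2 * GiB / HV_BALLOON_PAGE_SIZE);

    return our_range;
}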
This diff is collapsed.
......@@ -2,4 +2,4 @@ specific_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'))
specific_ss.add(when: 'CONFIG_HYPERV_TESTDEV', if_true: files('hyperv_testdev.c'))
specific_ss.add(when: 'CONFIG_VMBUS', if_true: files('vmbus.c'))
specific_ss.add(when: 'CONFIG_SYNDBG', if_true: files('syndbg.c'))
specific_ss.add(when: 'CONFIG_HV_BALLOON', if_true: files('hv-balloon.c', 'hv-balloon-page_range_tree.c'))
specific_ss.add(when: 'CONFIG_HV_BALLOON', if_true: files('hv-balloon.c', 'hv-balloon-page_range_tree.c', 'hv-balloon-our_range_memslots.c'))
hw/hyperv/trace-events

@@ -23,9 +23,14 @@ hv_balloon_incoming_version(uint16_t major, uint16_t minor) "incoming proto vers
 hv_balloon_incoming_caps(uint32_t caps) "incoming caps 0x%x"
 hv_balloon_outgoing_unballoon(uint32_t trans_id, uint64_t count, uint64_t start, uint64_t rempages) "posting unballoon %"PRIu32" for %"PRIu64" @ 0x%"PRIx64", remaining %"PRIu64
 hv_balloon_incoming_unballoon(uint32_t trans_id) "incoming unballoon response %"PRIu32
+hv_balloon_outgoing_hot_add(uint32_t trans_id, uint64_t count, uint64_t start) "posting hot add %"PRIu32" for %"PRIu64" @ 0x%"PRIx64
+hv_balloon_incoming_hot_add(uint32_t trans_id, uint32_t result, uint32_t count) "incoming hot add response %"PRIu32", result %"PRIu32", count %"PRIu32
 hv_balloon_outgoing_balloon(uint32_t trans_id, uint64_t count, uint64_t rempages) "posting balloon %"PRIu32" for %"PRIu64", remaining %"PRIu64
 hv_balloon_incoming_balloon(uint32_t trans_id, uint32_t range_count, uint32_t more_pages) "incoming balloon response %"PRIu32", ranges %"PRIu32", more %"PRIu32
+hv_balloon_our_range_add(uint64_t count, uint64_t start) "adding our range %"PRIu64" @ 0x%"PRIx64
 hv_balloon_remove_response(uint64_t count, uint64_t start, unsigned int both) "processing remove response range %"PRIu64" @ 0x%"PRIx64", both %u"
 hv_balloon_remove_response_hole(uint64_t counthole, uint64_t starthole, uint64_t countrange, uint64_t startrange, uint64_t starthpr, unsigned int both) "response range hole %"PRIu64" @ 0x%"PRIx64" from range %"PRIu64" @ 0x%"PRIx64", before our start 0x%"PRIx64", both %u"
 hv_balloon_remove_response_common(uint64_t countcommon, uint64_t startcommon, uint64_t countrange, uint64_t startrange, uint64_t counthpr, uint64_t starthpr, uint64_t removed, unsigned int both) "response common range %"PRIu64" @ 0x%"PRIx64" from range %"PRIu64" @ 0x%"PRIx64" with our %"PRIu64" @ 0x%"PRIx64", removed %"PRIu64", both %u"
 hv_balloon_remove_response_remainder(uint64_t count, uint64_t start, unsigned int both) "remove response remaining range %"PRIu64" @ 0x%"PRIx64", both %u"
+hv_balloon_map_slot(unsigned int idx, unsigned int total_slots, uint64_t offset) "mapping memslot %u / %u @ 0x%"PRIx64
+hv_balloon_unmap_slot(unsigned int idx, unsigned int total_slots, uint64_t offset) "unmapping memslot %u / %u @ 0x%"PRIx64