From a2c4efbb5b968a80eb552757308c2fb2f28157c6 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Sun, 19 Jan 2014 13:07:36 +0100 Subject: [PATCH 11/11] exec: separate sections and nodes per address space RH-Author: Marcel Apfelbaum Message-id: <1390136856-7024-3-git-send-email-marcel.a@redhat.com> Patchwork-id: 56811 O-Subject: [RHEL-7 qemu-kvm PATCH v2 2/2] exec: separate sections and nodes per address space Bugzilla: 1003535 RH-Acked-by: Michael S. Tsirkin RH-Acked-by: Paolo Bonzini RH-Acked-by: Markus Armbruster Every address space has its own nodes and sections, but it uses the same global arrays of nodes/sections. This limits the number of devices that can be attached to the guest to 20-30 devices. It happens because: - The sections array is limited to 2^12 entries. - The main memory has at least 100 sections. - Each device address space is actually an alias to main memory, multiplying its number of nodes/sections. Remove the limitation by using separate arrays of nodes and sections for each address space. 
Closest upstream commit: 53cb28cbfea038f8ad50132dc8a684e638c7d48b Signed-off-by: Marcel Apfelbaum --- v1 -> v2: - The series conflicted with Juan's series: - [RHEL7 qemu-kvm PATCH 00/40] bitmap optmization - Conflicts solved: - AddressSpaceDispatch was moved to exec.c - PhysPageEntry was moved to exec.c - Moved also PhysPageMap to exec.c exec.c | 166 ++++++++++++++++++++++++++++++++++------------------------------- 1 file changed, 86 insertions(+), 80 deletions(-) Signed-off-by: Miroslav Rezanina --- exec.c | 166 +++++++++++++++++++++++++++++++++------------------------------- 1 files changed, 86 insertions(+), 80 deletions(-) diff --git a/exec.c b/exec.c index 01c74cd..ce9310c 100644 --- a/exec.c +++ b/exec.c @@ -91,25 +91,32 @@ struct PhysPageEntry { uint16_t ptr : 15; }; +typedef PhysPageEntry Node[L2_SIZE]; + +typedef struct PhysPageMap { + unsigned sections_nb; + unsigned sections_nb_alloc; + unsigned nodes_nb; + unsigned nodes_nb_alloc; + Node *nodes; + MemoryRegionSection *sections; +} PhysPageMap; + struct AddressSpaceDispatch { /* This is a multi-level map on the physical address space. * The bottom level has pointers to MemoryRegionSections. 
*/ PhysPageEntry phys_map; + PhysPageMap map; MemoryListener listener; + AddressSpace *as; }; -static MemoryRegionSection *phys_sections; -static unsigned phys_sections_nb, phys_sections_nb_alloc; #define PHYS_SECTION_UNASSIGNED 0 #define PHYS_SECTION_NOTDIRTY 1 #define PHYS_SECTION_ROM 2 #define PHYS_SECTION_WATCH 3 -/* Simple allocator for PhysPageEntry nodes */ -static PhysPageEntry (*phys_map_nodes)[L2_SIZE]; -static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc; - #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1) static void io_mem_init(void); @@ -121,41 +128,38 @@ static MemoryRegion io_mem_watch; #if !defined(CONFIG_USER_ONLY) -static void phys_map_node_reserve(unsigned nodes) +static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes) { - if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) { - typedef PhysPageEntry Node[L2_SIZE]; - phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16); - phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc, - phys_map_nodes_nb + nodes); - phys_map_nodes = g_renew(Node, phys_map_nodes, - phys_map_nodes_nb_alloc); + if (map->nodes_nb + nodes > map->nodes_nb_alloc) { + map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16); + map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes); + map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc); } } -static uint16_t phys_map_node_alloc(void) +static uint16_t phys_map_node_alloc(PhysPageMap *map) { unsigned i; uint16_t ret; - ret = phys_map_nodes_nb++; + ret = map->nodes_nb++; assert(ret != PHYS_MAP_NODE_NIL); - assert(ret != phys_map_nodes_nb_alloc); + assert(ret != map->nodes_nb_alloc); for (i = 0; i < L2_SIZE; ++i) { - phys_map_nodes[ret][i].is_leaf = 0; - phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL; + map->nodes[ret][i].is_leaf = 0; + map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL; } return ret; } -static void phys_map_nodes_reset(void) +static void phys_map_nodes_reset(PhysPageMap *map) { - phys_map_nodes_nb = 0; + map->nodes_nb = 0; 
} -static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index, - hwaddr *nb, uint16_t leaf, +static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp, + hwaddr *index, hwaddr *nb, uint16_t leaf, int level) { PhysPageEntry *p; @@ -163,8 +167,8 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index, hwaddr step = (hwaddr)1 << (level * L2_BITS); if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) { - lp->ptr = phys_map_node_alloc(); - p = phys_map_nodes[lp->ptr]; + lp->ptr = phys_map_node_alloc(map); + p = map->nodes[lp->ptr]; if (level == 0) { for (i = 0; i < L2_SIZE; i++) { p[i].is_leaf = 1; @@ -172,7 +176,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index, } } } else { - p = phys_map_nodes[lp->ptr]; + p = map->nodes[lp->ptr]; } lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)]; @@ -183,7 +187,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index, *index += step; *nb -= step; } else { - phys_page_set_level(lp, index, nb, leaf, level - 1); + phys_page_set_level(map, lp, index, nb, leaf, level - 1); } ++lp; } @@ -194,9 +198,10 @@ static void phys_page_set(AddressSpaceDispatch *d, uint16_t leaf) { /* Wildly overreserve - it doesn't matter much. 
*/ - phys_map_node_reserve(3 * P_L2_LEVELS); + phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS); - phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1); + phys_page_set_level(&d->map, &d->phys_map, &index, + &nb, leaf, P_L2_LEVELS - 1); } MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index) @@ -210,13 +215,13 @@ MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index) if (lp.ptr == PHYS_MAP_NODE_NIL) { goto not_found; } - p = phys_map_nodes[lp.ptr]; + p = d->map.nodes[lp.ptr]; lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)]; } s_index = lp.ptr; not_found: - return &phys_sections[s_index]; + return &d->map.sections[s_index]; } bool memory_region_is_unassigned(MemoryRegion *mr) @@ -657,7 +662,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env, and avoid full address decoding in every device. We can't use the high bits of pd for this because IO_MEM_ROMD uses these as a ram address. */ - iotlb = section - phys_sections; + iotlb = section - address_space_memory.dispatch->map.sections; iotlb += memory_region_section_addr(section, paddr); } @@ -683,13 +688,14 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env, #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK) typedef struct subpage_t { MemoryRegion iomem; + AddressSpace *as; hwaddr base; uint16_t sub_section[TARGET_PAGE_SIZE]; } subpage_t; static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end, uint16_t section); -static subpage_t *subpage_init(hwaddr base); +static subpage_t *subpage_init(AddressSpace *as, hwaddr base); static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc; @@ -703,9 +709,9 @@ void phys_mem_set_alloc(void *(*alloc)(size_t)) phys_mem_alloc = alloc; } -static void destroy_page_desc(uint16_t section_index) +static void destroy_page_desc(PhysPageMap *map, uint16_t section_index) { - MemoryRegionSection *section = &phys_sections[section_index]; + MemoryRegionSection *section = 
&map->sections[section_index]; MemoryRegion *mr = section->mr; if (mr->subpage) { @@ -715,7 +721,8 @@ static void destroy_page_desc(uint16_t section_index) } } -static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level) +static void destroy_l2_mapping(PhysPageMap *map, PhysPageEntry *lp, + unsigned level) { unsigned i; PhysPageEntry *p; @@ -724,12 +731,12 @@ static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level) return; } - p = phys_map_nodes[lp->ptr]; + p = map->nodes[lp->ptr]; for (i = 0; i < L2_SIZE; ++i) { if (!p[i].is_leaf) { - destroy_l2_mapping(&p[i], level - 1); + destroy_l2_mapping(map, &p[i], level - 1); } else { - destroy_page_desc(p[i].ptr); + destroy_page_desc(map, p[i].ptr); } } lp->is_leaf = 0; @@ -738,24 +745,25 @@ static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level) static void destroy_all_mappings(AddressSpaceDispatch *d) { - destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1); - phys_map_nodes_reset(); + destroy_l2_mapping(&d->map, &d->phys_map, P_L2_LEVELS - 1); + phys_map_nodes_reset(&d->map); } -static uint16_t phys_section_add(MemoryRegionSection *section) +static uint16_t phys_section_add(PhysPageMap *map, + MemoryRegionSection *section) { - if (phys_sections_nb == phys_sections_nb_alloc) { - phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16); - phys_sections = g_renew(MemoryRegionSection, phys_sections, - phys_sections_nb_alloc); + if (map->sections_nb == map->sections_nb_alloc) { + map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16); + map->sections = g_renew(MemoryRegionSection, map->sections, + map->sections_nb_alloc); } - phys_sections[phys_sections_nb] = *section; - return phys_sections_nb++; + map->sections[map->sections_nb] = *section; + return map->sections_nb++; } -static void phys_sections_clear(void) +static void phys_sections_clear(PhysPageMap *map) { - phys_sections_nb = 0; + map->sections_nb = 0; } static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection 
*section) @@ -773,16 +781,16 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti assert(existing->mr->subpage || existing->mr == &io_mem_unassigned); if (!(existing->mr->subpage)) { - subpage = subpage_init(base); + subpage = subpage_init(d->as, base); subsection.mr = &subpage->iomem; phys_page_set(d, base >> TARGET_PAGE_BITS, 1, - phys_section_add(&subsection)); + phys_section_add(&d->map, &subsection)); } else { subpage = container_of(existing->mr, subpage_t, iomem); } start = section->offset_within_address_space & ~TARGET_PAGE_MASK; end = start + section->size - 1; - subpage_register(subpage, start, end, phys_section_add(section)); + subpage_register(subpage, start, end, phys_section_add(&d->map, section)); } @@ -791,7 +799,7 @@ static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *sec hwaddr start_addr = section->offset_within_address_space; ram_addr_t size = section->size; hwaddr addr; - uint16_t section_index = phys_section_add(section); + uint16_t section_index = phys_section_add(&d->map, section); assert(size); @@ -1619,7 +1627,7 @@ static uint64_t subpage_read(void *opaque, hwaddr addr, mmio, len, addr, idx); #endif - section = &phys_sections[mmio->sub_section[idx]]; + section = &mmio->as->dispatch->map.sections[mmio->sub_section[idx]]; addr += mmio->base; addr -= section->offset_within_address_space; addr += section->offset_within_region; @@ -1638,7 +1646,7 @@ static void subpage_write(void *opaque, hwaddr addr, __func__, mmio, len, addr, idx, value); #endif - section = &phys_sections[mmio->sub_section[idx]]; + section = &mmio->as->dispatch->map.sections[mmio->sub_section[idx]]; addr += mmio->base; addr -= section->offset_within_address_space; addr += section->offset_within_region; @@ -1696,10 +1704,10 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end, printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__, mmio, start, end, idx, eidx, memory); #endif - 
if (memory_region_is_ram(phys_sections[section].mr)) { - MemoryRegionSection new_section = phys_sections[section]; + if (memory_region_is_ram(mmio->as->dispatch->map.sections[section].mr)) { + MemoryRegionSection new_section = mmio->as->dispatch->map.sections[section]; new_section.mr = &io_mem_subpage_ram; - section = phys_section_add(&new_section); + section = phys_section_add(&mmio->as->dispatch->map, &new_section); } for (; idx <= eidx; idx++) { mmio->sub_section[idx] = section; @@ -1708,12 +1716,13 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end, return 0; } -static subpage_t *subpage_init(hwaddr base) +static subpage_t *subpage_init(AddressSpace *as, hwaddr base) { subpage_t *mmio; mmio = g_malloc0(sizeof(subpage_t)); + mmio->as = as; mmio->base = base; memory_region_init_io(&mmio->iomem, &subpage_ops, mmio, "subpage", TARGET_PAGE_SIZE); @@ -1727,7 +1736,7 @@ static subpage_t *subpage_init(hwaddr base) return mmio; } -static uint16_t dummy_section(MemoryRegion *mr) +static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr) { MemoryRegionSection section = { .mr = mr, @@ -1736,12 +1745,13 @@ static uint16_t dummy_section(MemoryRegion *mr) .size = UINT64_MAX, }; - return phys_section_add(§ion); + return phys_section_add(map, §ion); } MemoryRegion *iotlb_to_region(hwaddr index) { - return phys_sections[index & ~TARGET_PAGE_MASK].mr; + return address_space_memory.dispatch->map.sections[ + index & ~TARGET_PAGE_MASK].mr; } static void io_mem_init(void) @@ -1761,23 +1771,19 @@ static void io_mem_init(void) static void mem_begin(MemoryListener *listener) { AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener); + uint16_t n; destroy_all_mappings(d); d->phys_map.ptr = PHYS_MAP_NODE_NIL; -} - -static void core_begin(MemoryListener *listener) -{ - uint16_t n; - phys_sections_clear(); - n = dummy_section(&io_mem_unassigned); + phys_sections_clear(&d->map); + n = dummy_section(&d->map, &io_mem_unassigned); 
assert(n == PHYS_SECTION_UNASSIGNED); - n = dummy_section(&io_mem_notdirty); + n = dummy_section(&d->map, &io_mem_notdirty); assert(n == PHYS_SECTION_NOTDIRTY); - n = dummy_section(&io_mem_rom); + n = dummy_section(&d->map, &io_mem_rom); assert(n == PHYS_SECTION_ROM); - n = dummy_section(&io_mem_watch); + n = dummy_section(&d->map, &io_mem_watch); assert(n == PHYS_SECTION_WATCH); } @@ -1822,7 +1828,6 @@ static void io_region_del(MemoryListener *listener, } static MemoryListener core_memory_listener = { - .begin = core_begin, .log_global_start = core_log_global_start, .log_global_stop = core_log_global_stop, .priority = 1, @@ -1840,7 +1845,7 @@ static MemoryListener tcg_memory_listener = { void address_space_init_dispatch(AddressSpace *as) { - AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1); + AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1); d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 }; d->listener = (MemoryListener) { @@ -1849,6 +1854,7 @@ void address_space_init_dispatch(AddressSpace *as) .region_nop = mem_add, .priority = 0, }; + d->as = as; as->dispatch = d; memory_listener_register(&d->listener, as); } @@ -1858,7 +1864,7 @@ void address_space_destroy_dispatch(AddressSpace *as) AddressSpaceDispatch *d = as->dispatch; memory_listener_unregister(&d->listener); - destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1); + destroy_l2_mapping(&d->map, &d->phys_map, P_L2_LEVELS - 1); g_free(d); as->dispatch = NULL; } @@ -2446,7 +2452,7 @@ void stl_phys_notdirty(hwaddr addr, uint32_t val) if (!memory_region_is_ram(section->mr) || section->readonly) { addr = memory_region_section_addr(section, addr); if (memory_region_is_ram(section->mr)) { - section = &phys_sections[PHYS_SECTION_ROM]; + section = &address_space_memory.dispatch->map.sections[PHYS_SECTION_ROM]; } io_mem_write(section->mr, addr, val, 4); } else { @@ -2479,7 +2485,7 @@ void stq_phys_notdirty(hwaddr addr, uint64_t val) if (!memory_region_is_ram(section->mr) || 
section->readonly) { addr = memory_region_section_addr(section, addr); if (memory_region_is_ram(section->mr)) { - section = &phys_sections[PHYS_SECTION_ROM]; + section = &address_space_memory.dispatch->map.sections[PHYS_SECTION_ROM]; } #ifdef TARGET_WORDS_BIGENDIAN io_mem_write(section->mr, addr, val >> 32, 4); @@ -2508,7 +2514,7 @@ static inline void stl_phys_internal(hwaddr addr, uint32_t val, if (!memory_region_is_ram(section->mr) || section->readonly) { addr = memory_region_section_addr(section, addr); if (memory_region_is_ram(section->mr)) { - section = &phys_sections[PHYS_SECTION_ROM]; + section = &address_space_memory.dispatch->map.sections[PHYS_SECTION_ROM]; } #if defined(TARGET_WORDS_BIGENDIAN) if (endian == DEVICE_LITTLE_ENDIAN) { @@ -2575,7 +2581,7 @@ static inline void stw_phys_internal(hwaddr addr, uint32_t val, if (!memory_region_is_ram(section->mr) || section->readonly) { addr = memory_region_section_addr(section, addr); if (memory_region_is_ram(section->mr)) { - section = &phys_sections[PHYS_SECTION_ROM]; + section = &address_space_memory.dispatch->map.sections[PHYS_SECTION_ROM]; } #if defined(TARGET_WORDS_BIGENDIAN) if (endian == DEVICE_LITTLE_ENDIAN) { -- 1.7.1