diff --git a/apps/memory.c b/apps/memory.c index 1926db2b..586c9305 100644 --- a/apps/memory.c +++ b/apps/memory.c @@ -291,6 +291,72 @@ static void malloc(void) buddy_dump(); } +/** @brief A memory benchmark for page table walks and TLB misses */ +int bench(void) +{ + // init hardware performance counters + struct pmc_caps* cap = pmc_init(); + if (cap->version == 0x21) { // QEmu returns garbage + kputs("QEMU does not support PMCs.. skipping benchmark!\n"); + return -1; + } + + kprintf("PMC architecural version: %u\n", cap->version); + kprintf("There are %u general purpose PMCs (%u bit wide) available\n", cap->gp_count, cap->gp_width); + kprintf("There are %u fixed function PMCs (%u bit wide) available\n", cap->ff_count, cap->ff_width); + + // setup PMCs + pmc_stop_all(); + pmc_config(0, PMC_EVT_PAGE_WALK_CLKS, PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0); + pmc_config(1, PMC_EVT_PAGE_WALK_COUNT, PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0); + + // allocate space for results + uint64_t *data = kmalloc(ITERATIONS * sizeof(uint64_t)); + if (!data) + return -1; + + // clear caches + tlb_flush(); + flush_cache(); + + int i; + for (i=0; i < ITERATIONS; i++) { + pmc_write(0, 0); + pmc_write(1, 0); + + pmc_start_all(); + +#if 0 + int i = 100; + while (i--) { + tlb_flush(); + page_stats(0); + } +#else + //flush_cache(); + //tlb_flush(); + page_stats(0); +#endif + + pmc_stop_all(); + + uint64_t clks = pmc_read(0); + uint64_t count = pmc_read(1); + + /*kprintf("Number of Page table walks: %lu\n", count); + kprintf("Page table walks clock cycles: %lu\n", clks); + kprintf("Cycles per table walk: %lu.%u\n", clks / count, (1000 * clks / count) % 1000 );*/ + + data[i] = 1000000 * clks / count; + } + + // dump results + for (i=0; i + +// PMC MSR addresses +#define MSR_PERF_GLOBAL_STATUS 0x38E // global counter control facilities +#define MSR_PERF_GLOBAL_CTRL 0x38F +#define MSR_PERF_GLOBAL_OVF_CTRL 0x390 +#define IA32_PERF_CAPABILITIES 0x345 +#define IA32_PERFEVTSEL(i) (0x186 + i) // general 
// purpose PMC configuration register
/*
 * MSR addresses of the per-counter registers.
 * The index argument is parenthesized so that an expression such as
 * IA32_PMC(a | b) expands to base + (a | b) instead of (base + a) | b.
 */
#define IA32_PMC(i)			(0x0C1 + (i))	// general purpose PMC counter register
#define IA32_A_PMC(i)			(0x4C1 + (i))	// general purpose alias PMC counter register for full width writes
#define MSR_PERF_FIXED_CTR(i)		(0x309 + (i))	// fixed function PMC counter register
#define MSR_PERF_FIXED_CTR_CTRL		0x38D		// fixed function PMC configuration register


/* For Intel Core 2 Duo the MSR_PERF_FIXED_CTRs are mapped as follows:
 *  MSR_PERF_FIXED_CTR(0) => INST_RETIRED.ANY
 *  MSR_PERF_FIXED_CTR(1) => CPU_CLK_UNHALTED.CORE
 *  MSR_PERF_FIXED_CTR(2) => CPU_CLK_UNHALTED.REF */

// architectural flags for IA32_PERFEVTSEL
// (the first two are bit offsets of multi-bit fields, the rest are bit masks)
#define PMC_EVTSEL_CMASK		24		// counter mask [31:24]
#define PMC_EVTSEL_UMASK		8		// unit mask [15:8]

#define PMC_EVTSEL_INC			(1 << 23)	// invert counter mask
#define PMC_EVTSEL_EN			(1 << 22)	// enable counters
#define PMC_EVTSEL_ANY			(1 << 21)	// any thread (from version 3 on)
#define PMC_EVTSEL_INT			(1 << 20)	// APIC interrupt enable
#define PMC_EVTSEL_PC			(1 << 19)	// pin control
#define PMC_EVTSEL_E			(1 << 18)	// edge detect
#define PMC_EVTSEL_OS			(1 << 17)	// operating system mode
#define PMC_EVTSEL_USR			(1 << 16)	// user mode

// Core 2 Duo non-architectural flags for IA32_PERFEVTSEL (bus snooping)
#define PMC_EVTSEL_HITM			(1 << 11)	// HITM response
#define PMC_EVTSEL_HIT			(1 << 9)	// HIT response
#define PMC_EVTSEL_CLEAN		(1 << 8)	// CLEAN response
// Misspelled original name ("PMV_"), kept so existing users keep compiling.
#define PMV_EVTSEL_CLEAN		PMC_EVTSEL_CLEAN

// architectural PMC events CPUID.0AH.EBX[6:0]
// (event select in the low byte, unit mask in the high byte)
#define PMC_EVT_UNHALTED_CORE_CLKS	0x003C		// UnHalted Core Cycles
#define PMC_EVT_UNHALTED_REF_CLKS	0x013C		// UnHalted Reference Cycles
#define PMC_EVT_INST_RET		0x00C0		// Instruction Retired
#define PMC_EVT_LLC_REF			0x4F2E		// LLC Reference
#define PMC_EVT_LLC_MISS		0x412E		// LLC Misses
#define PMC_EVT_BRANCH_RET		0x00C4		// Branch Instruction Retired
#define PMC_EVT_BRANCH_MISS_RET		0x00C5		// Branch Miss Retired

// Core 2 Duo non-architectural PMC events
#define PMC_EVT_DTLB_MISS_ANY		0x0108		// Memory accesses that
missed the TLB +#define PMC_EVT_DTLB_MISS_LD 0x0208 // DTLB misses due to load operations +#define PMC_EVT_DTLB_MISS_L0_LD 0x0408 // Level 0: DTLB misses due to load operations +#define PMC_EVT_DTLB_MISS_ST 0x0808 // DTLB misses due to store operations + +#define PMC_EVT_ITLB_FLUSH 0x4082 // ITLB flushes +#define PMC_EVT_ITLB_MISS 0x1282 // ITLB misses (either large or small page) +#define PMC_EVT_ITLB_MISS_RET 0x00C9 // Retired instructions that missed the ITLB +#define PMC_EVT_ITLB_MISS_SMALL 0x0282 // ITLB small page misses +#define PMC_EVT_ITLB_MISS_LARGE 0x1082 // ITLB large page misses + +#define PMC_EVT_PAGE_WALK_COUNT 0x010C // Number of page-walks executed +#define PMC_EVT_PAGE_WALK_CLKS 0x020C // Duration of page-walks in core cycles + +struct pmc { + uint8_t id; + + void (*start)(); + void (*stop)(); + + void (*reset)(); + void (*write)(uint64_t val); + uint64_t (*read)(); +}; + +struct pmc_caps { + /// Architecural PM version (CPUID.0AH:EAX[7:0]) + uint8_t version; + /// Number of available General Purpose PMCs (CPUID.0AH:EAX[15:8]) + uint8_t gp_count; + /// Number of available Fixed Function PMCs (CPUID.0AH.EDX[4:0]) + uint8_t ff_count; + /// Counter bit width of General Purpose PMCs (CPUID.0AH:EAX[23:16]) + uint8_t gp_width; + /// Counter bit width of Fixed Function PMCs (CPUID.0AH.EDX[12:5]) + uint8_t ff_width; + /// Bit mask of supported architecural PMC events (CPUID.0AH.EBX[6:0]) + uint32_t arch_events; + /// IA32_PERF_CAPABILITIES MSR + uint64_t msr; +}; + +/** @brief Queries the CPU about available Performance Monitoring capabilities + * + * @return A pointer to the capabilities struct + **/ +struct pmc_caps* pmc_init(); + +/** @brief Setups and stops the general purpose PMCs + * + * @param i The counter number to configure (positive for gp PMCs, negative for ff PMCs) + * @param event A combined event number including the unit mask (PMC_EVT_*) + * @param flags Flags for the IA32_PERFEVTSEL registers (PMC_EVTSEL_*) + * @param umask A seperate 
Unitmask ORed with event + * @param cmask A optional counter mask value + * @return + * - 0 on success + * - else failure (invalid counter or flags) + */ +int pmc_config(uint8_t i, uint16_t event, uint32_t flags, uint8_t umask, uint8_t cmask); + +/** @brief Start a single general purpose PMC + * + * @param i The counter number + * @return + * - 0 on success + * - -EINVAL on invalid counter number + */ +inline int pmc_start(uint8_t i); + +/** @brief Stop a single general purpose PMC + * + * @param i The counter number + * @return + * - 0 on success + * - -EINVAL on invalid counter number + */ +inline int pmc_stop(uint8_t i); + +/** @brief Start all PMCs at the same time + * + * @param i The counter number + * @return + * - 0 on success + * - -EINVAL on invalid counter number + */ +inline int pmc_start_all(); + +/** @brief Stop all PMCs at the same time + * + * @param i The counter number + * @return + * - 0 on success + * - -EINVAL on invalid counter number + */ +inline int pmc_stop_all(); + +/** @brief Read a single general purpose PMC + * + * @param i The counter number + * @return The counter value (see struct pmc_caps.gp_width) + */ +inline uint64_t pmc_read(uint8_t i); + +/** @brief Write a single general purpose PMC value + * + * Not all architectures support full width writes to the PMCs. + * If bit 13 (FW_WRITE) in struct pmc_caps.msr is not set the PMC + * is updated with the 32 bit sign extended version of val! 
+ * + * @param i The counter number + * @param val The counter value (see struct pmc_caps.gp_width) + */ +inline int pmc_write(uint8_t i, uint64_t val); + +#endif diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0b58f3b1..085ebb54 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -1,4 +1,4 @@ -C_source := gdt.c kb.c timer.c irq.c isrs.c idt.c vga.c uart.c multiboot.c apic.c pci.c processor.c +C_source := gdt.c kb.c timer.c irq.c isrs.c idt.c vga.c uart.c multiboot.c apic.c pci.c processor.c pmc.c ASM_source := entry$(BIT).asm string$(BIT).asm MODULE := arch_x86_kernel diff --git a/arch/x86/kernel/pmc.c b/arch/x86/kernel/pmc.c new file mode 100644 index 00000000..c1195b2b --- /dev/null +++ b/arch/x86/kernel/pmc.c @@ -0,0 +1,123 @@ +/* + * Copyright 2013 Steffen Vogel, Chair for Operating Systems, + * RWTH Aachen University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is part of MetalSVM. + */ + + /** + * @author Steffen Vogel + * @file arch/x86/kernel/pmc.c + * @brief Simple interface to IA32 Performance Monitor Counters + * + * This implementation is in parts specific for Intel Core 2 Duo Processors! 
+ */ + +#include +#include +#include + +static struct pmc_caps caps = { 0 }; + +struct pmc_caps* pmc_init() +{ + if (!caps.version) { + uint32_t a, b, c, d; + cpuid(0x0A, &a, &b, &c, &d); + + caps.version = (a >> 0) & 0xff; + caps.gp_count = (a >> 8) & 0xff; + caps.gp_width = (a >> 16) & 0xff; + caps.ff_count = (d >> 0) & 0x1f; + caps.ff_width = (d >> 5) & 0xff; + caps.arch_events = (b >> 0) & 0x3f; + + // check if IA32_PERF_CAPABILITIES MSR is available + cpuid(0x01, &a, &b, &c, &d); + if (caps.version >= 2) { + if (c & (1 << 15 /* PDCM */)) + caps.msr = rdmsr(IA32_PERF_CAPABILITIES); + } + } + + return ∩︀ +} + +int pmc_config(uint8_t i, uint16_t event, uint32_t flags, uint8_t umask, uint8_t cmask) +{ + if (BUILTIN_EXPECT(i > caps.gp_count, 0)) + return -EINVAL; + + uint64_t evtsel = flags | event; + evtsel |= (cmask << PMC_EVTSEL_CMASK) | (umask << PMC_EVTSEL_UMASK); + + wrmsr(IA32_PERFEVTSEL(i), evtsel); + wrmsr(IA32_PMC(i), 0); + + return 0; +} + +inline int pmc_start(uint8_t i) +{ + if (BUILTIN_EXPECT(i > caps.gp_count, 0)) + return -EINVAL; + + wrmsr(IA32_PERFEVTSEL(i), rdmsr(IA32_PERFEVTSEL(i)) | PMC_EVTSEL_EN); + + return 0; +} + +inline int pmc_stop(uint8_t i) +{ + if (BUILTIN_EXPECT(i > caps.gp_count, 0)) + return -EINVAL; + + wrmsr(IA32_PERFEVTSEL(i), rdmsr(IA32_PERFEVTSEL(i)) & ~PMC_EVTSEL_EN); +} + +inline int pmc_start_all() +{ + if (BUILTIN_EXPECT(caps.version < 2, 0)) + return -EINVAL; + + wrmsr(MSR_PERF_GLOBAL_CTRL, -1L); +} + +inline int pmc_stop_all() +{ + if (BUILTIN_EXPECT(caps.version < 2, 0)) + return -EINVAL; + + wrmsr(MSR_PERF_GLOBAL_CTRL, 0); +} + +inline uint64_t pmc_read(uint8_t i) +{ + if (BUILTIN_EXPECT(i > caps.gp_count, 0)) + return 0; + + return rdmsr(IA32_PMC(i)); +} + +inline int pmc_write(uint8_t i, uint64_t val) +{ + if (BUILTIN_EXPECT(i > caps.gp_count, 0)) + return -EINVAL; + + if (caps.version >= 2 && caps.msr & (1 << 13 /* FW_WRITE */)) + wrmsr(IA32_A_PMC(i), val); + else + wrmsr(IA32_PMC(i), val); +}