implemented Performance Monitoring Counters and added a simple test benchmark
This commit is contained in:
parent
c70c488336
commit
a755ac5d5e
4 changed files with 386 additions and 1 deletions
|
@ -291,6 +291,72 @@ static void malloc(void)
|
|||
buddy_dump();
|
||||
}
|
||||
|
||||
/** @brief A memory benchmark for page table walks and TLB misses */
|
||||
int bench(void)
|
||||
{
|
||||
// init hardware performance counters
|
||||
struct pmc_caps* cap = pmc_init();
|
||||
if (cap->version == 0x21) { // QEmu returns garbage
|
||||
kputs("QEMU does not support PMCs.. skipping benchmark!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kprintf("PMC architecural version: %u\n", cap->version);
|
||||
kprintf("There are %u general purpose PMCs (%u bit wide) available\n", cap->gp_count, cap->gp_width);
|
||||
kprintf("There are %u fixed function PMCs (%u bit wide) available\n", cap->ff_count, cap->ff_width);
|
||||
|
||||
// setup PMCs
|
||||
pmc_stop_all();
|
||||
pmc_config(0, PMC_EVT_PAGE_WALK_CLKS, PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0);
|
||||
pmc_config(1, PMC_EVT_PAGE_WALK_COUNT, PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0);
|
||||
|
||||
// allocate space for results
|
||||
uint64_t *data = kmalloc(ITERATIONS * sizeof(uint64_t));
|
||||
if (!data)
|
||||
return -1;
|
||||
|
||||
// clear caches
|
||||
tlb_flush();
|
||||
flush_cache();
|
||||
|
||||
int i;
|
||||
for (i=0; i < ITERATIONS; i++) {
|
||||
pmc_write(0, 0);
|
||||
pmc_write(1, 0);
|
||||
|
||||
pmc_start_all();
|
||||
|
||||
#if 0
|
||||
int i = 100;
|
||||
while (i--) {
|
||||
tlb_flush();
|
||||
page_stats(0);
|
||||
}
|
||||
#else
|
||||
//flush_cache();
|
||||
//tlb_flush();
|
||||
page_stats(0);
|
||||
#endif
|
||||
|
||||
pmc_stop_all();
|
||||
|
||||
uint64_t clks = pmc_read(0);
|
||||
uint64_t count = pmc_read(1);
|
||||
|
||||
/*kprintf("Number of Page table walks: %lu\n", count);
|
||||
kprintf("Page table walks clock cycles: %lu\n", clks);
|
||||
kprintf("Cycles per table walk: %lu.%u\n", clks / count, (1000 * clks / count) % 1000 );*/
|
||||
|
||||
data[i] = 1000000 * clks / count;
|
||||
}
|
||||
|
||||
// dump results
|
||||
for (i=0; i<ITERATIONS; i++)
|
||||
kprintf("%u\t%lu\n", i, data[i]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** @brief This is a simple procedure to test memory management subsystem */
|
||||
int memory(void* arg)
|
||||
{
|
||||
|
@ -313,6 +379,9 @@ int memory(void* arg)
|
|||
wait(&ret);
|
||||
test(!ret, "userspace task returned with code = %d", ret);
|
||||
|
||||
kprintf("======== BENCH: memory and TLB benchmark started...\n");
|
||||
bench();
|
||||
|
||||
kprintf("======== All tests finished successfull...\n");
|
||||
|
||||
return 0;
|
||||
|
|
193
arch/x86/include/asm/pmc.h
Normal file
193
arch/x86/include/asm/pmc.h
Normal file
|
@ -0,0 +1,193 @@
|
|||
/*
|
||||
* Copyright 2013 Steffen Vogel, Chair for Operating Systems,
|
||||
* RWTH Aachen University
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* This file is part of MetalSVM.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Steffen Vogel
|
||||
* @file arch/x86/include/asm/pmc.h
|
||||
* @brief Simple interface to IA32 Performance Monitor Counters
|
||||
*
|
||||
* This implementation is in parts specific for Intel Core 2 Duo Processors!
|
||||
*/
|
||||
|
||||
#ifndef _ARCH_PMC_H_
|
||||
#define _ARCH_PMC_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
// PMC MSR addresses
#define MSR_PERF_GLOBAL_STATUS		0x38E	// global counter control facilities
#define MSR_PERF_GLOBAL_CTRL		0x38F
#define MSR_PERF_GLOBAL_OVF_CTRL	0x390
#define IA32_PERF_CAPABILITIES		0x345
// The index argument is parenthesized so that expressions such as
// IA32_PMC(a | b) expand with the intended precedence.
#define IA32_PERFEVTSEL(i)		(0x186 + (i))	// general purpose PMC configuration register
#define IA32_PMC(i)			(0x0C1 + (i))	// general purpose PMC counter register
#define IA32_A_PMC(i)			(0x4C1 + (i))	// general purpose alias PMC counter register for full width writes
#define MSR_PERF_FIXED_CTR(i)		(0x309 + (i))	// fixed function PMC counter register
#define MSR_PERF_FIXED_CTR_CTRL		0x38D	// fixed function PMC configuration register
|
||||
|
||||
|
||||
/* For Intel Core 2 Duo the MSR_PERF_FIXED_CTRs are mapped as follows:
|
||||
* MSR_PERF_FIXED_CTR(0) => INST_RETIRED.ANY
|
||||
* MSR_PERF_FIXED_CTR(1) => CPU_CLK_UNHALTED.CORE
|
||||
* MSR_PERF_FIXED_CTR(2) => CPU_CLK_UNHALTED.REF */
|
||||
|
||||
// architectural flags for IA32_PERFEVTSEL
#define PMC_EVTSEL_CMASK	24		// counter mask [31:24] (shift amount)
#define PMC_EVTSEL_UMASK	8		// unit mask [15:8] (shift amount)

#define PMC_EVTSEL_INC		(1 << 23)	// invert counter mask
#define PMC_EVTSEL_EN		(1 << 22)	// enable counters
#define PMC_EVTSEL_ANY		(1 << 21)	// any thread (from version 3 on)
#define PMC_EVTSEL_INT		(1 << 20)	// APIC interrupt enable
#define PMC_EVTSEL_PC		(1 << 19)	// pin control
#define PMC_EVTSEL_E		(1 << 18)	// edge detect
#define PMC_EVTSEL_OS		(1 << 17)	// operating system mode
#define PMC_EVTSEL_USR		(1 << 16)	// user mode

// Core 2 Duo non-architectural flags for IA32_PERFEVTSEL (bus snooping)
#define PMC_EVTSEL_HITM		(1 << 11)	// HITM response
#define PMC_EVTSEL_HIT		(1 << 9)	// HIT response
#define PMC_EVTSEL_CLEAN	(1 << 8)	// CLEAN response
#define PMV_EVTSEL_CLEAN	PMC_EVTSEL_CLEAN	// misspelled legacy name, kept for existing users
|
||||
|
||||
// architectural PMC events CPUID.0AH.EBX[6:0]
|
||||
#define PMC_EVT_UNHALTED_CORE_CLKS 0x003C // UnHalted Core Cycles
|
||||
#define PMC_EVT_UNHALTED_REF_CLKS 0x013C // UnHalted Reference Cycles
|
||||
#define PMC_EVT_INST_RET 0x00C0 // Instruction Retired
|
||||
#define PMC_EVT_LLC_REF 0x4F2E // LLC Reference
|
||||
#define PMC_EVT_LLC_MISS 0x412E // LLC Misses
|
||||
#define PMC_EVT_BRANCH_RET 0x00C4 // Branch Instruction Retired
|
||||
#define PMC_EVT_BRANCH_MISS_RET 0x00C5 // Branch Miss Retired
|
||||
|
||||
// Core 2 Duo non-architectural PMC events
|
||||
#define PMC_EVT_DTLB_MISS_ANY 0x0108 // Memory accesses that missed the TLB
|
||||
#define PMC_EVT_DTLB_MISS_LD 0x0208 // DTLB misses due to load operations
|
||||
#define PMC_EVT_DTLB_MISS_L0_LD 0x0408 // Level 0: DTLB misses due to load operations
|
||||
#define PMC_EVT_DTLB_MISS_ST 0x0808 // DTLB misses due to store operations
|
||||
|
||||
#define PMC_EVT_ITLB_FLUSH 0x4082 // ITLB flushes
|
||||
#define PMC_EVT_ITLB_MISS 0x1282 // ITLB misses (either large or small page)
|
||||
#define PMC_EVT_ITLB_MISS_RET 0x00C9 // Retired instructions that missed the ITLB
|
||||
#define PMC_EVT_ITLB_MISS_SMALL 0x0282 // ITLB small page misses
|
||||
#define PMC_EVT_ITLB_MISS_LARGE 0x1082 // ITLB large page misses
|
||||
|
||||
#define PMC_EVT_PAGE_WALK_COUNT 0x010C // Number of page-walks executed
|
||||
#define PMC_EVT_PAGE_WALK_CLKS 0x020C // Duration of page-walks in core cycles
|
||||
|
||||
/** @brief Handle bundling the operations of a single counter
 *
 * All callbacks that take no argument are declared with (void) so that the
 * compiler can check calls through them; an empty parameter list would leave
 * the arguments unspecified.
 */
struct pmc {
	/// Counter number
	uint8_t id;

	/// Start counting
	void (*start)(void);
	/// Stop counting
	void (*stop)(void);

	/// Reset the counter
	void (*reset)(void);
	/// Set the counter to val
	void (*write)(uint64_t val);
	/// Read the current counter value
	uint64_t (*read)(void);
};
|
||||
|
||||
struct pmc_caps {
|
||||
/// Architectural PM version (CPUID.0AH:EAX[7:0])
|
||||
uint8_t version;
|
||||
/// Number of available General Purpose PMCs (CPUID.0AH:EAX[15:8])
|
||||
uint8_t gp_count;
|
||||
/// Number of available Fixed Function PMCs (CPUID.0AH.EDX[4:0])
|
||||
uint8_t ff_count;
|
||||
/// Counter bit width of General Purpose PMCs (CPUID.0AH:EAX[23:16])
|
||||
uint8_t gp_width;
|
||||
/// Counter bit width of Fixed Function PMCs (CPUID.0AH.EDX[12:5])
|
||||
uint8_t ff_width;
|
||||
/// Bit mask of architectural PMC events (CPUID.0AH:EBX[6:0]); per the Intel SDM a set bit means the event is NOT available
|
||||
uint32_t arch_events;
|
||||
/// IA32_PERF_CAPABILITIES MSR
|
||||
uint64_t msr;
|
||||
};
|
||||
|
||||
/** @brief Queries the CPU about available Performance Monitoring capabilities
 *
 * The result is cached by the implementation, so the function may be called
 * repeatedly.
 *
 * @return A pointer to the capabilities struct
 **/
struct pmc_caps* pmc_init(void);
|
||||
|
||||
/** @brief Configures a general purpose PMC and resets its counter to zero
|
||||
*
|
||||
* @param i The number of the general purpose PMC to configure (fixed function PMCs are not handled)
|
||||
* @param event A combined event number including the unit mask (PMC_EVT_*)
|
||||
* @param flags Flags for the IA32_PERFEVTSEL registers (PMC_EVTSEL_*)
|
||||
* @param umask A separate unit mask ORed with event
|
||||
* @param cmask An optional counter mask value
|
||||
* @return
|
||||
* - 0 on success
|
||||
* - else failure (invalid counter or flags)
|
||||
*/
|
||||
int pmc_config(uint8_t i, uint16_t event, uint32_t flags, uint8_t umask, uint8_t cmask);
|
||||
|
||||
/** @brief Start a single general purpose PMC
 *
 * Declared without 'inline': the definition lives in pmc.c, and an inline
 * declaration with external linkage needs a definition in every translation
 * unit that includes this header.
 *
 * @param i The counter number
 * @return
 *  - 0 on success
 *  - -EINVAL on invalid counter number
 */
int pmc_start(uint8_t i);
|
||||
|
||||
/** @brief Stop a single general purpose PMC
 *
 * Declared without 'inline': the definition lives in pmc.c, and an inline
 * declaration with external linkage needs a definition in every translation
 * unit that includes this header.
 *
 * @param i The counter number
 * @return
 *  - 0 on success
 *  - -EINVAL on invalid counter number
 */
int pmc_stop(uint8_t i);
|
||||
|
||||
/** @brief Start all PMCs at the same time
 *
 * Uses the global counter control MSR (MSR_PERF_GLOBAL_CTRL).
 * Declared without 'inline': the definition lives in pmc.c, and an inline
 * declaration with external linkage needs a definition in every translation
 * unit that includes this header.
 *
 * @return
 *  - 0 on success
 *  - -EINVAL if the architectural PM version is below 2
 */
int pmc_start_all(void);
|
||||
|
||||
/** @brief Stop all PMCs at the same time
 *
 * Uses the global counter control MSR (MSR_PERF_GLOBAL_CTRL).
 * Declared without 'inline': the definition lives in pmc.c, and an inline
 * declaration with external linkage needs a definition in every translation
 * unit that includes this header.
 *
 * @return
 *  - 0 on success
 *  - -EINVAL if the architectural PM version is below 2
 */
int pmc_stop_all(void);
|
||||
|
||||
/** @brief Read a single general purpose PMC
 *
 * Declared without 'inline': the definition lives in pmc.c, and an inline
 * declaration with external linkage needs a definition in every translation
 * unit that includes this header.
 *
 * @param i The counter number
 * @return The counter value (see struct pmc_caps.gp_width), or 0 for an
 *         invalid counter number
 */
uint64_t pmc_read(uint8_t i);
|
||||
|
||||
/** @brief Write a single general purpose PMC value
 *
 * Not all architectures support full width writes to the PMCs.
 * If bit 13 (FW_WRITE) in struct pmc_caps.msr is not set the PMC
 * is updated with the 32 bit sign extended version of val!
 *
 * Declared without 'inline': the definition lives in pmc.c, and an inline
 * declaration with external linkage needs a definition in every translation
 * unit that includes this header.
 *
 * @param i The counter number
 * @param val The counter value (see struct pmc_caps.gp_width)
 * @return
 *  - 0 on success
 *  - -EINVAL on invalid counter number
 */
int pmc_write(uint8_t i, uint64_t val);
|
||||
|
||||
#endif
|
|
@ -1,4 +1,4 @@
|
|||
C_source := gdt.c kb.c timer.c irq.c isrs.c idt.c vga.c uart.c multiboot.c apic.c pci.c processor.c
|
||||
C_source := gdt.c kb.c timer.c irq.c isrs.c idt.c vga.c uart.c multiboot.c apic.c pci.c processor.c pmc.c
|
||||
ASM_source := entry$(BIT).asm string$(BIT).asm
|
||||
MODULE := arch_x86_kernel
|
||||
|
||||
|
|
123
arch/x86/kernel/pmc.c
Normal file
123
arch/x86/kernel/pmc.c
Normal file
|
@ -0,0 +1,123 @@
|
|||
/*
|
||||
* Copyright 2013 Steffen Vogel, Chair for Operating Systems,
|
||||
* RWTH Aachen University
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* This file is part of MetalSVM.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author Steffen Vogel
|
||||
* @file arch/x86/kernel/pmc.c
|
||||
* @brief Simple interface to IA32 Performance Monitor Counters
|
||||
*
|
||||
* This implementation is in parts specific for Intel Core 2 Duo Processors!
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <asm/pmc.h>
|
||||
#include <asm/processor.h>
|
||||
|
||||
static struct pmc_caps caps = { 0 };
|
||||
|
||||
struct pmc_caps* pmc_init()
|
||||
{
|
||||
if (!caps.version) {
|
||||
uint32_t a, b, c, d;
|
||||
cpuid(0x0A, &a, &b, &c, &d);
|
||||
|
||||
caps.version = (a >> 0) & 0xff;
|
||||
caps.gp_count = (a >> 8) & 0xff;
|
||||
caps.gp_width = (a >> 16) & 0xff;
|
||||
caps.ff_count = (d >> 0) & 0x1f;
|
||||
caps.ff_width = (d >> 5) & 0xff;
|
||||
caps.arch_events = (b >> 0) & 0x3f;
|
||||
|
||||
// check if IA32_PERF_CAPABILITIES MSR is available
|
||||
cpuid(0x01, &a, &b, &c, &d);
|
||||
if (caps.version >= 2) {
|
||||
if (c & (1 << 15 /* PDCM */))
|
||||
caps.msr = rdmsr(IA32_PERF_CAPABILITIES);
|
||||
}
|
||||
}
|
||||
|
||||
return ∩︀
|
||||
}
|
||||
|
||||
int pmc_config(uint8_t i, uint16_t event, uint32_t flags, uint8_t umask, uint8_t cmask)
|
||||
{
|
||||
if (BUILTIN_EXPECT(i > caps.gp_count, 0))
|
||||
return -EINVAL;
|
||||
|
||||
uint64_t evtsel = flags | event;
|
||||
evtsel |= (cmask << PMC_EVTSEL_CMASK) | (umask << PMC_EVTSEL_UMASK);
|
||||
|
||||
wrmsr(IA32_PERFEVTSEL(i), evtsel);
|
||||
wrmsr(IA32_PMC(i), 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline int pmc_start(uint8_t i)
|
||||
{
|
||||
if (BUILTIN_EXPECT(i > caps.gp_count, 0))
|
||||
return -EINVAL;
|
||||
|
||||
wrmsr(IA32_PERFEVTSEL(i), rdmsr(IA32_PERFEVTSEL(i)) | PMC_EVTSEL_EN);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline int pmc_stop(uint8_t i)
|
||||
{
|
||||
if (BUILTIN_EXPECT(i > caps.gp_count, 0))
|
||||
return -EINVAL;
|
||||
|
||||
wrmsr(IA32_PERFEVTSEL(i), rdmsr(IA32_PERFEVTSEL(i)) & ~PMC_EVTSEL_EN);
|
||||
}
|
||||
|
||||
inline int pmc_start_all()
|
||||
{
|
||||
if (BUILTIN_EXPECT(caps.version < 2, 0))
|
||||
return -EINVAL;
|
||||
|
||||
wrmsr(MSR_PERF_GLOBAL_CTRL, -1L);
|
||||
}
|
||||
|
||||
inline int pmc_stop_all()
|
||||
{
|
||||
if (BUILTIN_EXPECT(caps.version < 2, 0))
|
||||
return -EINVAL;
|
||||
|
||||
wrmsr(MSR_PERF_GLOBAL_CTRL, 0);
|
||||
}
|
||||
|
||||
inline uint64_t pmc_read(uint8_t i)
|
||||
{
|
||||
if (BUILTIN_EXPECT(i > caps.gp_count, 0))
|
||||
return 0;
|
||||
|
||||
return rdmsr(IA32_PMC(i));
|
||||
}
|
||||
|
||||
inline int pmc_write(uint8_t i, uint64_t val)
|
||||
{
|
||||
if (BUILTIN_EXPECT(i > caps.gp_count, 0))
|
||||
return -EINVAL;
|
||||
|
||||
if (caps.version >= 2 && caps.msr & (1 << 13 /* FW_WRITE */))
|
||||
wrmsr(IA32_A_PMC(i), val);
|
||||
else
|
||||
wrmsr(IA32_PMC(i), val);
|
||||
}
|
Loading…
Add table
Reference in a new issue