remove buf in the calculation of the MPB address
+ increasing the readability + add additional performance counters for the page allocation
This commit is contained in:
parent
2071bb0601
commit
7ed258a1fb
2 changed files with 56 additions and 14 deletions
|
@ -32,7 +32,7 @@ extern "C" {
|
||||||
|
|
||||||
#ifdef CONFIG_ROCKCREEK
|
#ifdef CONFIG_ROCKCREEK
|
||||||
|
|
||||||
#define SVM_WB
|
//#define SVM_WB
|
||||||
|
|
||||||
#define SVM_STRONG (1 << 0)
|
#define SVM_STRONG (1 << 0)
|
||||||
#define SVM_LAZYRELEASE (1 << 1)
|
#define SVM_LAZYRELEASE (1 << 1)
|
||||||
|
@ -96,7 +96,7 @@ void svm_invalidate(void);
|
||||||
/* *brief flushs the cache for all SVM regions
|
/* *brief flushs the cache for all SVM regions
|
||||||
*/
|
*/
|
||||||
#ifndef SVM_WB
|
#ifndef SVM_WB
|
||||||
static inline void svm_flush(void)
|
static inline void svm_flush(size_t unused)
|
||||||
{
|
{
|
||||||
// need to write to another line to make sure the write combine buffer gets flushed
|
// need to write to another line to make sure the write combine buffer gets flushed
|
||||||
*(int *)RCCE_fool_write_combine_buffer = 1;
|
*(int *)RCCE_fool_write_combine_buffer = 1;
|
||||||
|
|
|
@ -35,6 +35,8 @@
|
||||||
#include <asm/icc.h>
|
#include <asm/icc.h>
|
||||||
#include <asm/svm.h>
|
#include <asm/svm.h>
|
||||||
|
|
||||||
|
#define USE_PERFCOUNTERS 1
|
||||||
|
|
||||||
#define SHARED_PAGES (4*(RCCE_SHM_SIZE_MAX >> PAGE_SHIFT))
|
#define SHARED_PAGES (4*(RCCE_SHM_SIZE_MAX >> PAGE_SHIFT))
|
||||||
#define OWNER_SIZE ((SHARED_PAGES * sizeof(uint8_t) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
|
#define OWNER_SIZE ((SHARED_PAGES * sizeof(uint8_t) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
|
||||||
|
|
||||||
|
@ -65,6 +67,7 @@ static RCCE_FLAG release;
|
||||||
#define L2_CAPACITY (256*1024UL)
|
#define L2_CAPACITY (256*1024UL)
|
||||||
#define L2_WBSTRIDE (L2_CAPACITY/L2_WAYS)
|
#define L2_WBSTRIDE (L2_CAPACITY/L2_WAYS)
|
||||||
|
|
||||||
|
#ifdef SVM_WB
|
||||||
/* Helper function to read data into all 4 ways of L2 cache */
|
/* Helper function to read data into all 4 ways of L2 cache */
|
||||||
|
|
||||||
__attribute__((always_inline)) static inline void svm_purge_set(const size_t set)
|
__attribute__((always_inline)) static inline void svm_purge_set(const size_t set)
|
||||||
|
@ -83,6 +86,7 @@ __attribute__((always_inline)) static inline void svm_purge_set(const size_t set
|
||||||
|
|
||||||
static size_t dummy_base = OWN_MPB + L2_CAPACITY;
|
static size_t dummy_base = OWN_MPB + L2_CAPACITY;
|
||||||
static size_t dummy_offset = 0;
|
static size_t dummy_offset = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This array describes the owner of a specific page.
|
* This array describes the owner of a specific page.
|
||||||
|
@ -97,11 +101,16 @@ static const size_t shmbegin = SHM_ADDR;
|
||||||
static uint32_t emit[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0};
|
static uint32_t emit[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0};
|
||||||
static uint32_t request[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0};
|
static uint32_t request[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0};
|
||||||
static uint32_t forward[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0};
|
static uint32_t forward[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0};
|
||||||
|
static uint32_t alloc_page = 0;
|
||||||
|
static uint32_t map_page = 0;
|
||||||
|
#if USE_PERFCOUNTERS
|
||||||
|
static uint64_t alloc_ticks = 0;
|
||||||
static uint64_t request_ticks = 0;
|
static uint64_t request_ticks = 0;
|
||||||
static uint64_t emit_ticks = 0;
|
static uint64_t emit_ticks = 0;
|
||||||
static uint64_t wait_ticks = 0;
|
static uint64_t wait_ticks = 0;
|
||||||
static uint64_t max_wait = 0;
|
static uint64_t max_wait = 0;
|
||||||
static uint64_t min_wait = (uint64_t) -1;
|
static uint64_t min_wait = (uint64_t) -1;
|
||||||
|
#endif
|
||||||
|
|
||||||
int svm_init(void)
|
int svm_init(void)
|
||||||
{
|
{
|
||||||
|
@ -185,14 +194,19 @@ static size_t get_shpage(void)
|
||||||
*/
|
*/
|
||||||
int svm_alloc_page(size_t addr, page_table_t* pgt)
|
int svm_alloc_page(size_t addr, page_table_t* pgt)
|
||||||
{
|
{
|
||||||
|
#if USE_PERFCOUNTERS
|
||||||
|
uint64_t start = rdtsc();
|
||||||
|
#endif
|
||||||
uint32_t index2 = (addr >> 12) & 0x3FF;
|
uint32_t index2 = (addr >> 12) & 0x3FF;
|
||||||
size_t phyaddr;
|
size_t phyaddr;
|
||||||
t_vcharp mpb = (t_vcharp) ((size_t)(virt_to_phys(addr) >> PAGE_SHIFT) | (size_t) RCCE_comm_buffer[RCCE_IAM]);
|
t_vcharp mpb = (t_vcharp) ((size_t)(virt_to_phys(addr) >> PAGE_SHIFT) | ((size_t) RCCE_comm_buffer[RCCE_IAM] - RCCE_LINE_SIZE));
|
||||||
uint16_t offset = 0xFFFF;
|
uint16_t offset = 0xFFFF;
|
||||||
|
|
||||||
|
addr &= 0xFFFFF000; // align address to the page boundary
|
||||||
|
|
||||||
RCCE_acquire_lock(RC_COREID[LOCK_ID]);
|
RCCE_acquire_lock(RC_COREID[LOCK_ID]);
|
||||||
|
|
||||||
RCCE_get((t_vcharp) &offset, mpb, sizeof(uint16_t), RCCE_IAM);
|
iRCCE_get((t_vcharp) &offset, mpb, sizeof(uint16_t), RCCE_IAM);
|
||||||
|
|
||||||
if (!offset) {
|
if (!offset) {
|
||||||
int i;
|
int i;
|
||||||
|
@ -200,7 +214,7 @@ int svm_alloc_page(size_t addr, page_table_t* pgt)
|
||||||
phyaddr = get_shpage();
|
phyaddr = get_shpage();
|
||||||
offset = (uint16_t) ((phyaddr - shmbegin) >> PAGE_SHIFT);
|
offset = (uint16_t) ((phyaddr - shmbegin) >> PAGE_SHIFT);
|
||||||
for(i=0; i<RCCE_NP; i++)
|
for(i=0; i<RCCE_NP; i++)
|
||||||
RCCE_put(mpb, (t_vcharp) &offset, sizeof(uint16_t), i);
|
iRCCE_put(mpb, (t_vcharp) &offset, sizeof(uint16_t), i);
|
||||||
|
|
||||||
RCCE_release_lock(RC_COREID[LOCK_ID]);
|
RCCE_release_lock(RC_COREID[LOCK_ID]);
|
||||||
|
|
||||||
|
@ -210,8 +224,12 @@ int svm_alloc_page(size_t addr, page_table_t* pgt)
|
||||||
phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT] = addr;
|
phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT] = addr;
|
||||||
tlb_flush_one_page(addr);
|
tlb_flush_one_page(addr);
|
||||||
|
|
||||||
kprintf("map new page frame 0x%x at 0x%x, flags0x%x, offset 0x%x, mpb 0x%x\n", phyaddr, addr, pgt->entries[index2] & 0xFFF, (int) offset, mpb);
|
alloc_page++;
|
||||||
|
//kprintf("map new page frame 0x%x at 0x%x, flags0x%x, offset 0x%x, mpb 0x%x\n", phyaddr, addr, pgt->entries[index2] & 0xFFF, (int) offset, mpb);
|
||||||
|
|
||||||
|
#if USE_PERFCOUNTERS
|
||||||
|
alloc_ticks += rdtsc() - start;
|
||||||
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
RCCE_release_lock(RC_COREID[LOCK_ID]);
|
RCCE_release_lock(RC_COREID[LOCK_ID]);
|
||||||
|
@ -226,11 +244,14 @@ int svm_alloc_page(size_t addr, page_table_t* pgt)
|
||||||
phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT] = addr;
|
phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT] = addr;
|
||||||
tlb_flush_one_page(addr);
|
tlb_flush_one_page(addr);
|
||||||
|
|
||||||
kprintf("map existing page frame 0x%x at 0x%x, offset 0x%x, mpb 0x%x\n", phyaddr, addr, offset, mpb);
|
map_page++;
|
||||||
|
//kprintf("map existing page frame 0x%x at 0x%x, offset 0x%x, mpb 0x%x\n", phyaddr, addr, offset, mpb);
|
||||||
|
|
||||||
|
#if USE_PERFCOUNTERS
|
||||||
|
alloc_ticks += rdtsc() - start;
|
||||||
|
#endif
|
||||||
if (pgt->entries[index2] & PG_SVM_LAZYRELEASE)
|
if (pgt->entries[index2] & PG_SVM_LAZYRELEASE)
|
||||||
return 0;
|
return 0;
|
||||||
kprintf("send request to %d, 0x%x\n", (int) page_owner[(phyaddr - shmbegin) >> PAGE_SHIFT], (phyaddr - shmbegin) >> PAGE_SHIFT);
|
|
||||||
return svm_access_request(addr);
|
return svm_access_request(addr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -241,7 +262,9 @@ int svm_alloc_page(size_t addr, page_table_t* pgt)
|
||||||
*/
|
*/
|
||||||
int svm_access_request(size_t addr)
|
int svm_access_request(size_t addr)
|
||||||
{
|
{
|
||||||
|
#if USE_PERFCOUNTERS
|
||||||
uint64_t start = rdtsc();
|
uint64_t start = rdtsc();
|
||||||
|
#endif
|
||||||
size_t phyaddr = virt_to_phys(addr);
|
size_t phyaddr = virt_to_phys(addr);
|
||||||
uint32_t pageid;
|
uint32_t pageid;
|
||||||
int remote_rank;
|
int remote_rank;
|
||||||
|
@ -261,24 +284,31 @@ int svm_access_request(size_t addr)
|
||||||
((size_t*) payload)[0] = RCCE_IAM;
|
((size_t*) payload)[0] = RCCE_IAM;
|
||||||
((size_t*) payload)[1] = phyaddr;
|
((size_t*) payload)[1] = phyaddr;
|
||||||
|
|
||||||
|
//kprintf("send request (0x%x) to %d\n", addr, remote_rank);
|
||||||
/* send ping request */
|
/* send ping request */
|
||||||
iRCCE_mail_send(2*sizeof(size_t), SVM_REQ, 0, (char*) payload, remote_rank);
|
iRCCE_mail_send(2*sizeof(size_t), SVM_REQ, 0, (char*) payload, remote_rank);
|
||||||
icc_send_gic_irq(remote_rank);
|
icc_send_gic_irq(remote_rank);
|
||||||
request[remote_rank]++;
|
request[remote_rank]++;
|
||||||
|
|
||||||
|
#if USE_PERFCOUNTERS
|
||||||
uint64_t wait_start = rdtsc();
|
uint64_t wait_start = rdtsc();
|
||||||
|
#endif
|
||||||
// wait for response
|
// wait for response
|
||||||
icc_wait(SVM_RESP);
|
icc_wait(SVM_RESP);
|
||||||
|
#if USE_PERFCOUNTERS
|
||||||
uint64_t res = rdtsc() - wait_start;
|
uint64_t res = rdtsc() - wait_start;
|
||||||
wait_ticks += res;
|
wait_ticks += res;
|
||||||
if (min_wait > res)
|
if (min_wait > res)
|
||||||
min_wait = res;
|
min_wait = res;
|
||||||
if (max_wait < res)
|
if (max_wait < res)
|
||||||
max_wait = res;
|
max_wait = res;
|
||||||
|
#endif
|
||||||
|
|
||||||
ret = change_page_permissions(addr, addr+PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE);
|
ret = change_page_permissions(addr, addr+PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE);
|
||||||
|
|
||||||
|
#if USE_PERFCOUNTERS
|
||||||
request_ticks += rdtsc() - start;
|
request_ticks += rdtsc() - start;
|
||||||
|
#endif
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -384,6 +414,9 @@ void* svm_malloc(size_t size, uint32_t consistency)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
map_flags |= MAP_NO_ACCESS;
|
map_flags |= MAP_NO_ACCESS;
|
||||||
|
#ifndef SVM_WB
|
||||||
|
map_flags |= MAP_MPE;
|
||||||
|
#endif
|
||||||
|
|
||||||
viraddr = map_region(0, 0, size >> PAGE_SHIFT, map_flags);
|
viraddr = map_region(0, 0, size >> PAGE_SHIFT, map_flags);
|
||||||
kprintf("svmmalloc: viraddr 0x%x, size 0x%x\n", viraddr, size);
|
kprintf("svmmalloc: viraddr 0x%x, size 0x%x\n", viraddr, size);
|
||||||
|
@ -400,7 +433,7 @@ void* svm_malloc(size_t size, uint32_t consistency)
|
||||||
}
|
}
|
||||||
|
|
||||||
//kprintf("mpb_addr 0x%x\n", mpb_addr);
|
//kprintf("mpb_addr 0x%x\n", mpb_addr);
|
||||||
RCCE_put(mpb_addr, buffer, RCCE_LINE_SIZE, RCCE_IAM);
|
iRCCE_put(mpb_addr, buffer, RCCE_LINE_SIZE, RCCE_IAM);
|
||||||
|
|
||||||
irq_nested_enable(flags);
|
irq_nested_enable(flags);
|
||||||
}
|
}
|
||||||
|
@ -454,7 +487,9 @@ void svm_free(void* addr, size_t size)
|
||||||
*/
|
*/
|
||||||
int svm_emit_page(size_t phyaddr, int ue)
|
int svm_emit_page(size_t phyaddr, int ue)
|
||||||
{
|
{
|
||||||
|
#if USE_PERFCOUNTERS
|
||||||
uint64_t start = rdtsc();
|
uint64_t start = rdtsc();
|
||||||
|
#endif
|
||||||
uint32_t pageid;
|
uint32_t pageid;
|
||||||
int remote_rank;
|
int remote_rank;
|
||||||
|
|
||||||
|
@ -497,7 +532,9 @@ int svm_emit_page(size_t phyaddr, int ue)
|
||||||
change_page_permissions(viraddr, viraddr+PAGE_SIZE, VMA_NOACCESS|VMA_READ|VMA_CACHEABLE);
|
change_page_permissions(viraddr, viraddr+PAGE_SIZE, VMA_NOACCESS|VMA_READ|VMA_CACHEABLE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if USE_PERFCOUNTERS
|
||||||
emit_ticks += rdtsc() - start;
|
emit_ticks += rdtsc() - start;
|
||||||
|
#endif
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -545,7 +582,7 @@ void svm_invalidate(void)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void svm_flush( size_t phyaddr )
|
void svm_flush(size_t phyaddr)
|
||||||
{
|
{
|
||||||
task_t* task = per_core(current_task);
|
task_t* task = per_core(current_task);
|
||||||
page_dir_t* pgd = task->pgd;
|
page_dir_t* pgd = task->pgd;
|
||||||
|
@ -659,8 +696,8 @@ int svm_barrier(uint32_t flags)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern uint64_t check_ticks;
|
//extern uint64_t check_ticks;
|
||||||
extern uint64_t recv_ticks;
|
//extern uint64_t recv_ticks;
|
||||||
|
|
||||||
int svm_statistics(void)
|
int svm_statistics(void)
|
||||||
{
|
{
|
||||||
|
@ -676,13 +713,18 @@ int svm_statistics(void)
|
||||||
for(i=0; i<RCCE_MAXNP; i++)
|
for(i=0; i<RCCE_MAXNP; i++)
|
||||||
kprintf("\t%u", forward[i]);
|
kprintf("\t%u", forward[i]);
|
||||||
kputs("\n");
|
kputs("\n");
|
||||||
|
kprintf("allocate page frame: %u\n", alloc_page);
|
||||||
|
kprintf("map page frame: %d\n", map_page);
|
||||||
|
#if USE_PERFCOUNTERS
|
||||||
|
kprintf("alloc ticks: %llu\n", alloc_ticks);
|
||||||
kprintf("request ticks: %llu\n", request_ticks);
|
kprintf("request ticks: %llu\n", request_ticks);
|
||||||
kprintf("wait ticks: %llu\n", wait_ticks);
|
kprintf("wait ticks: %llu\n", wait_ticks);
|
||||||
kprintf("emit ticks: %llu\n", emit_ticks);
|
kprintf("emit ticks: %llu\n", emit_ticks);
|
||||||
kprintf("max wait: %llu\n", max_wait);
|
kprintf("max wait: %llu\n", max_wait);
|
||||||
kprintf("min wait: %llu\n", min_wait);
|
kprintf("min wait: %llu\n", min_wait);
|
||||||
kprintf("check_ticks: %llu\n", check_ticks);
|
//kprintf("check_ticks: %llu\n", check_ticks);
|
||||||
kprintf("recv_tick: %llu\n", recv_ticks);
|
//kprintf("recv_tick: %llu\n", recv_ticks);
|
||||||
|
#endif
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue