Merge commit '499f43bfaf0bd7ca9387091712c0b60273bfa5d2' into mailbox_integration

Conflicts:
	arch/x86/scc/icc.c
	kernel/tests.c
Generic account for RWTHAachen Students 2011-08-24 23:47:56 -07:00
commit 17b104d11c
17 changed files with 630 additions and 56 deletions

View file

@@ -47,7 +47,7 @@ enum icc_mail_requests {
#define ICC_TAG_IP 0
#define ICC_TAG_SVM 1
#define ICC_TAG_SVMREQUEST 1
#define ICC_TAG_PINGREQUEST 2
#define ICC_TAG_PINGRESPONSE 3
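The four tag values above classify incoming mailbox messages. A minimal dispatch sketch, assuming nothing beyond the constants from this commit (the dispatcher and the standalone harness are illustrative, not part of the kernel):

#include <stdio.h>

#define ICC_TAG_IP 0
#define ICC_TAG_SVMREQUEST 1
#define ICC_TAG_PINGREQUEST 2
#define ICC_TAG_PINGRESPONSE 3

/* hypothetical dispatcher: only the tag constants come from this commit */
static void icc_dispatch(int tag)
{
	switch (tag) {
	case ICC_TAG_IP:           printf("IP packet\n");        break;
	case ICC_TAG_SVMREQUEST:   printf("SVM page request\n"); break;
	case ICC_TAG_PINGREQUEST:  printf("ping request\n");     break;
	case ICC_TAG_PINGRESPONSE: printf("ping response\n");    break;
	default:                   printf("unknown tag %d\n", tag);
	}
}

int main(void)
{
	icc_dispatch(ICC_TAG_SVMREQUEST);
	return 0;
}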

View file

@@ -41,8 +41,8 @@
#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
#define _PAGE_BIT_PAT 7 /* on 4KB pages */
#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
#define _PAGE_BIT_RESERVED 9 /* mark a virtual address range as reserved */
#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
#define _PAGE_BIT_SVM_STRONG 9 /* mark a virtual address range as used by the SVM system (strong consistency) */
#define _PAGE_BIT_SVM_LAZYRELEASE 10 /* mark a virtual address range as used by the SVM system (lazy release) */
/// Page is present
#define PG_PRESENT (1 << _PAGE_BIT_PRESENT)
@@ -64,12 +64,12 @@
#define PG_MPE PG_PSE
/// Global TLB entry (Pentium Pro and later)
#define PG_GLOBAL (1 << _PAGE_BIT_GLOBAL)
/// This virtual address range is reserved as marked
#define PG_RESERVED (1 << _PAGE_BIT_RESERVED)
/// Pattern flag
#define PG_PAT (1 << _PAGE_BIT_PAT)
/// Large page pattern flag
#define PG_PAT_LARGE (1 << _PAGE_BIT_PAT_LARGE)
/// This virtual address range is used by the SVM system with strong consistency
#define PG_SVM_STRONG (1 << _PAGE_BIT_SVM_STRONG)
/// This virtual address range is used by the SVM system with lazy-release consistency
#define PG_SVM_LAZYRELEASE (1 << _PAGE_BIT_SVM_LAZYRELEASE)
/// This is a whole set of flags (PRESENT,RW,ACCESSED,DIRTY) for kernelspace tables
#define KERN_TABLE (PG_PRESENT|PG_RW|PG_ACCESSED|PG_DIRTY)
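The SVM bits reuse position 9 (formerly _PAGE_BIT_RESERVED) and the free position 10. A standalone sketch of how such flags compose into a 32-bit page-table entry, mirroring what map_region does later in this commit for MAP_SVM_STRONG pages; the PTE layout is standard x86, the sample address is arbitrary:

#include <stdint.h>
#include <stdio.h>

#define _PAGE_BIT_PRESENT 0
#define _PAGE_BIT_PWT 3
#define _PAGE_BIT_SVM_STRONG 9
#define _PAGE_BIT_SVM_LAZYRELEASE 10

#define PG_PRESENT (1 << _PAGE_BIT_PRESENT)
#define PG_PWT (1 << _PAGE_BIT_PWT)
#define PG_SVM_STRONG (1 << _PAGE_BIT_SVM_STRONG)
#define PG_SVM_LAZYRELEASE (1 << _PAGE_BIT_SVM_LAZYRELEASE)

int main(void)
{
	uint32_t phyaddr = 0x12345000; /* page-aligned sample address */
	/* a strongly consistent SVM page: present + write-through */
	uint32_t entry = phyaddr | PG_PRESENT | PG_SVM_STRONG | PG_PWT;

	printf("entry = 0x%08x, svm_strong = %d\n",
	       (unsigned) entry, !!(entry & PG_SVM_STRONG));
	return 0;
}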
@@ -152,7 +152,7 @@ int unmap_region(size_t viraddr, uint32_t npages);
*
* @param viraddr Desired virtual address
* @param phyaddr Physical address to map from
* @param npages The Region's size in pages
* @param npages The region's size in number of pages
* @param flags Further page flags
*
* @return

View file

@@ -106,7 +106,7 @@ inline static void flush_cache(void) {
* The invd asm instruction, which invalidates the cache without writing back,
* is used here
*/
inline static void invalid_cache(void) {
inline static void invalidate_cache(void) {
asm volatile ("invd");
}

arch/x86/include/asm/svm.h Normal file
View file

@@ -0,0 +1,108 @@
/*
* Copyright 2011 Stefan Lankes, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#ifndef __ARCH_SVM_H__
#define __ARCH_SVM_H__
#include <metalsvm/stddef.h>
#ifdef CONFIG_ROCKCREEK
#include <asm/RCCE_lib.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
#ifdef CONFIG_ROCKCREEK
#define SVM_STRONG (1 << 0)
#define SVM_LAZYRELEASE (1 << 1)
/** @brief Init routine of the SVM subsystem
*
* @return
* - 0 on success
* - -ENOMEM not enough memory
*/
int svm_init(void);
/** @brief Memory allocator of the SVM subsystem.
*
* Like its RCCE counterpart, svmmalloc belongs to the synchronous
* functions.
*
* @return Pointer to the new memory range
*/
void* svmmalloc(size_t size, uint32_t flags);
/** @brief Frees memory that is managed by the SVM subsystem
*
* Like its RCCE counterpart, svmfree belongs to the synchronous
* functions.
*/
void svmfree(void* addr, size_t size);
/** @brief Request for exclusive access
*
* @return
* - 0 on success
*/
int svm_access_request(size_t addr);
/** @brief Emit a page to core ue
*
* @return
* - 0 on success
*/
int svm_emit_page(size_t addr, int ue);
/** @brief Invalidates the cache entries for all SVM regions
*/
static inline void svm_invalidate(void)
{
asm volatile ( ".byte 0x0f; .byte 0x0a;\n" ); // CL1FLUSHMB
}
/** @brief Flushes the cache for all SVM regions
*/
#ifdef CONFIG_ROCKCREEK
#ifndef SVM_WB
static inline void svm_flush(void)
{
// need to write to another line to make sure the write combine buffer gets flushed
*(int *)RCCE_fool_write_combine_buffer = 1;
}
#else
void svm_flush(void);
#endif
#endif
/** @brief Dumps some performance counters (e.g. number of page migrations)
*
* @return
* - 0 on success
*/
int svm_statistics(void);
#endif
#ifdef __cplusplus
}
#endif
#endif
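A hedged usage sketch of this interface (kernel context, MetalSVM headers assumed; the barrier/interrupt handling of real callers is abbreviated):

#include <metalsvm/stddef.h>
#include <asm/RCCE.h>
#include <asm/svm.h>

/* allocate a strongly consistent region on all cores, use it, release it */
static int svm_usage_sketch(void)
{
	size_t i, len = 16*4096; /* some page-aligned length */
	int* shared = (int*) svmmalloc(len, SVM_STRONG);

	if (!shared)
		return -1;

	if (!RCCE_ue()) /* let core 0 initialize the region */
		for(i=0; i<len/sizeof(int); i++)
			shared[i] = 0;
	svm_flush(); /* write back before the other cores read */
	RCCE_barrier(&RCCE_COMM_WORLD);

	shared[RCCE_ue()] = 42; /* a write fault triggers svm_access_request */

	RCCE_barrier(&RCCE_COMM_WORLD);
	svmfree(shared, len);
	return 0;
}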

View file

@@ -1,4 +1,4 @@
C_source := page.c
C_source := page.c svm.c
MODULE := arch_x86_mm
include $(TOPDIR)/Makefile.inc

View file

@@ -34,6 +34,7 @@
#ifdef CONFIG_ROCKCREEK
#include <asm/RCCE_lib.h>
#include <asm/SCC_API.h>
#include <asm/svm.h>
#include <asm/icc.h>
#endif
@@ -354,6 +355,17 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
if (flags & MAP_MPE)
pgt->entries[index] |= PG_MPE;
#endif
if (flags & MAP_SVM_STRONG)
#ifndef SVM_WB
pgt->entries[index] |= PG_SVM_STRONG|PG_PWT;
#else
pgt->entries[index] |= PG_SVM;
#endif
if (flags & MAP_SVM_LAZYRELEASE)
pgt->entries[index] |= PG_SVM_LAZYRELEASE|PG_PWT;
if (flags & MAP_NO_ACCESS)
pgt->entries[index] &= ~PG_PRESENT;
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
@@ -395,6 +407,11 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
phyaddr = pgt->entries[index2] & 0xFFFFF000;
newflags = pgt->entries[index2] & 0xFFF; // get old flags
if ((newflags & PG_SVM_STRONG) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS)))
newflags |= PG_PRESENT;
else if ((newflags & PG_SVM_STRONG) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS))
newflags &= ~PG_PRESENT;
// update flags
if (!(flags & VMA_WRITE))
newflags &= ~PG_RW;
@@ -591,8 +608,11 @@ int print_paging_tree(size_t viraddr)
static void pagefault_handler(struct state *s)
{
task_t* task = per_core(current_task);
page_dir_t* pgd = task->pgd;
page_table_t* pgt = NULL;
size_t viraddr = read_cr2();
size_t phyaddr;
uint32_t index1, index2;
if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
viraddr = viraddr & 0xFFFFF000;
@@ -610,6 +630,19 @@ static void pagefault_handler(struct state *s)
put_page(phyaddr);
}
// does our SVM system need to handle this page fault?
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
if (!pgd || !(pgd->entries[index1] & 0xFFFFF000))
goto default_handler;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & 0xFFFFF000);
if (!pgt || !(pgt->entries[index2]))
goto default_handler;
if (pgt->entries[index2] & PG_SVM_STRONG)
if (!svm_access_request(viraddr))
return;
default_handler:
kprintf("PAGE FAULT: Task %u got page fault at %p (irq %d, cs:eip 0x%x:0x%x)\n", task->id, viraddr, s->int_no, s->cs, s->eip);
kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%x\n",

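The handler above recomputes the two-level paging indices from the faulting address before consulting the SVM bits. The same arithmetic, checked standalone (32-bit x86: bits 31..22 select the page-directory entry, bits 21..12 the page-table entry):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t viraddr = 0xC0123456; /* arbitrary sample address */
	uint32_t index1 = viraddr >> 22;           /* page-directory index */
	uint32_t index2 = (viraddr >> 12) & 0x3FF; /* page-table index */
	uint32_t offset = viraddr & 0xFFF;         /* offset within the page */

	/* prints: pgd index 768, pgt index 291, offset 0x456 */
	printf("pgd index %u, pgt index %u, offset 0x%x\n",
	       (unsigned) index1, (unsigned) index2, (unsigned) offset);
	return 0;
}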
arch/x86/mm/svm.c Normal file
View file

@@ -0,0 +1,293 @@
/*
* Copyright 2011 Stefan Lankes, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <metalsvm/stddef.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/mmu.h>
#include <metalsvm/page.h>
#include <metalsvm/errno.h>
#include <asm/irqflags.h>
#include <asm/processor.h>
#ifdef CONFIG_ROCKCREEK
#include <asm/RCCE.h>
#include <asm/RCCE_lib.h>
#include <asm/iRCCE.h>
#include <asm/SCC_API.h>
#include <asm/icc.h>
#include <asm/svm.h>
#define SHARED_PAGES (RCCE_SHM_SIZE_MAX >> PAGE_SHIFT)
#define OWNER_SIZE ((SHARED_PAGES * sizeof(uint8_t) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
t_vcharp RC_SHM_BUFFER_START();
/*
* This array describes the owner of a specific page.
* Only the owner of a page is able to change the ownership.
* => No lock is needed.
*/
static volatile uint8_t* page_owner = NULL;
// helper array to convert a physical to a virtual address
static size_t phys2virt[SHARED_PAGES] = {[0 ... SHARED_PAGES-1] = 0};
static size_t shmbegin = 0;
static int my_ue = 0;
static uint32_t emit[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0};
static uint32_t request[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0};
static uint32_t forward[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0};
int svm_init(void)
{
size_t phyaddr;
uint32_t flags;
// iRCCE is not thread safe => disable interrupts
flags = irq_nested_disable();
my_ue = RCCE_ue();
shmbegin = (size_t)RC_SHM_BUFFER_START();
phyaddr = (size_t) RCCE_shmalloc(OWNER_SIZE);
irq_nested_enable(flags);
if (BUILTIN_EXPECT(!phyaddr, 0))
return -ENOMEM;
if (BUILTIN_EXPECT(phyaddr & 0xFFF, 0)) {
kprintf("RCCE_shmalloc returns not a page aligned physiacl address: 0x%x\n", phyaddr);
return -ENOMEM;
}
kprintf("Shared memory starts at the physical address 0x%x\n", shmbegin);
page_owner = (uint8_t*) map_region(0, phyaddr, OWNER_SIZE >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
if (BUILTIN_EXPECT(!page_owner, 0)) {
flags = irq_nested_disable();
RCCE_shfree((t_vcharp) phyaddr);
irq_nested_enable(flags);
return -ENOMEM;
}
// by default, core 0 is the owner
if (!my_ue)
memset((void*)page_owner, 0x00, OWNER_SIZE);
// iRCCE is not thread safe => disable interrupts
flags = irq_nested_disable();
RCCE_barrier(&RCCE_COMM_WORLD);
irq_nested_enable(flags);
return 0;
}
/*
* This function is called by the pagefault handler
* => the interrupt flag is already cleared
*/
int svm_access_request(size_t addr)
{
size_t phyaddr = virt_to_phys(addr);
uint32_t pageid;
int remote_rank;
uint8_t payload[iRCCE_MAIL_HEADER_PAYLOAD];
if (phyaddr < shmbegin)
return -EINVAL;
if (phyaddr >= shmbegin + RCCE_SHM_SIZE_MAX)
return -EINVAL;
pageid = (phyaddr-shmbegin) >> PAGE_SHIFT;
//svm_flush();
if (page_owner[pageid] == my_ue)
return 0;
remote_rank = page_owner[pageid];
((size_t*) payload)[0] = my_ue;
((size_t*) payload)[1] = phyaddr;
//kprintf("send access request to %d of 0x%x\n", remote_rank, phyaddr);
/* send page access request */
iRCCE_mail_send(2*sizeof(size_t), ICC_TAG_SVMREQUEST, 0, payload, remote_rank);
request[remote_rank]++;
NOP8;
icc_send_irq(remote_rank);
/* check for incoming messages */
icc_mail_check();
while (page_owner[pageid] != my_ue) {
NOP4;
}
return change_page_permissions(addr, addr+PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE);
}
void* svmmalloc(size_t size, uint32_t consistency)
{
size_t phyaddr, viraddr, i;
uint32_t flags;
uint32_t map_flags = MAP_KERNEL_SPACE|MAP_MPE;
if (consistency & SVM_STRONG)
map_flags |= MAP_SVM_STRONG;
else if (consistency & SVM_LAZYRELEASE)
map_flags |= MAP_SVM_LAZYRELEASE;
else return 0;
// currently, we allocate memory in page-size granularity
size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
// iRCCE is not thread safe => disable interrupts
flags = irq_nested_disable();
phyaddr = (size_t) RCCE_shmalloc(size);
if (RCCE_ue() && (consistency & SVM_STRONG))
map_flags |= MAP_NO_ACCESS;
irq_nested_enable(flags);
if (BUILTIN_EXPECT(!phyaddr, 0))
return NULL;
if (BUILTIN_EXPECT(phyaddr & 0xFFF, 0)) {
kprintf("RCCE_shmalloc returns not a page aligned physiacl address: 0x%x\n", phyaddr);
return NULL;
}
viraddr = map_region(0, phyaddr, size >> PAGE_SHIFT, map_flags);
for(i=0; i<size; i+=PAGE_SIZE)
phys2virt[(phyaddr + i - shmbegin) >> PAGE_SHIFT] = viraddr + i;
kprintf("svmmalloc: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, viraddr, size);
return (void*) viraddr;
}
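svmmalloc and svmfree round the requested size up to whole pages with the usual mask idiom; a standalone check, assuming the common 4 KiB page size:

#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096

/* the round-up idiom used by svmmalloc/svmfree above */
static size_t round_up_to_pages(size_t size)
{
	return (size + PAGE_SIZE - 1) & ~(size_t)(PAGE_SIZE - 1);
}

int main(void)
{
	printf("%zu -> %zu\n", (size_t)1,    round_up_to_pages(1));    /* 4096 */
	printf("%zu -> %zu\n", (size_t)4096, round_up_to_pages(4096)); /* 4096 */
	printf("%zu -> %zu\n", (size_t)4097, round_up_to_pages(4097)); /* 8192 */
	return 0;
}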
void svmfree(void* addr, size_t size)
{
size_t phyaddr, i;
uint32_t flags;
if (BUILTIN_EXPECT(!addr || !size, 0))
return;
phyaddr = virt_to_phys((size_t) addr);
// currently, we allocate memory in page-size granularity
size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
kprintf("svmfree: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, addr, size);
unmap_region((size_t) addr, size >> PAGE_SHIFT);
for(i=0; i<size; i+=PAGE_SIZE)
phys2virt[(phyaddr + i - shmbegin) >> PAGE_SHIFT] = 0;
// iRCCE is not thread safe => disable interrupts
flags = irq_nested_disable();
RCCE_shfree((t_vcharp) phyaddr);
irq_nested_enable(flags);
}
/*
* This function is called by icc_mail_check.
* => Interrupt flag is already cleared.
*/
int svm_emit_page(size_t phyaddr, int ue)
{
uint32_t pageid;
//kprintf("Try to emit page 0x%x to %d\n", phyaddr, ue);
if (phyaddr < shmbegin)
return -EINVAL;
if (phyaddr >= shmbegin + RCCE_SHM_SIZE_MAX)
return -EINVAL;
pageid = (phyaddr-shmbegin) >> PAGE_SHIFT;
if (page_owner[pageid] != my_ue) {
// this core is not the owner => forward the request to the real owner
int remote_rank;
uint8_t payload[iRCCE_MAIL_HEADER_PAYLOAD];
kprintf("Ups, core %d is not owner of page 0x%x\n", my_ue, phyaddr);
remote_rank = page_owner[pageid];
((size_t*) payload)[0] = ue;
((size_t*) payload)[1] = phyaddr;
/* forward the page request */
iRCCE_mail_send(2*sizeof(size_t), ICC_TAG_SVMREQUEST, 0, payload, remote_rank);
NOP8;
icc_send_irq(remote_rank);
forward[remote_rank]++;
} else {
size_t viraddr;
svm_flush();
page_owner[pageid] = ue;
emit[ue]++;
viraddr = phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT];
change_page_permissions(viraddr, viraddr+PAGE_SIZE, VMA_NOACCESS|VMA_READ|VMA_CACHEABLE);
}
return 0;
}
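Together, svm_access_request and svm_emit_page implement a single-owner handoff over the page_owner table. A single-threaded model of that handoff with the mailbox replaced by a direct call; apart from the ownership rule, everything here is a simplification:

#include <stdint.h>
#include <stdio.h>

#define SHARED_PAGES 8

static uint8_t page_owner[SHARED_PAGES]; /* core 0 owns every page at start */

/* owner side (cf. svm_emit_page): write back, then hand the page over */
static void emit_page(unsigned pageid, int new_owner)
{
	/* svm_flush() and the permission downgrade would happen here */
	page_owner[pageid] = (uint8_t) new_owner;
}

/* requester side (cf. svm_access_request): ask the owner, then spin */
static int access_request(unsigned pageid, int my_ue)
{
	if (page_owner[pageid] == my_ue)
		return 0; /* page is already ours */
	emit_page(pageid, my_ue); /* models the ICC_TAG_SVMREQUEST mail */
	while (page_owner[pageid] != my_ue)
		; /* the real code spins on the shared owner table */
	return 1; /* here the page permissions would be restored */
}

int main(void)
{
	printf("migrated: %d, owner of page 3 is now %d\n",
	       access_request(3, 1), page_owner[3]);
	return 0;
}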
#ifdef SVM_WB
void svm_flush(void)
{
int z, tmp;
// need to write to another line to make sure the write combine buffer gets flushed
*(int *)RCCE_fool_write_combine_buffer = 1;
flush_cache();
#if 0
// try to flush L2 cache
z = Z_PID(RC_COREID[my_ue]);
tmp=ReadConfigReg(CRB_OWN + (z==0 ? GLCFG0 : GLCFG1));
tmp &= ~(1 << GLCFG_XFLSHNN_BIT);
SetConfigReg(CRB_OWN + (z==0 ? GLCFG0 : GLCFG1), tmp);
while(!(ReadConfigReg(CRB_OWN + (z==0 ? GLCFG0 : GLCFG1)) & (1 << GLCFG_XFLSHNN_BIT))) {
NOP8;
}
#endif
}
#endif
int svm_statistics(void)
{
uint32_t i;
kprintf("emit\t:");
for(i=0; i<RCCE_MAXNP; i++)
kprintf("\t%u", emit[i]);
kprintf("\nrequest\t:");
for(i=0; i<RCCE_MAXNP; i++)
kprintf("\t%u", request[i]);
kprintf("\nforward\t:");
for(i=0; i<RCCE_MAXNP; i++)
kprintf("\t%u", forward[i]);
kputs("\n");
return 0;
}
#endif

View file

@@ -336,13 +336,13 @@ int RCCE_init(
RCCE_malloc_init(RCCE_comm_buffer[RCCE_IAM],RCCE_BUFF_SIZE);
#ifdef SHMADD
RCCE_shmalloc_init(map_region(NULL, RC_SHM_BUFFER_START()+RCCE_SHM_BUFFER_offset, RCCE_SHM_SIZE_MAX/PAGE_SIZE, MAP_KERNEL_SPACE|MAP_NO_CACHE), RCCE_SHM_SIZE_MAX);
RCCE_shmalloc_init(RC_SHM_BUFFER_START()+RCCE_SHM_BUFFER_offset, RCCE_SHM_SIZE_MAX);
#ifdef SHMDBG
kprintf("\n%d:%s:%d: RCCE_SHM_BUFFER_offset, RCCE_SHM_SIZE_MAX: %x %x\n", RCCE_IAM,
__FILE__,__LINE__,RCCE_SHM_BUFFER_offset ,RCCE_SHM_SIZE_MAX);
#endif
#else
RCCE_shmalloc_init(map_region(NULL, RC_SHM_BUFFER_START(), RCCE_SHM_SIZE_MAX/PAGE_SIZE, MAP_KERNEL_SPACE|MAP_NO_CACHE), RCCE_SHM_SIZE_MAX);
RCCE_shmalloc_init(RC_SHM_BUFFER_START(), RCCE_SHM_SIZE_MAX);
#endif
// initialize the (global) flag bookkeeping data structure

View file

@@ -25,6 +25,7 @@
#include <asm/iRCCE.h>
#include <asm/SCC_API.h>
#include <asm/icc.h>
#include <asm/svm.h>
#define IRQ_STATUS 0xD000
#define IRQ_MASK 0xD200
@@ -137,7 +138,7 @@ int icc_init(void)
return -ENODEV;
// enable additional outputs
RCCE_debug_set(RCCE_DEBUG_ALL);
//RCCE_debug_set(RCCE_DEBUG_ALL);
my_ue = RCCE_ue();
num_ues = RCCE_num_ues();
@@ -169,9 +170,18 @@ int icc_init(void)
// reset INTR/LINT0 flag
z = Z_PID(RC_COREID[my_ue]);
tmp=ReadConfigReg(CRB_OWN + (z==0 ? GLCFG0 : GLCFG1));
tmp &= ~2;
tmp &= ~(1 << GLCFG_XINTR_BIT);
SetConfigReg(CRB_OWN + (z==0 ? GLCFG0 : GLCFG1), tmp);
#if 0
// disable L2 cache
z = Z_PID(RC_COREID[my_ue]);
tmp=ReadConfigReg(CRB_OWN + (z==0 ? L2CFG0 : L2CFG1));
tmp |= (1 << L2CFG_WAYDISABLE_BIT);
SetConfigReg(CRB_OWN + (z==0 ? L2CFG0 : L2CFG1), tmp);
kprintf("set L2CFG to 0x%x\n", (uint32_t) tmp);
#endif
// set interrupt handler (INTR/LINT0)
irq_install_handler(124, intr_handler);

View file

@@ -17,25 +17,10 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
/**
* @author Stefan Lankes, Carsten Clauss
* @file arch/x86/scc/scc_memcpy.h
* @brief Special memcpy related implementations for the Intel SCC
*
* This file contains special SCC-efficient memcpy implementations
* to get memory from the RAM into the on-die memory or from the
* on-die memory into the RAM.
*/
#ifndef __SCC_MEMCPY_H_
#define __SCC_MEMCPY_H_
#include <metalsvm/stddef.h>
#ifdef CONFIG_ROCKCREEK
/** @brief Fast procedure to get a byte range from RAM into on-die memory.
*
/*
* A write access whose cache line is not present doesn't perform (on the
* current SCC architecture) a cache line fill. Therefore, in this case the
* core writes directly to the memory.
@@ -43,14 +28,10 @@
* The following function copies from the on-die memory (MPB) to the off-die
* memory and prefetches its destination. Therefore, the function avoids the
* bad behavior of a "write miss".
*
* @param dest Destination address
* @param src Source address
* @param count Range size in bytes
*/
inline static void *memcpy_get(void *dest, const void *src, size_t count)
{
int32_t h, i, j, k, l, m;
int h, i, j, k, l, m;
asm volatile ("cld;\n\t"
"1: cmpl $0, %%eax ; je 2f\n\t"
@@ -108,18 +89,13 @@ inline static void *memcpy_put(void* dest, const void *src, size_t count)
return dest;
}
#else
/** @brief Fast procedure to get a byte range from on-die memory into RAM.
*
/*
* If the destination is located on on-die memory (MPB), classical prefetching
* techniques will be used to increase the performance.
*
* @param dest Destination address
* @param src Source address
* @param count range size in bytes
*/
inline static void *memcpy_put(void *dest, const void *src, size_t count)
{
int32_t i, j, k, l;
int i, j, k, l;
/*
* We use the floating point registers to
@@ -191,5 +167,3 @@ inline static void *memcpy_put(void *dest, const void *src, size_t count)
#endif
#endif
#endif

View file

@@ -47,6 +47,7 @@ extern HANDLE hProc;
#include <metalsvm/semaphore.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/page.h>
#include <asm/RCCE.h>
#include <asm/RCCE_lib.h>
@@ -715,7 +716,11 @@ err_t mmnif_init(struct netif* netif)
/* Alloc and clear shared memory for rx_buff
*/
mpb_size = (sizeof(mm_rx_buffer_t) + MMNIF_RX_BUFFERLEN);
// align the MPB size to page-size granularity
mpb_size = (mpb_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
mpb_start_address = RCCE_shmalloc(mpb_size*MMNIF_CORES);
// map physical address in the virtual address space
mpb_start_address = map_region(0, mpb_start_address, mpb_size >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
mmnif->rx_buff = mpb_start_address + (mpb_size) * (own_ip_address - router_ip_address);
if (!(mpb_start_address))
@@ -1103,6 +1108,8 @@ int mmnif_open(void)
*/
int mmnif_close(void)
{
size_t phyaddr;
mmnif_t* mmnif;
if (!mmnif_dev)
@@ -1119,7 +1126,12 @@ int mmnif_close(void)
kfree(mmnif->tx_buff[0],MMNIF_TX_QUEUELEN * MMNIF_TX_BUFFERLEN);
kfree(mmnif_dev,sizeof(mmnif_t));
RCCE_shfree(mpb_start_address);
// determine physical address
phyaddr = virt_to_phys(mpb_start_address);
// unmap the shared memory region
unmap_region(mpb_start_address, mpb_size >> PAGE_SHIFT);
RCCE_shfree(phyaddr);
return 0;
}

View file

@@ -261,6 +261,7 @@ again:
static void rckemacif_input(struct netif* netif, struct pbuf* p)
{
struct eth_hdr *ethhdr;
err_t err;
/* points to packet payload, which starts with an Ethernet header */
ethhdr = p->payload;
@@ -275,8 +276,8 @@ static void rckemacif_input(struct netif* netif, struct pbuf* p)
case ETHTYPE_PPPOE:
#endif /* PPPOE_SUPPORT */
/* full packet send to tcpip_thread to process */
if (mynetif->input(p, mynetif) != ERR_OK) {
LWIP_DEBUGF(NETIF_DEBUG, ("rckemacif_input: IP input error\n"));
if ((err = mynetif->input(p, mynetif)) != ERR_OK) {
LWIP_DEBUGF(NETIF_DEBUG, ("rckemacif_input: IP input error %u\n", err));
pbuf_free(p);
}
break;

View file

@@ -48,6 +48,10 @@ extern "C" {
#ifdef CONFIG_ROCKCREEK
#define MAP_MPE (1 << 8)
#endif
#define MAP_SVM_STRONG (1 << 9)
#define MAP_SVM_LAZYRELEASE (1 << 10)
#define MAP_NO_ACCESS (1 << 11)
void NORETURN abort(void);
/** @brief Kernel's memory allocator function.

View file

@@ -32,10 +32,11 @@
extern "C" {
#endif
#define VMA_READ 0x01
#define VMA_WRITE 0x02
#define VMA_EXECUTE 0x04
#define VMA_CACHEABLE 0x08
#define VMA_READ (1 << 0)
#define VMA_WRITE (1 << 1)
#define VMA_EXECUTE (1 << 2)
#define VMA_CACHEABLE (1 << 3)
#define VMA_NOACCESS (1 << 4)
struct vma;
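The switch to shift notation is value-preserving, and the new VMA_NOACCESS occupies a fresh bit; a standalone compile-time check of both claims (C11 _Static_assert):

#define VMA_READ (1 << 0)
#define VMA_WRITE (1 << 1)
#define VMA_EXECUTE (1 << 2)
#define VMA_CACHEABLE (1 << 3)
#define VMA_NOACCESS (1 << 4)

_Static_assert(VMA_READ == 0x01 && VMA_WRITE == 0x02 &&
               VMA_EXECUTE == 0x04 && VMA_CACHEABLE == 0x08,
               "shift notation must match the old hex constants");
_Static_assert((VMA_NOACCESS & (VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE)) == 0,
               "VMA_NOACCESS must be a fresh bit");

int main(void) { return 0; }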

View file

@@ -32,6 +32,7 @@
#include <asm/kb.h>
#ifdef CONFIG_ROCKCREEK
#include <asm/icc.h>
#include <asm/svm.h>
#endif
/*
@@ -75,6 +76,7 @@ int main(void)
mmu_init();
#ifdef CONFIG_ROCKCREEK
icc_init();
svm_init();
#endif
initrd_init();

View file

@@ -24,12 +24,15 @@
#include <metalsvm/semaphore.h>
#include <metalsvm/mailbox.h>
#include <metalsvm/syscall.h>
#include <metalsvm/vma.h>
#include <metalsvm/page.h>
#ifdef CONFIG_ROCKCREEK
#include <asm/icc.h>
#include <asm/RCCE.h>
#include <asm/RCCE_lib.h>
#include <asm/iRCCE.h>
#include <asm/iRCCE_lib.h>
#include <asm/svm.h>
#include <asm/SCC_API.h>
#include <lwip/sockets.h>
@@ -113,6 +116,139 @@ static int mail_noise(void*arg) {
icc_mail_noise(); // generate noise in the mesh
return 0;
}
#define N 1024
//#define N 514
#define LAZY
volatile static int* A[N];
volatile static int* B[N];
volatile static int* C[N];
static int svm_test(void *arg)
{
uint64_t start, end;
uint32_t i, j, k;
int my_ue, num_ues;
RCCE_barrier(&RCCE_COMM_WORLD);
my_ue = RCCE_ue();
num_ues = RCCE_num_ues();
#if 1
if (!my_ue) {
// allocate and initialize a private region for the sequential reference run
A[0] = (int*) kmalloc(3*N*N*sizeof(int));
memset((void*) A[0], 0x00, 3*N*N*sizeof(int));
// initialize matrices
for(i=0; i<N; i++) {
A[i] = A[0] + i*N;
B[i] = A[0] + (i*N + N*N);
C[i] = A[0] + (i*N + 2*N*N);
}
for(i=0; i<N; i++) {
A[i][i] = 1;
for(j=0; j<N; j++)
B[i][j] = i+j;
}
kputs("Start sequentiell calculation...\n");
start = rdtsc();
start = rdtsc();
// start calculation
for(i=0; i<N; i++)
for(j=0; j<N; j++)
for(k=0; k<N; k++)
C[i][j] += A[i][k] * B[k][j];
end = rdtsc();
kprintf("Calculation time (seq): %llu\n", end-start);
kfree(A[0], 3*N*N*sizeof(int));
}
RCCE_barrier(&RCCE_COMM_WORLD);
#endif
// allocate and initialize SVM region
#ifndef LAZY
A[0] = (int*) svmmalloc(3*N*N*sizeof(int), SVM_STRONG);
#else
A[0] = (int*) svmmalloc(3*N*N*sizeof(int), SVM_LAZYRELEASE);
#endif
if (!my_ue)
memset((void*) A[0], 0x00, 3*N*N*sizeof(int));
// initialize matrices
for(i=0; i<N; i++) {
A[i] = A[0] + i*N;
B[i] = A[0] + (i*N + N*N);
C[i] = A[0] + (i*N + 2*N*N);
}
if (!my_ue) {
for(i=0; i<N; i++) {
A[i][i] = 1;
for(j=0; j<N; j++)
B[i][j] = i+j;
}
}
svm_flush();
RCCE_barrier(&RCCE_COMM_WORLD);
kputs("Start parallel calculation...\n");
start = rdtsc();
start = rdtsc();
#ifndef LAZY
// Now, we need only read access on A and B
change_page_permissions((size_t) A[0], (size_t) (A[0]+2*N*N), VMA_CACHEABLE|VMA_READ);
RCCE_barrier(&RCCE_COMM_WORLD);
#endif
// start calculation
for(i=my_ue*(N/num_ues); i<(my_ue+1)*(N/num_ues); i++)
for(j=0; j<N; j++)
for(k=0; k<N; k++)
C[i][j] += A[i][k] * B[k][j];
svm_flush();
RCCE_barrier(&RCCE_COMM_WORLD);
end = rdtsc();
kputs("Check results...\n");
if (!my_ue) {
uint32_t err = 0;
svm_invalidate();
for(i=0; (i<N) && (err < 32); i++) {
for(j=0; (j<N) && (err < 32); j++) {
if (C[i][j] != i+j) {
err++;
kprintf("Wrong value at C[%u][%u] = %u, B[%u][%u] = %u\n", i, j, C[i][j], i, j, B[i][j]);
}
}
}
}
RCCE_barrier(&RCCE_COMM_WORLD);
kprintf("Calculation time (par): %llu\n", end-start);
svmfree((void*) A[0], 3*N*N*sizeof(int));
svm_statistics();
return 0;
}
#endif
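In svm_test each UE computes a contiguous block of N/num_ues rows. A standalone illustration of the partitioning; note that an N not divisible by num_ues (such as the commented-out 514 on some core counts) would leave trailing rows uncomputed:

#include <stdio.h>

#define N 1024

int main(void)
{
	int num_ues = 4; /* sample core count */
	int ue;

	/* UE k computes rows [k*(N/num_ues), (k+1)*(N/num_ues)) */
	for (ue = 0; ue < num_ues; ue++)
		printf("UE %d: rows %d..%d\n",
		       ue, ue * (N / num_ues), (ue + 1) * (N / num_ues) - 1);
	return 0;
}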
static int join_test(void* arg)

View file

@@ -152,8 +152,8 @@ int mmu_init(void)
}
}
#elif defined(CONFIG_ROCKCREEK)
/* of course, the first twenty slots belong to the private memory */
for(addr=0x00; addr<20*0x1000000; addr+=PAGE_SIZE) {
/* of course, the first slot belongs to the private memory */
for(addr=0x00; addr<1*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_SHIFT);
if (addr > addr + PAGE_SIZE)
break;
@@ -219,14 +219,14 @@ int mmu_init(void)
* Now, we are able to read the FPGA registers and to
* determine the number of slots for private memory.
*/
uint32_t slots = *((volatile uint32_t*) (FPGA_BASE + 0x8244));
uint32_t slots = *((volatile uint8_t*) (FPGA_BASE + 0x8244));
if (slots == 0)
slots = 21;
slots = 1;
kprintf("MetalSVM use %d slots for private memory\n", slots);
// define the residual private slots as free
for(addr=20*0x1000000; addr<(slots-1)*0x1000000; addr+=PAGE_SIZE) {
for(addr=1*0x1000000; addr<slots*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_SHIFT);
if (addr > addr + PAGE_SIZE)
break;