/*
 * Copyright 2011 Stefan Lankes, Chair for Operating Systems,
 *                               RWTH Aachen University
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * This file is part of MetalSVM.
 */

#include <metalsvm/stddef.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/string.h>
#include <metalsvm/mmu.h>
#include <metalsvm/vma.h>
#include <metalsvm/page.h>
#include <metalsvm/processor.h>
#include <metalsvm/tasks.h>
#include <metalsvm/errno.h>
#ifdef CONFIG_ROCKCREEK
#include <asm/irqflags.h>
#include <asm/icc.h>
#include <asm/svm.h>
#include <asm/RCCE_lib.h>
#include <asm/iRCCE_lib.h>
#include <asm/SCC_API.h>

#define USE_PERFCOUNTERS	1
#define USE_RESP_MAIL		1

#define SHARED_PAGES	(4*(RCCE_SHM_SIZE_MAX >> PAGE_SHIFT))
#define OWNER_SIZE	((SHARED_PAGES * sizeof(uint8_t) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

#define AIREG1		0
#define AIREG2		(AIREG1 + 1)

#define LOCK_ID		0

#define ABS(a)		(((a) < 0) ? -(a) : (a))

t_vcharp RC_SHM_BUFFER_START();

typedef struct {
	int counter;
	int initializer;
} atomic_increg_t;

static volatile atomic_increg_t *incregs = NULL;
static RCCE_FLAG release;

/*
 * Details on the L2 cache (needed for flushing)
 */
#define OWN_MPB		0xd8000000
#define L2_LINESIZE	32UL
#define L2_WAYS		4UL
#define L2_CAPACITY	(256*1024UL)
#define L2_WBSTRIDE	(L2_CAPACITY/L2_WAYS)

#ifdef SVM_WB
/* Helper function to read data into all 4 ways of the L2 cache */
__attribute__((always_inline)) static inline void svm_purge_set(const size_t set)
{
	register char tmp;

	/* Translate the set to a kernel space virtual address */
	const volatile char* dummyData = (volatile char*) set;

	/* Now read new data into all four ways, and then reread the first */
	tmp = *dummyData;
	tmp = *(dummyData + L2_WBSTRIDE);
	tmp = *(dummyData + L2_WBSTRIDE * 2);
	tmp = *(dummyData + L2_WBSTRIDE * 3);
}

static size_t dummy_base = OWN_MPB + L2_CAPACITY;
static size_t dummy_offset = 0;
#endif
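/*
 * Illustration (not part of the build): svm_purge_set() evicts one L2 set by
 * touching the four addresses that map to it, one per way and L2_WBSTRIDE
 * apart. Pushing a whole page out of the write-back L2 therefore means purging
 * PAGE_SIZE/L2_LINESIZE sets in steps of L2_LINESIZE, which is the same loop
 * svm_flush() uses further below. The helper name is hypothetical, and it
 * assumes that start has already been translated into the dummy read area the
 * way svm_flush() does.
 */
#if 0
static inline void svm_purge_page(size_t start)
{
	size_t step;

	for(step=0; step<PAGE_SIZE; step+=L2_LINESIZE)
		svm_purge_set(start + step);
}
#endif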
/*
 * This array describes the owner of a specific page.
 * Only the owner of a page is able to change the possession.
 * => No lock is needed.
 */
static volatile uint8_t* page_owner = NULL;

// helper array to convert a physical to a virtual address
static size_t phys2virt[SHARED_PAGES] = {[0 ... SHARED_PAGES-1] = 0};
static const size_t shmbegin = SHM_ADDR;

static uint32_t emit[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0};
static uint32_t request[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0};
static uint32_t forward[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0};
static uint32_t alloc_page = 0;
static uint32_t map_page = 0;

#if USE_PERFCOUNTERS
static uint64_t alloc_ticks = 0;
static uint64_t request_ticks = 0;
static uint64_t emit_ticks = 0;
static uint64_t wait_ticks = 0;
static uint64_t max_wait = 0;
static uint64_t min_wait = (uint64_t) -1;
#endif

int svm_init(void)
{
	uint32_t i, flags;

	kprintf("Shared memory starts at the physical address 0x%x\n", shmbegin);

	page_owner = (uint8_t*) map_region(0, shmbegin, OWNER_SIZE >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
	if (BUILTIN_EXPECT(!page_owner, 0))
		return -ENOMEM;

	if (!RCCE_IAM) {
		memset((void*) page_owner, 0xFF, OWNER_SIZE);

		// the owner vector is owned by core 0
		for(i=0; i<(OWNER_SIZE >> PAGE_SHIFT); i++)
			page_owner[i] = 0;
	}

	// initialize the svm barrier
	incregs = (volatile atomic_increg_t*) map_region(0, 0xF900E000, 2, MAP_KERNEL_SPACE|MAP_NO_CACHE);
	if (BUILTIN_EXPECT(!incregs, 0))
		return -ENOMEM;
	kprintf("Map atomic counters at 0x%x\n", incregs);

	if (!RCCE_IAM) {
		incregs[AIREG1].initializer = 0;
		incregs[AIREG2].initializer = 0;
	}

	// iRCCE is not thread safe => disable interrupts
	flags = irq_nested_disable();
	RCCE_flag_alloc(&release);
	irq_nested_enable(flags);

	RCCE_barrier(&RCCE_COMM_WORLD);

	return 0;
}

static size_t get_shpages(uint32_t n)
{
	int x = X_PID(RC_MY_COREID);
	int y = Y_PID(RC_MY_COREID);
	size_t i, j = 0, k = 0, start = SHM_X0_Y0;
	int diff, min = x + y;

	// determine the shared memory region next to the nearest memory controller
	diff = ABS(5 - x) + ABS(0 - y);
	if (diff < min) {
		min = diff;
		start = SHM_X5_Y0;
	}

	diff = ABS(0 - x) + ABS(2 - y);
	if (diff < min) {
		min = diff;
		start = SHM_X0_Y2;
	}

	diff = ABS(5 - x) + ABS(2 - y);
	if (diff < min) {
		min = diff;
		start = SHM_X5_Y2;
	}

	// search for n consecutive, unowned pages in the owner vector
	for(i=0; (i < SHARED_PAGES) && (k < n); i++) {
		k = 0;
		j = (((start - shmbegin) >> PAGE_SHIFT) + i) % SHARED_PAGES;
		while((k < n) && (page_owner[j+k] >= RCCE_MAXNP)) {
			k++;
			i++;
		}
	}

	if (BUILTIN_EXPECT(i >= SHARED_PAGES, 0))
		return 0;

	memset((void*) (page_owner+j), RCCE_IAM, sizeof(uint8_t)*n);

	return shmbegin + (j << PAGE_SHIFT);
}

size_t shmalloc(uint32_t n)
{
	size_t ret;

	RCCE_acquire_lock(RC_COREID[LOCK_ID]);
	ret = get_shpages(n);
	RCCE_release_lock(RC_COREID[LOCK_ID]);

	return ret;
}
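/*
 * Example of the controller selection in get_shpages(): the four shared-memory
 * regions sit next to the memory controllers on the tiles (0,0), (5,0), (0,2)
 * and (5,2) of the SCC mesh. For a core on tile (x=3, y=1) the Manhattan
 * distances are 4, 3, 4 and 3, so the search starts at SHM_X5_Y0; because the
 * comparison is strict, a later controller with the same distance does not
 * replace an earlier choice.
 */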
/*
 * This function is called by the pagefault handler
 * => the interrupt flag is already cleared
 */
int svm_alloc_page(size_t addr, page_table_t* pgt)
{
#if USE_PERFCOUNTERS
	uint64_t start = rdtsc();
#endif
	uint32_t index2 = (addr >> 12) & 0x3FF;
	size_t phyaddr;
	t_vcharp mpb = (t_vcharp) ((size_t)(virt_to_phys(addr) >> PAGE_SHIFT) | ((size_t) RCCE_comm_buffer[RCCE_IAM] - RCCE_LINE_SIZE));
	uint16_t offset = 0xFFFF;

	addr &= PAGE_MASK; // align the address to the page boundary

	RCCE_acquire_lock(RC_COREID[LOCK_ID]);
	iRCCE_get((t_vcharp) &offset, mpb, sizeof(uint16_t), RCCE_IAM);

	if (!offset) {
		int i;

		phyaddr = get_shpages(1);
		offset = (uint16_t) ((phyaddr - shmbegin) >> PAGE_SHIFT);
		for(i=0; i<RCCE_NP; i++)
			iRCCE_put(mpb, (t_vcharp) &offset, sizeof(uint16_t), i); // assumed: publish the offset of the new page frame in the MPBs
		RCCE_release_lock(RC_COREID[LOCK_ID]);

		pgt->entries[index2] &= 0xFFF;
		pgt->entries[index2] &= ~PG_SVM_INIT;
		pgt->entries[index2] |= phyaddr|PG_PRESENT;
		phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT] = addr;
		tlb_flush_one_page(addr);
		alloc_page++;
		//kprintf("map new page frame 0x%x at 0x%x, flags 0x%x, offset 0x%x, mpb 0x%x\n", phyaddr, addr, pgt->entries[index2] & 0xFFF, (int) offset, mpb);

#if USE_PERFCOUNTERS
		alloc_ticks += rdtsc() - start;
#endif
		return 0;
	} else {
		RCCE_release_lock(RC_COREID[LOCK_ID]);

		phyaddr = shmbegin + ((size_t) offset << PAGE_SHIFT);
		pgt->entries[index2] &= 0xFFF;
		pgt->entries[index2] &= ~PG_SVM_INIT;
		if (pgt->entries[index2] & PG_SVM_LAZYRELEASE)
			pgt->entries[index2] |= phyaddr|PG_PRESENT;
		else
			pgt->entries[index2] |= phyaddr;
		phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT] = addr;
		tlb_flush_one_page(addr);
		map_page++;
		//kprintf("map existing page frame 0x%x at 0x%x, offset 0x%x, mpb 0x%x\n", phyaddr, addr, offset, mpb);

#if USE_PERFCOUNTERS
		alloc_ticks += rdtsc() - start;
#endif
		if (pgt->entries[index2] & PG_SVM_LAZYRELEASE)
			return 0;
		if (pgt->entries[index2] & PG_RW)
			return svm_access_request(addr);

		return 0;
	}
}

/*
 * This function is called by the pagefault handler
 * => the interrupt flag is already cleared
 */
int svm_access_request(size_t addr)
{
#if USE_PERFCOUNTERS
	uint64_t start = rdtsc();
#endif
	size_t phyaddr = virt_to_phys(addr);
	uint32_t pageid;
	int remote_rank;
	uint8_t payload[iRCCE_MAIL_HEADER_PAYLOAD];
	int ret;

	if (phyaddr < shmbegin)
		return -EINVAL;
	if (phyaddr >= shmbegin + RCCE_SHM_SIZE_MAX)
		return -EINVAL;
	pageid = (phyaddr-shmbegin) >> PAGE_SHIFT;

	remote_rank = page_owner[pageid];
	if (remote_rank == RCCE_IAM)
		return 0;

	((size_t*) payload)[0] = RCCE_IAM;
	((size_t*) payload)[1] = phyaddr;

	//kprintf("send request (0x%x) to %d\n", addr, remote_rank);

	/* send ping request */
	iRCCE_mail_send(2*sizeof(size_t), SVM_REQ, 0, (char*) payload, remote_rank);

	NOP4;
	icc_send_gic_irq(remote_rank);
	request[remote_rank]++;

#if USE_RESP_MAIL
#if USE_PERFCOUNTERS
	uint64_t wait_start = rdtsc();
#endif
	// wait for the response
	icc_wait(SVM_RESP);
#if USE_PERFCOUNTERS
	uint64_t res = rdtsc() - wait_start;

	wait_ticks += res;
	if (min_wait > res)
		min_wait = res;
	if (max_wait < res)
		max_wait = res;
#endif
#else
	NOP8;
	while (page_owner[pageid] != RCCE_IAM) {
		icc_mail_check();
		NOP8;
	}
#endif

	addr &= PAGE_MASK; // align the address to the page boundary
	ret = change_page_permissions(addr, addr + PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE);

#if USE_PERFCOUNTERS
	request_ticks += rdtsc() - start;
#endif
	return ret;
}

//static atomic_int32_t size_counter = ATOMIC_INIT(0);
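/*
 * Sketch (not part of the build): how a page-fault handler is expected to hand
 * faults on SVM pages to the two functions above. The handler name and the
 * exact flag tests are assumptions for illustration; only svm_alloc_page() and
 * svm_access_request() are taken from this file.
 */
#if 0
static int svm_pagefault_sketch(size_t viraddr, page_table_t* pgt)
{
	uint32_t index2 = (viraddr >> 12) & 0x3FF;

	// first touch: no physical frame has been assigned to this page yet
	if (pgt->entries[index2] & PG_SVM_INIT)
		return svm_alloc_page(viraddr, pgt);

	// write access to a page with strong consistency: ask the current
	// owner to hand the page over
	return svm_access_request(viraddr);
}
#endif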
void* svm_malloc(size_t size, uint32_t consistency)
{
	size_t viraddr, phyaddr, i, j;
	t_vcharp mpb_addr;
	uint32_t flags;
	task_t* task = per_core(current_task);
	uint32_t map_flags = MAP_KERNEL_SPACE|MAP_SVM_INIT;
	uint8_t buffer[RCCE_LINE_SIZE] = {[0 ... RCCE_LINE_SIZE-1] = 0};

	if (!(consistency & SVM_L2))
		map_flags |= MAP_MPE;
	else
		task->flags |= TASK_L2;

	if (consistency & SVM_STRONG)
		map_flags |= MAP_SVM_STRONG;
	else if (consistency & SVM_LAZYRELEASE)
		map_flags |= MAP_SVM_LAZYRELEASE;
	else
		return NULL;

	// currently, we allocate memory at page size granularity
	size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);

#if 0	// Workaround for our MARC paper
	// iRCCE is not thread safe => disable interrupts
	flags = irq_nested_disable();

	kprintf("Entering shmmalloc: size 0x%x, owner_size 0x%x\n", size, OWNER_SIZE);

	if (RCCE_IAM && (consistency & SVM_STRONG))
		map_flags |= MAP_NO_ACCESS;

	viraddr = vm_alloc(size >> PAGE_SHIFT, map_flags);
	kprintf("vm_alloc returns 0x%x\n", viraddr);

	static uint32_t last = 0;

	// get memory on MC0
	if (last)
		phyaddr = last + size/4;
	else
		last = phyaddr = (size_t) RCCE_shmalloc(size/4);
	map_region(viraddr, phyaddr, (size/4) >> PAGE_SHIFT, map_flags|MAP_REMAP);
	for(i=0; i<size/4; i+=PAGE_SIZE)
		phys2virt[(phyaddr + i - shmbegin) >> PAGE_SHIFT] = viraddr + i;
	kprintf("svmmalloc on MC0: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, viraddr, size);

	// get memory on MC1
	phyaddr = shmbegin + 0x1000000 + atomic_int32_read(&size_counter);
	map_region(viraddr + size/4, phyaddr, (size/4) >> PAGE_SHIFT, map_flags|MAP_REMAP);
	for(i=0; i<size/4; i+=PAGE_SIZE)
		phys2virt[(phyaddr + i - shmbegin) >> PAGE_SHIFT] = viraddr + size/4 + i;
	kprintf("svmmalloc on MC1: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, viraddr+size/4, size);

	// get memory on MC2
	phyaddr = shmbegin + 0x2000000 + atomic_int32_read(&size_counter);
	map_region(viraddr + 2 * size/4, phyaddr, (size/4) >> PAGE_SHIFT, map_flags|MAP_REMAP);
	for(i=0; i<size/4; i+=PAGE_SIZE)
		phys2virt[(phyaddr + i - shmbegin) >> PAGE_SHIFT] = viraddr + 2 * size/4 + i;
	kprintf("svmmalloc on MC2: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, viraddr+2*size/4, size);

	// get memory on MC3
	phyaddr = shmbegin + 0x3000000 + atomic_int32_read(&size_counter);
	map_region(viraddr + 3 * size/4, phyaddr, (size/4) >> PAGE_SHIFT, map_flags|MAP_REMAP);
	for(i=0; i<size/4; i+=PAGE_SIZE)
		phys2virt[(phyaddr + i - shmbegin) >> PAGE_SHIFT] = viraddr + 3 * size/4 + i;
	kprintf("svmmalloc on MC3: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, viraddr+3*size/4, size);

	atomic_int32_add(&size_counter, size/4);

	irq_nested_enable(flags);

	kprintf("shmmalloc returns 0x%x\n", viraddr);

	return (void*) viraddr;
#endif

#if 0
	// iRCCE is not thread safe => disable interrupts
	flags = irq_nested_disable();
	phyaddr = (size_t) RCCE_shmalloc(size);
	if (RCCE_IAM && (consistency & SVM_STRONG))
		map_flags |= MAP_NO_ACCESS;
	irq_nested_enable(flags);

	if (BUILTIN_EXPECT(!phyaddr, 0))
		return NULL;
	if (BUILTIN_EXPECT(phyaddr & 0xFFF, 0)) {
		kprintf("RCCE_shmalloc does not return a page aligned physical address: 0x%x\n", phyaddr);
		return NULL;
	}

	viraddr = map_region(0, phyaddr, size >> PAGE_SHIFT, map_flags);
	for(i=0; i<size; i+=PAGE_SIZE)
		phys2virt[(phyaddr + i - shmbegin) >> PAGE_SHIFT] = viraddr + i;

	kprintf("svmmalloc: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, viraddr, size);

	return (void*) viraddr;
#endif

	map_flags |= MAP_NO_ACCESS;
#ifndef SVM_WB
	map_flags |= MAP_MPE;
#endif
	viraddr = map_region(0, 0, size >> PAGE_SHIFT, map_flags);
	kprintf("svmmalloc: viraddr 0x%x, size 0x%x, flags 0x%x\n", viraddr, size, map_flags);

	map_flags |= MAP_REMAP;
	for(i=0, j=0, mpb_addr=0; i> PAGE_SHIFT);
	for(i=0; i> PAGE_SHIFT] = 0;

	// iRCCE is not thread safe => disable interrupts
	flags = irq_nested_disable();
	RCCE_shfree((t_vcharp) phyaddr);
	irq_nested_enable(flags);
#endif
}
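/*
 * Sketch (not part of the build): the receive path that the comment below
 * refers to. icc_mail_check() is expected to unpack an SVM_REQ mail -- whose
 * payload holds the requesting UE and the physical address, exactly as packed
 * in svm_access_request() -- and to pass both to svm_emit_page(). The header
 * type and the payload field name used here are assumptions for illustration.
 */
#if 0
static void svm_handle_req_sketch(iRCCE_MAIL_HEADER* mail)
{
	int ue = (int) ((size_t*) mail->payload)[0];
	size_t phyaddr = ((size_t*) mail->payload)[1];

	svm_emit_page(phyaddr, ue);
}
#endif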
/*
 * This function is called by icc_mail_check.
 * => The interrupt flag is already cleared.
 */
int svm_emit_page(size_t phyaddr, int ue)
{
#if USE_PERFCOUNTERS
	uint64_t start = rdtsc();
#endif
	uint32_t pageid;
	int remote_rank;

	//kprintf("Try to emit page 0x%x to %d\n", phyaddr, ue);

	if (phyaddr < shmbegin)
		return -EINVAL;
	if (phyaddr >= shmbegin + RCCE_SHM_SIZE_MAX)
		return -EINVAL;
	pageid = (phyaddr-shmbegin) >> PAGE_SHIFT;

	remote_rank = page_owner[pageid];
	if (remote_rank != RCCE_IAM) {
		// this core is not the owner => forward the request to the new owner
		uint8_t payload[iRCCE_MAIL_HEADER_PAYLOAD];

		kprintf("Oops, core %d is not the owner of page 0x%x\n", RCCE_IAM, phyaddr);

		((size_t*) payload)[0] = ue;
		((size_t*) payload)[1] = phyaddr;

		/* send ping request */
		iRCCE_mail_send(2*sizeof(size_t), SVM_REQ, 0, (char*) payload, remote_rank);

		/* send interrupt */
		icc_send_gic_irq(remote_rank);
		forward[remote_rank]++;
	} else {
		size_t viraddr;

		svm_flush(phyaddr);

#if USE_RESP_MAIL
		// send the response back to ue
		// ue is polling for the response => no irq is needed
		iRCCE_mail_send(0, SVM_RESP, 0, NULL, ue);
#endif
		emit[ue]++;
		page_owner[pageid] = ue;
		viraddr = phys2virt[pageid];
		change_page_permissions(viraddr, viraddr+PAGE_SIZE, VMA_NOACCESS|VMA_READ|VMA_CACHEABLE);
	}

#if USE_PERFCOUNTERS
	emit_ticks += rdtsc() - start;
#endif
	return 0;
}

#if 0
void svm_flush(void)
{
	int z, tmp;

	// need to write to another line to make sure the write combine buffer gets flushed
	*(int*) RCCE_fool_write_combine_buffer = 1;
	flush_cache();

#error Currently not supported
#if 0
	// try to flush the L2 cache
	z = Z_PID(RC_COREID[my_ue]);
	tmp = ReadConfigReg(CRB_OWN + (z==0 ? GLCFG0 : GLCFG1));
	tmp &= ~(1 << GLCFG_XFLSHNN_BIT);
	SetConfigReg(CRB_OWN + (z==0 ? GLCFG0 : GLCFG1), tmp);
	while(!(ReadConfigReg(CRB_OWN + (z==0 ? GLCFG0 : GLCFG1)) & (1 << GLCFG_XFLSHNN_BIT))) {
		NOP8;
	}
#endif
}
#endif
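/*
 * Sketch (not part of the build): how the lazy-release path below is meant to
 * be used. Writes to a region allocated with SVM_LAZYRELEASE become visible to
 * the other cores only after svm_barrier() has written back and invalidated
 * the caches. The example function, array name and size are made up for
 * illustration.
 */
#if 0
static void svm_lazyrelease_example(void)
{
	int i;
	int* shared = (int*) svm_malloc(1024 * sizeof(int), SVM_LAZYRELEASE);

	for(i=0; i<1024; i++)
		shared[i] = i;

	// write back the local cache and synchronize with the other cores
	svm_barrier(SVM_LAZYRELEASE);
}
#endif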
/*
 * Function to flush one page or the entire cache.
 */
#ifdef SVM_WB
void svm_invalidate(void)
{
	task_t* task = per_core(current_task);

	if (task->flags & TASK_L2) {
		asm volatile ( ".byte 0x0f; .byte 0x0a;\n" ); // CL1FLUSHMB
	} else {
		/* no action needed, svm_flush already invalidates the cache */
		return;
	}
}

void svm_flush(size_t phyaddr)
{
	task_t* task = per_core(current_task);
	page_dir_t* pgd = task->pgd;
	page_table_t* pgt = NULL;
	size_t step = 0;
	size_t stride = L2_LINESIZE;
	size_t range = L2_WBSTRIDE;
	size_t viraddr;
	uint32_t index1, index2;
	uint32_t flags;

	/* flush the entire cache if phyaddr == 0 */
	if (!phyaddr) {
		if (task->flags & TASK_L2) {
			goto flush_l2;
		} else {
			goto flush_l1;
		}
	/* flush one page */
	} else {
		/* align the address to page boundaries */
		phyaddr &= ~(PAGE_SIZE-1);

		/* look up the pgt to check if the L2 is enabled */
		viraddr = phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT];
		index1 = viraddr >> 22;
		index2 = (viraddr >> 12) & 0x3FF;

		/* check if the pgt is present */
		if (!pgd || !(pgd->entries[index1] & PAGE_MASK))
			goto wrong_addr;

		pgt = (page_table_t*) ((KERNEL_SPACE - 1024 * PAGE_SIZE + index1 * PAGE_SIZE) & PAGE_MASK);

		if (pgt->entries[index2] & PG_MPE) {
			goto flush_l1;
		} else {
			phyaddr = phyaddr % L2_WBSTRIDE;
			range = PAGE_SIZE;
			goto flush_l2;
		}
	}

	/*
	 * FLUSH L1 CACHE:
	 */
flush_l1:
	kputs("flush L1\n");
	*(int*) RCCE_fool_write_combine_buffer = 1;
	//__asm__ volatile ( "wbinvd;\n\t" );
	flush_cache();
	return;

flush_l2:
	/*
	 * FLUSH L2 CACHE:
	 * disable interrupts due to the pseudo LRU behavior of the L2 cache
	 */
	flags = irq_nested_disable();

	/* toggle between the dummy areas */
	phyaddr += dummy_base + dummy_offset;
	kprintf("flush-l2: phyaddr 0x%x\n", phyaddr);
	if (dummy_offset)
		dummy_offset = 0;
	else
		dummy_offset = L2_CAPACITY;

	flush_cache();
	for(step = 0; step < range; step += stride)
		svm_purge_set(phyaddr + step);

	irq_nested_enable(flags);
	return;

wrong_addr:
	kputs("svm flush error: address not valid!\n");
	return;
}
#endif

int svm_barrier(uint32_t flags)
{
	int i;
	RCCE_COMM *comm = &RCCE_COMM_WORLD;
	static int index = 0;

	if (flags & SVM_LAZYRELEASE) {
		svm_flush(0);
		svm_invalidate();
	}

#if 1
	// Lubachevsky barrier with flags
	index = !index;
	if (incregs[AIREG1].counter > (comm->size - 2)) {
		incregs[AIREG1].initializer = 0;
		while(incregs[AIREG1].initializer);
		for(i = 0; i < comm->size; i++)
			RCCE_flag_write(&release, index, comm->member[i]);
	} else
		RCCE_wait_until(release, index);
#else
	RCCE_barrier(&RCCE_COMM_WORLD);
#endif

	return 0;
}

//extern uint64_t check_ticks;
//extern uint64_t recv_ticks;

int svm_statistics(void)
{
	uint32_t i;

	kprintf("emit\t:");
	for(i=0; i