From 62f2038a6e518172d3ae1b84c62d48f1fa52cc69 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Wed, 19 Oct 2011 04:47:19 -0700 Subject: [PATCH 01/17] add a workaround to avoid the using of the missing select function --- apps/netio.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/apps/netio.c b/apps/netio.c index bb63c89c..c6969fd1 100644 --- a/apps/netio.c +++ b/apps/netio.c @@ -30,6 +30,8 @@ #include #endif +#define USE_SOCKET_BYPASSING 1 + /* * This implements a netio server and client (only TCP version). * The client sends a command word (4 bytes) then a data length word (4 bytes). @@ -45,8 +47,20 @@ /* See http://www.nwlab.net/art/netio/netio.html to get the netio tool */ #ifdef CONFIG_LWIP +#if USE_SOCKET_BYPASSING // for socket bypassing +#include +#undef LWIP_COMPAT_SOCKETS +#endif + #include #include +#include + +#if USE_SOCKET_BYPASSING // for socket bypassing +#include +#undef AF_INET +#define AF_INET AF_MMNIF_NET +#endif typedef struct { @@ -60,10 +74,10 @@ typedef struct #define CMD_RES 3 #define CTLSIZE sizeof(CONTROL) -#define DEFAULTPORT 0x494F /* "IO" */ +#define DEFAULTPORT 0x494F #define TMAXSIZE 65536 -static int tSizes[] = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32767}; +static int tSizes[] = {/*1, 2, 4, 8, 16, */32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32767}; static size_t ntSizes = sizeof(tSizes) / sizeof(int); static int nPort = DEFAULTPORT; static const int sobufsize = 131072; @@ -153,6 +167,7 @@ static int TCPServer(void* arg) setsockopt(server, SOL_SOCKET, SO_RCVBUF, (char *) &sobufsize, sizeof(sobufsize)); setsockopt(server, SOL_SOCKET, SO_SNDBUF, (char *) &sobufsize, sizeof(sobufsize)); + memset((char *) &sa_server, 0x00, sizeof(sa_server)); sa_server.sin_family = AF_INET; sa_server.sin_port = htons(nPort); sa_server.sin_addr = addr_local; @@ -177,6 +192,7 @@ static int TCPServer(void* arg) { kprintf("TCP server listening.\n"); +#ifdef 
select FD_ZERO(&fds); FD_SET(server, &fds); tv.tv_sec = 3600; @@ -190,10 +206,16 @@ static int TCPServer(void* arg) if (rc == 0 || FD_ISSET(server, &fds) == 0) continue; - +#endif length = sizeof(sa_client); - if ((client = accept(server, (struct sockaddr *) &sa_client, &length)) == -1) +#if USE_SOCKET_BYPASSING + // TODO: Bug, not compatible with BSD sockets + memcpy(&sa_client, &sa_server, length); +#endif + if ((client = accept(server, (struct sockaddr *) &sa_client, &length)) < 0) { + kprintf("accept faild: %d\n", errno); continue; + } setsockopt(client, SOL_SOCKET, SO_RCVBUF, (char *) &sobufsize, sizeof(sobufsize)); setsockopt(client, SOL_SOCKET, SO_SNDBUF, (char *) &sobufsize, sizeof(sobufsize)); @@ -215,7 +237,7 @@ static int TCPServer(void* arg) kprintf("\nReceiving from client, packet size %s ... \n", PacketSize(ctl.data)); cBuffer[0] = 0; nData = 0; - + do { for (nByte = 0; nByte < ctl.data; ) { From 5f84a93c01952eb6d66f468400c0c5d9465f2ab9 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Wed, 19 Oct 2011 04:48:16 -0700 Subject: [PATCH 02/17] extending the functionality and increasing the compatability of the Socket API - removing some minor bugs (races) - increasing the readability --- drivers/net/mmnif.c | 298 +++++++++++++++++++------------------------- drivers/net/mmnif.h | 33 ++++- 2 files changed, 158 insertions(+), 173 deletions(-) diff --git a/drivers/net/mmnif.c b/drivers/net/mmnif.c index 329a333c..a8a728ac 100644 --- a/drivers/net/mmnif.c +++ b/drivers/net/mmnif.c @@ -18,29 +18,34 @@ * * mmnif.c --- memmory mapped interface * - * Virutal IP Interface for the concept processor SCC + * Virtual IP Interface for the concept processor SCC * */ +/* + * 15th October 2011: + * - Redesign of the interrupt handling (by Stefan Lankes) + * - Add iRCCE support (by Stefan Lankes) + * - Extending the BSD socket interface + */ + #include #if defined(CONFIG_LWIP) && defined(CONFIG_ROCKCREEK) -#include "mmnif.h" /* definitions */ - #include /* lwip netif */ 
#include /* inteface stats */ #include /* ethernet arp packets */ #include /* struct iphdr */ #include /* tcpip_input() */ #include - -//#include +#include #include /* mailbox_ptr_t */ #include #include #include +#include #include #include @@ -51,7 +56,7 @@ #include #include -#include +#include #define TRUE 1 #define FALSE 0 @@ -80,7 +85,7 @@ #define MMNIF_AUTO_SOCKET_TIMEOUT 500 -#define MMNIF_FAST_SOCKET_BLOCK 1 +#define MMNIF_FAST_SOCKET_BLOCK 0 #ifdef DEBUG_MMNIF #include "util.h" /* hex dump */ @@ -120,11 +125,6 @@ static int npseudosocket = MMNIF_PSEUDO_SOCKET_START; static spinlock_t pseudolock; -/* IP address of the local core and the router core to get packets forwarded - */ -static unsigned int own_ip_address = 0xC0A80000; /* 192.168.0.0 */ -static unsigned int router_ip_address = 0xC0A80001; /* 192.168.0.1 */ - /* "message passing buffer" specific constants: * - start address * - size @@ -152,7 +152,6 @@ typedef struct acceptor { uint8_t stat; uint8_t src_ip; uint16_t port; - spinlock_t alock; int nsock; int rsock; } acceptor_t; @@ -222,7 +221,6 @@ typedef struct mm_rx_buffer { */ uint16_t head; uint16_t tail; - spinlock_t rlock; /* descritpor queue * desc_table : descriptor table @@ -261,10 +259,6 @@ typedef struct mmnif { mm_rx_buffer_t *rx_buff; uint8_t *rx_heap; - /* lock to protect members - */ - spinlock_t lock; - /* semaphore to regulate polling vs. interrupts */ sem_t com_poll; @@ -343,10 +337,10 @@ inline static int mmnif_trigger_irq(dest_ip) addr = CRB_ADDR(x, y) + (z == 0 ? 
GLCFG0 : GLCFG1); // send interrupt to ue - do { - NOP1; + //do { + // NOP1; tmp = ReadConfigReg(addr); - } while (tmp & 1); + //} while (tmp & 1); tmp |= 1; SetConfigReg(addr, tmp); @@ -437,35 +431,28 @@ void mmnif_print_driver_status(void) static uint8_t mmnif_get_destination(struct netif *netif, struct pbuf *p) { struct ip_hdr *iphdr; + ip_addr_p_t ip; uint8_t core; - uint8_t *ip4addr; - uint8_t addr[4]; - uint32_t netmask = 0xFFFFFF00; /* grab the destination ip address out of the ip header * for internal routing the last ocet is interpreted as core ID. */ iphdr = (struct ip_hdr *)(p->payload); - ip4addr = (uint8_t*) &iphdr->dest.addr; - - /* revert the address to host format */ - addr[3] = ip4addr[0]; - addr[2] = ip4addr[1]; - addr[1] = ip4addr[2]; - addr[0] = ip4addr[3]; + ip = iphdr->dest; /* check if the ip address is in the Local Area Network of the 48 cores */ /* if it's not the same network the router core is addressed * Note: the router core is core 1 */ - if (!((netmask & *(uint32_t *) addr) == (netmask & own_ip_address))) + if (ip_addr_netcmp(&ip, &netif->ip_addr, &netif->netmask)) return 1; - core = addr[0]; + core = ip4_addr4(&ip); /* check if the address is legitimata else return router core again */ if ((core) < 1 || (core > MMNIF_CORES)) core = 1; + kprintf("core %d\n", (int) core); return core; } @@ -477,7 +464,7 @@ static uint8_t mmnif_get_destination(struct netif *netif, struct pbuf *p) static uint32_t mmnif_rxbuff_alloc(uint8_t dest, uint16_t len) { uint32_t ret = 0; - mm_rx_buffer_t *rb = (mm_rx_buffer_t *) ((char *)header_start_address + (dest - 1) * header_size); + volatile mm_rx_buffer_t *rb = (mm_rx_buffer_t *) ((char *)header_start_address + (dest - 1) * header_size); #if MMNIF_USE_MPB char* memblock = (char*)heap_start_address + (dest-1)/2*16*1024*1024 + (dest-1)%2 * 0x2000; @@ -492,7 +479,7 @@ static uint32_t mmnif_rxbuff_alloc(uint8_t dest, uint16_t len) // if ((rb->head - rb->tail < len)&&(rb->tail != rb->head)) // return NULL; - 
RCCE_acquire_lock(dest-1); + RCCE_acquire_lock(RC_COREID[dest-1]); if (rb->dcount) { if (rb->tail > rb->head) @@ -542,9 +529,8 @@ static uint32_t mmnif_rxbuff_alloc(uint8_t dest, uint16_t len) } } } + RCCE_release_lock(RC_COREID[dest-1]); -out: - RCCE_release_lock(dest-1); return ret; } @@ -554,7 +540,7 @@ out: */ static int mmnif_commit_packet(uint8_t dest, uint32_t addr) { - mm_rx_buffer_t *rb = (mm_rx_buffer_t *) ((char *)header_start_address + (dest - 1) * header_size); + volatile mm_rx_buffer_t *rb = (mm_rx_buffer_t *) ((char *)header_start_address + (dest - 1) * header_size); uint32_t i; for (i = 0; i < MMNIF_MAX_DESCRIPTORS; i++) @@ -564,6 +550,7 @@ static int mmnif_commit_packet(uint8_t dest, uint32_t addr) { rb->desc_table[i].stat = MMNIF_STATUS_RDY; rb->desc_table[i].fast_sock = -1; + return 0; } } @@ -577,7 +564,7 @@ static int mmnif_commit_packet(uint8_t dest, uint32_t addr) */ static int mmnif_commit_packet_bypass(uint8_t dest, uint32_t addr, int dest_socket) { - mm_rx_buffer_t* rb = (mm_rx_buffer_t *) ((char *)header_start_address + (dest - 1) * header_size); + volatile mm_rx_buffer_t* rb = (mm_rx_buffer_t *) ((char *)header_start_address + (dest - 1) * header_size); uint32_t i; for (i = 0; i < MMNIF_MAX_DESCRIPTORS; i++) @@ -600,11 +587,11 @@ static int mmnif_commit_packet_bypass(uint8_t dest, uint32_t addr, int dest_sock static void mmnif_rxbuff_free(void) { mmnif_t *mmnif = mmnif_dev->state; - mm_rx_buffer_t *b = mmnif->rx_buff; + volatile mm_rx_buffer_t *b = mmnif->rx_buff; uint32_t i, j; uint32_t rpos; - RCCE_acquire_lock(RCCE_IAM); + RCCE_acquire_lock(RC_MY_COREID); rpos = b->dread; for (i = 0, j = rpos; i < MMNIF_MAX_DESCRIPTORS; i++) @@ -631,7 +618,7 @@ static void mmnif_rxbuff_free(void) break; } - RCCE_release_lock(RCCE_IAM); + RCCE_release_lock(RC_MY_COREID); } /* @@ -703,6 +690,7 @@ realloc: LINK_STATS_INC(link.xmit); mmnif->stats.tx++; mmnif->stats.tx_bytes += p->tot_len; + mmnif_trigger_irq(dest_ip); return ERR_OK; @@ -756,6 +744,7 @@ 
static int mmnif_hashadd(int sock, int rsock, uint8_t dest_ip) p->socket = sock; p->remote_socket = rsock; p->dest_ip = dest_ip; + return 0; } } @@ -783,6 +772,7 @@ static int mmnif_hashdelete(int sock) p->socket = -1; p->remote_socket = 0; p->dest_ip = 0; + return 0; } } @@ -801,19 +791,9 @@ static err_t mmnif_tx_bypass(struct netif * netif, void *pbuff, uint16_t size, i //uint32_t exp_delay = 2; //mm_rx_buffer_t *rb = (mm_rx_buffer_t *) ((char *)header_start_address + (dest->dest_ip - 1) * header_size); - /* Perform serveral sanity checks on the packet and the buffers: - * - is the output packet to big? - */ - -// if (size > MMNIF_TX_BUFFERLEN) -// { -// DEBUGPRINTF("mmnif_tx(): packet is longer than %d bytes\n",MMNIF_TX_BUFFERLEN); -// goto drop_packet; -// } - /* allocate memory for the packet in the remote buffer */ realloc: - write_address = mmnif_rxbuff_alloc(dest->dest_ip, CLINE_ALIGN(size)); + write_address = mmnif_rxbuff_alloc(dest->dest_ip, size); if (!write_address) { @@ -821,14 +801,7 @@ realloc: // udelay(exp_delay); // exp_delay << 1; // reschedule(); - NOP8; - NOP8; - NOP8; - NOP8; - NOP8; - NOP8; - NOP8; - NOP8; + NOP8;NOP8;NOP8;NOP8;NOP8;NOP8;NOP8;NOP8; goto realloc; } @@ -854,7 +827,7 @@ realloc: if (mmnif_commit_packet_bypass(dest->dest_ip, write_address, dest->remote_socket)) { - DEBUGPRINTF("mmnif_tx(): packet somehow lost during commit\n"); + DEBUGPRINTF("mmnif_tx_bypass(): packet somehow lost during commit\n"); } #ifdef DEBUG_MMNIF_PACKET // DEBUGPRINTF("\n SEND %p with length: %d\n",(char*)mpb_start_address + (dest_ip -1)*mpb_size + pos * 1792,p->tot_len +2); @@ -865,7 +838,9 @@ realloc: LINK_STATS_INC(link.xmit); mmnif->stats.tx++; mmnif->stats.tx_bytes += size; + mmnif_trigger_irq(dest->dest_ip); + return ERR_OK; drop_packet: @@ -882,27 +857,30 @@ drop_packet: int mmnif_send(int s, void *data, size_t size, int flags) { bypass_rxdesc_t *p = mmnif_hashlookup(s); - uint32_t i, j, k, ret; + uint32_t i, j, k; + int total_size = 0; if (p != 0) 
{ - if (size < ((MMNIF_RX_BUFFERLEN / 2) - 1)) - return mmnif_tx_bypass(mmnif_dev, data, size, s); - - else - { + if (size < ((MMNIF_RX_BUFFERLEN / 2) - 1)) { + if (mmnif_tx_bypass(mmnif_dev, data, size, s) == ERR_OK) + return size; + else + return -1; + } else { j = size / (((MMNIF_RX_BUFFERLEN / 2) - 1)); k = size - (j * (((MMNIF_RX_BUFFERLEN / 2) - 1))); for (i = 0; i < j; i++) { - ret = mmnif_tx_bypass(mmnif_dev, data + i * ((MMNIF_RX_BUFFERLEN / 2) - 1), ((MMNIF_RX_BUFFERLEN / 2) - 1), s); - - if (ret < 0) - return ret; + if (mmnif_tx_bypass(mmnif_dev, data + i * ((MMNIF_RX_BUFFERLEN / 2) - 1), ((MMNIF_RX_BUFFERLEN / 2) - 1), s) != ERR_OK) + return total_size; + total_size += (MMNIF_RX_BUFFERLEN / 2) - 1; } - ret = mmnif_tx_bypass(mmnif_dev, data + (j - 1) * ((MMNIF_RX_BUFFERLEN / 2) - 1), k, s); - return ret; + + if (mmnif_tx_bypass(mmnif_dev, data + (j - 1) * ((MMNIF_RX_BUFFERLEN / 2) - 1), k, s) != ERR_OK) + return total_size; + return total_size + k; } } @@ -914,8 +892,7 @@ int mmnif_send(int s, void *data, size_t size, int flags) * because we have no link layer and everything is reliable we don't need * to add anything so we just pass it to our tx function */ -static err_t -mmnif_link_layer(struct netif *netif, struct pbuf *q, ip_addr_t * ipaddr) +static err_t mmnif_link_layer(struct netif *netif, struct pbuf *q, ip_addr_t * ipaddr) { return netif->linkoutput(netif, q); } @@ -934,7 +911,6 @@ err_t mmnif_init(struct netif *netif) DEBUGPRINTF("mmnif init attempt\n"); mmnif_dev = netif; - own_ip_address += RCCE_ue() + 1; /* Alloc and clear memory for the device struct */ @@ -959,7 +935,7 @@ err_t mmnif_init(struct netif *netif) // map physical address in the virtual address space header_start_address = (void*) map_region(0, (size_t) header_start_address, (MMNIF_CORES * header_size) >> PAGE_SHIFT, MAP_KERNEL_SPACE | MAP_WT | MAP_NO_CACHE); DEBUGPRINTF("map_region : %p\n", header_start_address); - mmnif->rx_buff = (mm_rx_buffer_t *) (header_start_address + 
(header_size) * (own_ip_address - router_ip_address)); + mmnif->rx_buff = (mm_rx_buffer_t *) (header_start_address + header_size * RCCE_IAM); /* Alloc and clear shared memory for rx_buff */ @@ -980,25 +956,25 @@ err_t mmnif_init(struct netif *netif) // align size to the granularity of a page size heap_size = (heap_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); heap_start_address = (void*) RCCE_shmalloc(heap_size * MMNIF_CORES); - DEBUGPRINTF("RCCE_shmalloc : %p (size %u)\n", header_start_address, MMNIF_CORES * header_size); + DEBUGPRINTF("RCCE_shmalloc : %p (size %u)\n", heap_start_address, MMNIF_CORES * header_size); // map physical address in the virtual address space #if USE_CACHE uint32_t n = (uint32_t) heap_start_address; heap_start_address = map_region(0, heap_start_address, (MMNIF_CORES * heap_size) >> PAGE_SHIFT, MAP_KERNEL_SPACE | MAP_NO_CACHE | MAP_MPE | MAP_WT); - map_region(heap_start_address + (heap_size) * (own_ip_address - router_ip_address), n + (heap_size) * (own_ip_address - router_ip_address), header_size >> PAGE_SHIFT, MAP_KERNEL_SPACE | MAP_MPE | MAP_WT | MAP_REMAP); + map_region(heap_start_address + (heap_size) * RCCE_IAM, n + (heap_size) * RCCE_IAM, header_size >> PAGE_SHIFT, MAP_KERNEL_SPACE | MAP_MPE | MAP_WT | MAP_REMAP); #else heap_start_address = (void*) map_region(0, (size_t) heap_start_address, (MMNIF_CORES * heap_size) >> PAGE_SHIFT, MAP_KERNEL_SPACE | MAP_NO_CACHE | MAP_MPE | MAP_WT); #endif // USE_CACHE #endif // MMNIF_USE_MPB - DEBUGPRINTF("map_region : %p\n", header_start_address); + DEBUGPRINTF("map_region : %p\n", heap_start_address); #if MMNIF_USE_MPB mmnif->rx_heap = heap_start_address; heap_start_address = heap_start_address - (RC_MY_COREID/2 * 16*1024*1024 ) - (RC_MY_COREID%2 * 0x2000); DEBUGPRINTF("heap_start_address : %p\n", heap_start_address); #else - mmnif->rx_heap = heap_start_address + (heap_size) * (own_ip_address - router_ip_address); + mmnif->rx_heap = heap_start_address + heap_size * RCCE_IAM; #endif if 
(!(heap_start_address)) @@ -1007,13 +983,9 @@ err_t mmnif_init(struct netif *netif) return ERR_MEM; } -#if !MMNIF_USE_MPB - memset(mmnif->rx_buff, 0x00, header_size); - memset(mmnif->rx_heap, 0x00, heap_size); - - *((int *)RCCE_fool_write_combine_buffer) = 1; -#else +#if MMNIF_USE_MPB asm volatile (".byte 0x0f; .byte 0x0a;\n"); +#endif for(i=0; irx_buff)[i] = 0x00; @@ -1021,6 +993,7 @@ err_t mmnif_init(struct netif *netif) ((uint8_t*)mmnif->rx_heap)[i] = 0x00; *((int *)RCCE_fool_write_combine_buffer) = 1; +#if MMNIF_USE_MPB asm volatile (".byte 0x0f; .byte 0x0a;\n"); #endif @@ -1030,9 +1003,7 @@ err_t mmnif_init(struct netif *netif) /* init the lock's for the hdr */ - spinlock_init(&mmnif->rx_buff->rlock); spinlock_init(&pseudolock); - spinlock_init(&mmnif->lock); /* init the sems for communication art */ @@ -1056,9 +1027,6 @@ err_t mmnif_init(struct netif *netif) mmnif->rx_buff->acceptors[i].rsock = -1; mmnif->rx_buff->acceptors[i].src_ip = 0; mmnif->rx_buff->acceptors[i].port = 0; - spinlock_init(&mmnif->rx_buff->acceptors[i].alock); - spinlock_lock(&mmnif->rx_buff->acceptors[i].alock); - spinlock_unlock(&mmnif->rx_buff->acceptors[i].alock); } /* pass the device state to lwip */ @@ -1100,7 +1068,7 @@ err_t mmnif_init(struct netif *netif) static void mmnif_rx(struct netif *netif) { mmnif_t *mmnif = netif->state; - mm_rx_buffer_t *b = mmnif->rx_buff; + volatile mm_rx_buffer_t *b = mmnif->rx_buff; uint16_t length = 0; struct pbuf *p; struct pbuf *q; @@ -1112,13 +1080,11 @@ static void mmnif_rx(struct netif *netif) anotherpacket: rdesc = 0xFF; - spinlock_lock(&b->rlock); /* check if this call to mmnif_rx makes any sense */ if (b->desc_table[b->dread].stat == MMNIF_STATUS_FREE) { - spinlock_unlock(&b->rlock); goto out; } @@ -1142,6 +1108,7 @@ anotherpacket: DEBUGPRINTF("mmnif_rx(): no fast socket associated with %d", b->desc_table[rdesc].fast_sock); mmnif->rx_buff->desc_table[rdesc].stat = MMNIF_STATUS_PROC; mmnif_rxbuff_free(); + goto out; } else { 
b->desc_table[rdesc].stat = MMNIF_STATUS_INPROC; #if MMNIF_FAST_SOCKET_BLOCK @@ -1149,19 +1116,16 @@ anotherpacket: #else atomic_int32_inc(&bp->cnt); #endif + goto anotherpacket; } - spinlock_unlock(&b->rlock); - goto out; } } if (b->desc_table[(j + i) % MMNIF_MAX_DESCRIPTORS].stat == MMNIF_STATUS_FREE) { - spinlock_unlock(&b->rlock); goto out; } } - spinlock_unlock(&b->rlock); /* if there is no packet finished we encountered a random error */ @@ -1265,19 +1229,15 @@ out: static int mmnif_rx_bypass(struct netif *netif, int s, void *data, uint32_t len) { mmnif_t *mmnif = netif->state; - mm_rx_buffer_t *b = mmnif->rx_buff; - uint16_t length; - char *packet; + volatile mm_rx_buffer_t *b = mmnif->rx_buff; + uint16_t length = 0; + char *packet = NULL; uint32_t i, j; uint8_t rdesc = 0xFF; - // spinlock_lock(&b->rlock); - /* check if this call to mmnif_rx makes any sense */ - if (b->desc_table[b->dread].stat == MMNIF_STATUS_FREE) - { - // spinlock_unlock(&b->rlock); + if (b->desc_table[b->dread].stat == MMNIF_STATUS_FREE) { return -1; } @@ -1296,14 +1256,10 @@ static int mmnif_rx_bypass(struct netif *netif, int s, void *data, uint32_t len) } } - // spinlock_unlock(&b->rlock); - /* if there is no packet finished we encountered a random error */ if (rdesc == 0xFF) - { return -1; - } /* If length is zero return silently */ @@ -1320,13 +1276,28 @@ static int mmnif_rx_bypass(struct netif *netif, int s, void *data, uint32_t len) #ifdef DEBUG_MMNIF_PACKET DEBUGPRINTF("\n RECIEVED - %p with legth: %d\n", packet, length); hex_dump(length, packet); - #endif - if (len >= length) - memcpy(data, (void*) mmnif->rx_buff->desc_table[rdesc].addr, mmnif->rx_buff->desc_table[rdesc].len); - else + + if (BUILTIN_EXPECT(len < length, 0)) goto drop_packet; +#if USE_CACHE || MMNIF_USE_MPB + asm volatile (".byte 0x0f; .byte 0x0a;\n"); +#endif + +#if !USE_CACHE && !MMNIF_USE_MBP + memcpy_from_nc(data, (void*) mmnif->rx_buff->desc_table[rdesc].addr, mmnif->rx_buff->desc_table[rdesc].len); +#elif 
MMNIF_USE_MPB + memcpy_get(data, (void*) mmnif->rx_buff->desc_table[rdesc].addr, mmnif->rx_buff->desc_table[rdesc].len); +#else + memcpy(data, (void*) mmnif->rx_buff->desc_table[rdesc].addr, mmnif->rx_buff->desc_table[rdesc].len); +#endif + + *((int *)RCCE_fool_write_combine_buffer) = 1; +#if MMNIF_USE_MPB + asm volatile (".byte 0x0f; .byte 0x0a;\n"); +#endif + /* indicate that the copy process is done and the packet can be freed * note that we did not lock here because we are the only one editing this value */ @@ -1345,10 +1316,6 @@ static int mmnif_rx_bypass(struct netif *netif, int s, void *data, uint32_t len) return length; drop_packet: - //spinlock_lock(&mmnif->rx_buff->rlock); - - /*error handling */ - //spinlock_unlock(&mmnif->rx_buff->rlock); LINK_STATS_INC(link.drop); mmnif->stats.rx_err++; @@ -1375,7 +1342,6 @@ int mmnif_recv(int s, void *data, uint32_t len, int flags) //reschedule(); NOP8; } - #endif ret = mmnif_rx_bypass(mmnif_dev, s, data, len); @@ -1407,27 +1373,30 @@ int mmnif_socket(int domain, int type, int protocol) */ int mmnif_accept(int s, struct sockaddr *addr, socklen_t * addrlen) { - struct sockaddr_in *bp = (struct sockaddr_in*)addr; - uint16_t port = bp->sin_port; - mm_rx_buffer_t *b = ((mmnif_t *) mmnif_dev->state)->rx_buff; - int i; + struct sockaddr_in *client = (struct sockaddr_in*)addr; + volatile mm_rx_buffer_t *b = ((mmnif_t *) mmnif_dev->state)->rx_buff; bypass_rxdesc_t *p; int tmp1 = get_clock_tick(); - int tmp2 = 0; + int i, tmp2 = 0; + uint16_t port; + + // TODO: Bug, not compatible with BSD sockets + port = client->sin_port; + if ((unsigned int)s >= MMNIF_PSEUDO_SOCKET_START) { for (i = 0; i < MMNIF_MAX_ACCEPTORS; i++) { if (b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].stat == MMNIF_ACC_STAT_CLOSED) { - spinlock_lock(&b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].alock); + RCCE_acquire_lock(RC_MY_COREID); b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].port = port; b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].stat = 
MMNIF_ACC_STAT_ACCEPTING; spinlock_lock(&pseudolock); mmnif_hashadd(npseudosocket, -1, 0); b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].nsock = npseudosocket++; spinlock_unlock(&pseudolock); - spinlock_unlock(&b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].alock); + RCCE_release_lock(RC_MY_COREID); while (b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].stat != MMNIF_ACC_STAT_ACCEPT_ME) NOP8; @@ -1435,48 +1404,46 @@ int mmnif_accept(int s, struct sockaddr *addr, socklen_t * addrlen) p = mmnif_hashlookup(b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].nsock); p->dest_ip = b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].src_ip; p->remote_socket = b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].rsock; - spinlock_lock(&b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].alock); + RCCE_acquire_lock(RC_MY_COREID); b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].stat = MMNIF_ACC_STAT_ACCEPTED; i = b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].nsock; - spinlock_unlock(&b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].alock); + RCCE_release_lock(RC_MY_COREID); return i; } } - return -1; - } - else - { + return -1; + } else { for (i = 0; i < MMNIF_MAX_ACCEPTORS; i++) { if (b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].stat == MMNIF_ACC_STAT_CLOSED) { - spinlock_lock(&b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].alock); + RCCE_acquire_lock(RC_MY_COREID); b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].port = port; b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].stat = MMNIF_ACC_STAT_ACCEPTING; spinlock_lock(&pseudolock); mmnif_hashadd(npseudosocket, -1, 0); b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].nsock = npseudosocket++; spinlock_unlock(&pseudolock); - spinlock_unlock(&b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].alock); + RCCE_release_lock(RC_MY_COREID); while (b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].stat != MMNIF_ACC_STAT_ACCEPT_ME) { tmp2 = get_clock_tick(); if (tmp2 - tmp1 > MMNIF_AUTO_SOCKET_TIMEOUT) { - spinlock_lock(&b->acceptors[(i + port) % 
MMNIF_MAX_ACCEPTORS].alock); + RCCE_acquire_lock(RC_MY_COREID); if (b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].stat == MMNIF_ACC_STAT_ACCEPT_ME) { - spinlock_unlock(&b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].alock); + RCCE_acquire_lock(RC_MY_COREID); break; } DEBUGPRINTF("mmnif_accept(): Timout occoured, switching to normal accept()"); mmnif_hashdelete(b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].nsock); b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].stat = MMNIF_ACC_STAT_CLOSED; - spinlock_unlock(&b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].alock); + RCCE_release_lock(RC_MY_COREID); goto normalaccept; } NOP8; @@ -1485,14 +1452,15 @@ int mmnif_accept(int s, struct sockaddr *addr, socklen_t * addrlen) p = mmnif_hashlookup(b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].nsock); p->dest_ip = b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].src_ip; p->remote_socket = b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].rsock; - spinlock_lock(&b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].alock); + RCCE_acquire_lock(RC_MY_COREID); b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].stat = MMNIF_ACC_STAT_ACCEPTED; i = b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].nsock; - spinlock_unlock(&b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].alock); + RCCE_release_lock(RC_MY_COREID); return i; } } + return -1; } @@ -1507,48 +1475,29 @@ int mmnif_connect(int s, const struct sockaddr *name, socklen_t namelen) { struct sockaddr_in *p = (struct sockaddr_in*) name; uint16_t port = p->sin_port; - mm_rx_buffer_t *b; + volatile mm_rx_buffer_t *b; int i; //int tmp1 = get_clock_tick(); //int tmp2 = 0; uint8_t core; - uint8_t *ip4addr; - uint8_t addr[4]; - //uint32_t netmask = 0xFFFFFF00; - /* grab the destination ip address out of the ip header - * for internal routing the last ocet is interpreted as core ID. 
- */ - ip4addr = (uint8_t*) &p->sin_addr.s_addr; - - /* revert the address to host format */ - addr[3] = ip4addr[0]; - addr[2] = ip4addr[1]; - addr[1] = ip4addr[2]; - addr[0] = ip4addr[3]; - - /* check if the ip address is in the Local Area Network of the 48 cores */ - // if (!((netmask & *(uint32_t*)addr) == (netmask & own_ip_address) )) - // return -1; - - core = addr[0]; + core = ip4_addr4(&p->sin_addr.s_addr); if ((core) < 1 || (core > MMNIF_CORES)) return lwip_connect(s, name, namelen); - b = (mm_rx_buffer_t *) ((char *)header_start_address + - (core - 1) * header_size); + b = (volatile mm_rx_buffer_t *) ((char *)header_start_address + (core - 1) * header_size); for (i = 0; i < MMNIF_MAX_ACCEPTORS; i++) { if (b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].stat == MMNIF_ACC_STAT_ACCEPTING && b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].port == port) { - spinlock_lock(&b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].alock); + RCCE_acquire_lock(RC_COREID[core-1]); b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].stat = MMNIF_ACC_STAT_ACCEPT_ME; b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].rsock = s; - b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].src_ip = own_ip_address & 0xFF; - mmnif_hashadd(s, - b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].nsock, core); - spinlock_unlock(&b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].alock); + b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].src_ip = ip4_addr4(&mmnif_dev->ip_addr); + mmnif_hashadd(s, b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].nsock, core); + RCCE_release_lock(RC_COREID[core-1]); + while (b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].stat != MMNIF_ACC_STAT_ACCEPTED) { @@ -1563,9 +1512,11 @@ int mmnif_connect(int s, const struct sockaddr *name, socklen_t namelen) // } NOP8; } - spinlock_lock(&b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].alock); + + RCCE_acquire_lock(RC_COREID[core-1]); b->acceptors[(i + port) % MMNIF_MAX_ACCEPTORS].stat = MMNIF_ACC_STAT_CLOSED; - spinlock_unlock(&b->acceptors[(i + 
port) % MMNIF_MAX_ACCEPTORS].alock); + RCCE_release_lock(RC_COREID[core-1]); + return 0; } } @@ -1589,6 +1540,13 @@ int mmnif_bind(int s, const struct sockaddr *name, socklen_t namelen) return 0; } +int mmnif_setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen) +{ + if ((unsigned int)s < MMNIF_PSEUDO_SOCKET_START) + return lwip_setsockopt(s, level, optname, optval, optlen); + return 0; +} + /* mmnif_closesocket(): replacement if lwip_close for * fast_sockets */ diff --git a/drivers/net/mmnif.h b/drivers/net/mmnif.h index 69046984..c3030aae 100644 --- a/drivers/net/mmnif.h +++ b/drivers/net/mmnif.h @@ -24,26 +24,53 @@ #ifdef CONFIG_LWIP #include #include /* lwip netif */ +#include -#define AF_MMNIF_NET 0x1337 +#define AF_MMNIF_NET 0x42 -#define MMNIF_AUTOACTIVATE_FAST_SOCKETS 0 +#define MMNIF_AUTOACTIVATE_FAST_SOCKETS 1 #if MMNIF_AUTOACTIVATE_FAST_SOCKETS +//#ifndef socklen_t +//# define socklen_t u32_t +//#endif + +int mmnif_socket(int domain, int type, int protocol); +int mmnif_send(int s, void *data, size_t size, int flags); +int mmnif_recv(int s, void *data, uint32_t len, int flags); +int mmnif_accept(int s, struct sockaddr *addr, socklen_t * addrlen); +int mmnif_connect(int s, const struct sockaddr *name, socklen_t namelen); +int mmnif_listen(int s, int backlog); +int mmnif_bind(int s, const struct sockaddr *name, socklen_t namelen); +int mmnif_closesocket(int s); +int mmnif_getsockopt (int s, int level, int optname, void *optval, socklen_t *optlen); +int mmnif_setsockopt (int s, int level, int optname, const void *optval, socklen_t optlen); + +#undef accept #define accept(a,b,c) mmnif_accept(a,b,c) +#undef closesocket #define closesocket(s) mmnif_closesocket(s) +#undef connect #define connect(a,b,c) mmnif_connect(a,b,c) +#undef recv #define recv(a,b,c,d) mmnif_recv(a,b,c,d) +#undef send #define send(a,b,c,d) mmnif_send(a,b,c,d) +#undef socket #define socket(a,b,c) mmnif_socket(a,b,c) +#undef bind #define bind(a,b,c) mmnif_bind(a,b,c) 
+#undef listen #define listen(a,b) mmnif_listen(a,b) +#undef setsockopt +#define setsockopt(a,b,c,d,e) mmnif_setsockopt(a,b,c,d,e) +#undef select #endif err_t mmnif_init(struct netif*); err_t mmnif_shutdown(void); int mmnif_worker(void *e); -void mmnif_print_driver_status(); +void mmnif_print_driver_status(void); #endif From fcbc567c716d3005882a3d16d8d66f206944719d Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Thu, 20 Oct 2011 03:45:51 -0700 Subject: [PATCH 03/17] minor optimizations --- drivers/net/mmnif.c | 160 +++++++++++++++++++++----------------------- drivers/net/mmnif.h | 3 - 2 files changed, 75 insertions(+), 88 deletions(-) diff --git a/drivers/net/mmnif.c b/drivers/net/mmnif.c index a8a728ac..6ca4a96b 100644 --- a/drivers/net/mmnif.c +++ b/drivers/net/mmnif.c @@ -85,10 +85,8 @@ #define MMNIF_AUTO_SOCKET_TIMEOUT 500 -#define MMNIF_FAST_SOCKET_BLOCK 0 - #ifdef DEBUG_MMNIF -#include "util.h" /* hex dump */ +#include /* hex dump */ #endif /* define constants @@ -145,7 +143,6 @@ typedef struct acceptor { /* stat: status of the acceptor * src_ip: where did the connect request came from * port: port on which the acceptor is listening - * alock : acceptor lock * nsock : next pseudo socket which is used in further connection * rsock : remote socket which has to be assosicated with the nsock */ @@ -161,13 +158,13 @@ typedef struct acceptor { typedef struct bypass_rxdesc { /* socket : hashtarget * remote_socket: socket on the remote end - * cnt : atomic counter for the recv function + * counter : packet counter + * last_id : last packet id * dest_ip : associated destination ip/core */ int socket; int remote_socket; sem_t sem; - atomic_int32_t cnt; uint8_t dest_ip; } bypass_rxdesc_t; @@ -205,6 +202,7 @@ typedef struct rx_desc { * addr : memory address of the packet * fast_sock: (-1) if no socket is associated * else the socket n of the fast socket + * id : packet id */ uint8_t stat; uint16_t len; @@ -251,12 +249,12 @@ typedef struct mmnif { uint32_t ipaddr; // 
checks the TCPIP thread already the rx buffers? - uint8_t check_in_progress; + volatile uint8_t check_in_progress; /* memory interaction variables: * - pointer to recive buffer */ - mm_rx_buffer_t *rx_buff; + volatile mm_rx_buffer_t *rx_buff; uint8_t *rx_heap; /* semaphore to regulate polling vs. interrupts @@ -326,7 +324,7 @@ inline static void* memcpy_to_nc(void* dest, const void *src, size_t count) /* trigger an interrupt on the remote processor * so he knows there is a packet to read */ -inline static int mmnif_trigger_irq(dest_ip) +inline static int mmnif_trigger_irq(int dest_ip, int safe) { int tmp, x, y, z, addr; int ue = dest_ip - 1; @@ -336,32 +334,19 @@ inline static int mmnif_trigger_irq(dest_ip) y = Y_PID(RC_COREID[ue]); addr = CRB_ADDR(x, y) + (z == 0 ? GLCFG0 : GLCFG1); - // send interrupt to ue - //do { - // NOP1; - tmp = ReadConfigReg(addr); - //} while (tmp & 1); + if (safe) { + // send interrupt to ue + do { + NOP8; + tmp = ReadConfigReg(addr); + } while (tmp & 1); + } else tmp = ReadConfigReg(addr); tmp |= 1; SetConfigReg(addr, tmp); return 0; } -/* mmnif_get_device_stats(): Returns a copy of the - * current device - */ -static mmnif_device_stats_t mmnif_get_device_stats(void) -{ - mmnif_device_stats_t stats = { 0 }; - - if (!mmnif_dev) - DEBUGPRINTF("mmnif_get_device_stats(): the device is not initialized yet.\n"); - else - stats = ((mmnif_t *) mmnif_dev->state)->stats; - - return stats; -} - /* mmnif_print_stats(): Print the devices stats of the * current device */ @@ -452,7 +437,6 @@ static uint8_t mmnif_get_destination(struct netif *netif, struct pbuf *p) /* check if the address is legitimata else return router core again */ if ((core) < 1 || (core > MMNIF_CORES)) core = 1; - kprintf("core %d\n", (int) core); return core; } @@ -461,7 +445,7 @@ static uint8_t mmnif_get_destination(struct netif *netif, struct pbuf *p) * right inside of the buffer which is used for communication * with the remote end */ -static uint32_t 
mmnif_rxbuff_alloc(uint8_t dest, uint16_t len) +static uint32_t mmnif_rxbuff_alloc(uint8_t dest, uint16_t len/*, uint32_t id*/) { uint32_t ret = 0; volatile mm_rx_buffer_t *rb = (mm_rx_buffer_t *) ((char *)header_start_address + (dest - 1) * header_size); @@ -631,7 +615,6 @@ static err_t mmnif_tx(struct netif *netif, struct pbuf *p) uint32_t i; struct pbuf *q; /* interator */ uint32_t dest_ip = mmnif_get_destination(netif, p); - //int32_t chances = 4000; /* check for over/underflow */ if (BUILTIN_EXPECT((p->tot_len < 20 /* IP header size */) || (p->tot_len > 1536), 0)) { @@ -646,14 +629,7 @@ realloc: { //DEBUGPRINTF("mmnif_tx(): concurrency"); - //chances--; - //if (chances <= 0) - // goto drop_packet; - //if (chances % 17 == 0) - // mmnif_trigger_irq(dest_ip); - NOP8;NOP8;NOP8;NOP8;NOP8;NOP8;NOP8;NOP8; - //udelay(10); goto realloc; } @@ -691,7 +667,7 @@ realloc: mmnif->stats.tx++; mmnif->stats.tx_bytes += p->tot_len; - mmnif_trigger_irq(dest_ip); + mmnif_trigger_irq(dest_ip, 1); return ERR_OK; @@ -787,20 +763,16 @@ static err_t mmnif_tx_bypass(struct netif * netif, void *pbuff, uint16_t size, i { mmnif_t *mmnif = netif->state; uint32_t write_address; + //uint32_t id; bypass_rxdesc_t *dest = mmnif_hashlookup(s); - //uint32_t exp_delay = 2; //mm_rx_buffer_t *rb = (mm_rx_buffer_t *) ((char *)header_start_address + (dest->dest_ip - 1) * header_size); /* allocate memory for the packet in the remote buffer */ + //id = ++dest->counter; realloc: write_address = mmnif_rxbuff_alloc(dest->dest_ip, size); if (!write_address) { - - // DEBUGPRINTF("mmnif_tx_bypass(): concurrency"); - // udelay(exp_delay); - // exp_delay << 1; - // reschedule(); NOP8;NOP8;NOP8;NOP8;NOP8;NOP8;NOP8;NOP8; goto realloc; } @@ -817,7 +789,7 @@ realloc: #if !MMNIF_USE_MPB memcpy_to_nc((void*) write_address, pbuff, size); #else - memcpy_put(write_address, pbuff, size); + memcpy_put((void*) write_address, pbuff, size); #endif *((int *)RCCE_fool_write_combine_buffer) = 1; @@ -839,16 +811,10 @@ realloc: 
mmnif->stats.tx++; mmnif->stats.tx_bytes += size; - mmnif_trigger_irq(dest->dest_ip); + if (size >= ((MMNIF_RX_BUFFERLEN / 2) - 1)) + mmnif_trigger_irq(dest->dest_ip, 0); return ERR_OK; - -drop_packet: - /* drop packet for one or another reason - */ - LINK_STATS_INC(link.drop); - mmnif->stats.tx_err++; - return ERR_IF; } /* mmnif_send(): is going to be used as replacement of @@ -864,24 +830,24 @@ int mmnif_send(int s, void *data, size_t size, int flags) { if (size < ((MMNIF_RX_BUFFERLEN / 2) - 1)) { if (mmnif_tx_bypass(mmnif_dev, data, size, s) == ERR_OK) - return size; - else - return -1; + total_size = size; } else { j = size / (((MMNIF_RX_BUFFERLEN / 2) - 1)); k = size - (j * (((MMNIF_RX_BUFFERLEN / 2) - 1))); for (i = 0; i < j; i++) { - if (mmnif_tx_bypass(mmnif_dev, data + i * ((MMNIF_RX_BUFFERLEN / 2) - 1), ((MMNIF_RX_BUFFERLEN / 2) - 1), s) != ERR_OK) - return total_size; + if (mmnif_tx_bypass(mmnif_dev, (char*) data + i * ((MMNIF_RX_BUFFERLEN / 2) - 1), ((MMNIF_RX_BUFFERLEN / 2) - 1), s) != ERR_OK) + goto out; total_size += (MMNIF_RX_BUFFERLEN / 2) - 1; } - if (mmnif_tx_bypass(mmnif_dev, data + (j - 1) * ((MMNIF_RX_BUFFERLEN / 2) - 1), k, s) != ERR_OK) - return total_size; - return total_size + k; + if (mmnif_tx_bypass(mmnif_dev, data + (j - 1) * ((MMNIF_RX_BUFFERLEN / 2) - 1), k, s) == ERR_OK) + total_size += k; } +out: + mmnif_trigger_irq(p->dest_ip, 1); + return total_size; } return lwip_send(s, data, size, flags); @@ -1014,10 +980,9 @@ err_t mmnif_init(struct netif *netif) mmnif_hashtable[i].socket = -1; mmnif_hashtable[i].remote_socket = -1; mmnif_hashtable[i].dest_ip = 0; + //mmnif_hashtable[i].counter = 0; -#if MMNIF_FAST_SOCKET_BLOCK sem_init(&mmnif_hashtable[i].sem, 0); -#endif } for (i=0; idesc_table[rdesc].stat = MMNIF_STATUS_INPROC; -#if MMNIF_FAST_SOCKET_BLOCK sem_post(&bp->sem); -#else - atomic_int32_inc(&bp->cnt); -#endif - goto anotherpacket; + irq_nested_enable(flags); + return; } } } @@ -1140,6 +1103,8 @@ anotherpacket: goto out; } + 
irq_nested_enable(flags); + /* check for over/underflow */ if (BUILTIN_EXPECT((length < 20 /* IP header size */) || (length > 1536), 0)) { @@ -1217,9 +1182,12 @@ drop_packet: /* TODO: error handling */ LINK_STATS_INC(link.drop); mmnif->stats.rx_err++; + mmnif->check_in_progress = 0; + return; out: mmnif->check_in_progress = 0; + irq_nested_enable(flags); return; } @@ -1327,27 +1295,49 @@ drop_packet: */ int mmnif_recv(int s, void *data, uint32_t len, int flags) { + mmnif_t* mmnif = (mmnif_t *) mmnif_dev->state; bypass_rxdesc_t *p = mmnif_hashlookup(s); int ret; if (p == 0) return lwip_recv(s, data, len, flags); -#if MMNIF_FAST_SOCKET_BLOCK + if (sem_trywait(&p->sem) == 0) + return mmnif_rx_bypass(mmnif_dev, s, data, len); + + uint32_t state = irq_nested_disable(); + if (mmnif->check_in_progress) { + uint32_t i,j; + volatile mm_rx_buffer_t *b = mmnif->rx_buff; + bypass_rxdesc_t *bp; + uint8_t rdesc; + + /* search the packet whose transmission is finished + */ + for (i = 0, j = b->dread; i < MMNIF_MAX_DESCRIPTORS; i++) + { + if (b->desc_table[(j + i) % MMNIF_MAX_DESCRIPTORS].stat == MMNIF_STATUS_RDY) + { + rdesc = (j + i) % MMNIF_MAX_DESCRIPTORS; + if (b->desc_table[(j + i) % MMNIF_MAX_DESCRIPTORS].fast_sock != -1) { + bp = mmnif_hashlookup(b->desc_table[rdesc].fast_sock); + if (bp) { + b->desc_table[rdesc].stat = MMNIF_STATUS_INPROC; + ret = mmnif_rx_bypass(mmnif_dev, s, data, len); + irq_nested_enable(state); + return ret; + } + } + } + } + + mmnif->check_in_progress = 0; + } + irq_nested_enable(state); + sem_wait(&p->sem, 0); -#else - while (!atomic_int32_read(&p->cnt)) - { - //reschedule(); - NOP8; - } -#endif - - ret = mmnif_rx_bypass(mmnif_dev, s, data, len); - atomic_int32_dec(&p->cnt); - - return ret; + return mmnif_rx_bypass(mmnif_dev, s, data, len); } /* mmnif_socket(): replacement of lwip_socket for @@ -1586,7 +1576,7 @@ static void mmnif_irqhandler(struct state* s) mmnif = (mmnif_t *) mmnif_dev->state; if (!mmnif->check_in_progress) { if 
(tcpip_callback_with_block(mmnif_rx, (void*) mmnif_dev, 0) == ERR_OK) { - mmnif->check_in_progress = 1; + mmnif->check_in_progress = 1; } else { DEBUGPRINTF("rckemacif_handler: unable to send a poll request to the tcpip thread\n"); } diff --git a/drivers/net/mmnif.h b/drivers/net/mmnif.h index c3030aae..67fd46a7 100644 --- a/drivers/net/mmnif.h +++ b/drivers/net/mmnif.h @@ -31,9 +31,6 @@ #define MMNIF_AUTOACTIVATE_FAST_SOCKETS 1 #if MMNIF_AUTOACTIVATE_FAST_SOCKETS -//#ifndef socklen_t -//# define socklen_t u32_t -//#endif int mmnif_socket(int domain, int type, int protocol); int mmnif_send(int s, void *data, size_t size, int flags); From 844acb30b97092342c2a580dd8e3124e271ad400 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Thu, 20 Oct 2011 04:51:34 -0700 Subject: [PATCH 04/17] fix bug in get_destination --- drivers/net/mmnif.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/net/mmnif.c b/drivers/net/mmnif.c index 6ca4a96b..33fef2d8 100644 --- a/drivers/net/mmnif.c +++ b/drivers/net/mmnif.c @@ -417,7 +417,6 @@ static uint8_t mmnif_get_destination(struct netif *netif, struct pbuf *p) { struct ip_hdr *iphdr; ip_addr_p_t ip; - uint8_t core; /* grab the destination ip address out of the ip header * for internal routing the last ocet is interpreted as core ID. 
@@ -425,19 +424,7 @@ static uint8_t mmnif_get_destination(struct netif *netif, struct pbuf *p) iphdr = (struct ip_hdr *)(p->payload); ip = iphdr->dest; - /* check if the ip address is in the Local Area Network of the 48 cores */ - - /* if it's not the same network the router core is addressed - * Note: the router core is core 1 - */ - if (ip_addr_netcmp(&ip, &netif->ip_addr, &netif->netmask)) - return 1; - core = ip4_addr4(&ip); - - /* check if the address is legitimata else return router core again */ - if ((core) < 1 || (core > MMNIF_CORES)) - core = 1; - return core; + return ip4_addr4(&ip); } /* mmnif_rxbuff_alloc(): @@ -622,6 +609,12 @@ static err_t mmnif_tx(struct netif *netif, struct pbuf *p) goto drop_packet; } + /* check destination ip */ + if (BUILTIN_EXPECT((dest_ip < 1) || (dest_ip > MMNIF_CORES), 0)) { + DEBUGPRINTF("mmnif_tx: invalid destination IP %d => drop\n", dest_ip); + goto drop_packet; + } + /* allocate memory for the packet in the remote buffer */ realloc: write_address = mmnif_rxbuff_alloc(dest_ip, p->tot_len); From 60cfb9b5866b8156a1f6b873bdade02972184231 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Thu, 20 Oct 2011 06:28:14 -0700 Subject: [PATCH 05/17] using of clear function names --- include/metalsvm/init.h | 4 ++-- kernel/init.c | 11 +---------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/include/metalsvm/init.h b/include/metalsvm/init.h index 8ca4a89a..ab77b793 100644 --- a/include/metalsvm/init.h +++ b/include/metalsvm/init.h @@ -36,8 +36,8 @@ extern "C" { * initialize the VGA output. 
If configured.*/ int lowlevel_init(void); -/** @brief Shutdown the system */ -int shutdown(void); +/** @brief Shutdown the network */ +int network_shutdown(void); /** @brief Entry point of the init task */ int initd(void* arg); diff --git a/kernel/init.c b/kernel/init.c index eea20644..294d240f 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -171,7 +171,7 @@ static void tcpip_init_done(void* arg) } #endif -static int network_shutdown(void) +int network_shutdown(void) { #if defined(CONFIG_LWIP) && defined(CONFIG_ROCKCREEK) mmnif_shutdown(); @@ -183,15 +183,6 @@ static int network_shutdown(void) return 0; } -int shutdown(void) -{ - int ret; - - ret = network_shutdown(); - - return ret; -} - static void list_fs(vfs_node_t* node, uint32_t depth) { int j, i = 0; From e3a8c1ae779f8ff08e2f144e7d672fa6342d6517 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Thu, 20 Oct 2011 06:44:53 -0700 Subject: [PATCH 06/17] cosmetic changes --- apps/netio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/netio.c b/apps/netio.c index c6969fd1..088fe6b0 100644 --- a/apps/netio.c +++ b/apps/netio.c @@ -77,7 +77,7 @@ typedef struct #define DEFAULTPORT 0x494F #define TMAXSIZE 65536 -static int tSizes[] = {/*1, 2, 4, 8, 16, */32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32767}; +static int tSizes[] = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32767}; static size_t ntSizes = sizeof(tSizes) / sizeof(int); static int nPort = DEFAULTPORT; static const int sobufsize = 131072; @@ -192,7 +192,7 @@ static int TCPServer(void* arg) { kprintf("TCP server listening.\n"); -#ifdef select +#if !USE_SOCKET_BYPASSING FD_ZERO(&fds); FD_SET(server, &fds); tv.tv_sec = 3600; From c74ee965cad9c0f92287e1d10a634eeabb74bec8 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Thu, 20 Oct 2011 08:46:27 -0700 Subject: [PATCH 07/17] minor optimizations --- drivers/net/mmnif.c | 45 +++++++++++++++++++-------------------------- 1 file changed, 19 
insertions(+), 26 deletions(-) diff --git a/drivers/net/mmnif.c b/drivers/net/mmnif.c index 33fef2d8..d63149d0 100644 --- a/drivers/net/mmnif.c +++ b/drivers/net/mmnif.c @@ -324,7 +324,7 @@ inline static void* memcpy_to_nc(void* dest, const void *src, size_t count) /* trigger an interrupt on the remote processor * so he knows there is a packet to read */ -inline static int mmnif_trigger_irq(int dest_ip, int safe) +inline static int mmnif_trigger_irq(int dest_ip) { int tmp, x, y, z, addr; int ue = dest_ip - 1; @@ -334,13 +334,11 @@ inline static int mmnif_trigger_irq(int dest_ip, int safe) y = Y_PID(RC_COREID[ue]); addr = CRB_ADDR(x, y) + (z == 0 ? GLCFG0 : GLCFG1); - if (safe) { - // send interrupt to ue - do { - NOP8; - tmp = ReadConfigReg(addr); - } while (tmp & 1); - } else tmp = ReadConfigReg(addr); + // send interrupt to ue + do { + NOP8; + tmp = ReadConfigReg(addr); + } while (tmp & 1); tmp |= 1; SetConfigReg(addr, tmp); @@ -432,7 +430,7 @@ static uint8_t mmnif_get_destination(struct netif *netif, struct pbuf *p) * right inside of the buffer which is used for communication * with the remote end */ -static uint32_t mmnif_rxbuff_alloc(uint8_t dest, uint16_t len/*, uint32_t id*/) +static uint32_t mmnif_rxbuff_alloc(uint8_t dest, uint16_t len) { uint32_t ret = 0; volatile mm_rx_buffer_t *rb = (mm_rx_buffer_t *) ((char *)header_start_address + (dest - 1) * header_size); @@ -660,7 +658,7 @@ realloc: mmnif->stats.tx++; mmnif->stats.tx_bytes += p->tot_len; - mmnif_trigger_irq(dest_ip, 1); + mmnif_trigger_irq(dest_ip); return ERR_OK; @@ -761,7 +759,6 @@ static err_t mmnif_tx_bypass(struct netif * netif, void *pbuff, uint16_t size, i //mm_rx_buffer_t *rb = (mm_rx_buffer_t *) ((char *)header_start_address + (dest->dest_ip - 1) * header_size); /* allocate memory for the packet in the remote buffer */ - //id = ++dest->counter; realloc: write_address = mmnif_rxbuff_alloc(dest->dest_ip, size); if (!write_address) @@ -804,8 +801,7 @@ realloc: mmnif->stats.tx++; 
mmnif->stats.tx_bytes += size; - if (size >= ((MMNIF_RX_BUFFERLEN / 2) - 1)) - mmnif_trigger_irq(dest->dest_ip, 0); + mmnif_trigger_irq(dest->dest_ip); return ERR_OK; } @@ -821,25 +817,24 @@ int mmnif_send(int s, void *data, size_t size, int flags) if (p != 0) { - if (size < ((MMNIF_RX_BUFFERLEN / 2) - 1)) { + if (size < ((MMNIF_RX_BUFFERLEN / 2) - CLINE_SIZE)) { if (mmnif_tx_bypass(mmnif_dev, data, size, s) == ERR_OK) - total_size = size; + return size; } else { - j = size / (((MMNIF_RX_BUFFERLEN / 2) - 1)); - k = size - (j * (((MMNIF_RX_BUFFERLEN / 2) - 1))); + j = size / (((MMNIF_RX_BUFFERLEN / 2) - CLINE_SIZE)); + k = size - (j * (((MMNIF_RX_BUFFERLEN / 2) - CLINE_SIZE))); for (i = 0; i < j; i++) { - if (mmnif_tx_bypass(mmnif_dev, (char*) data + i * ((MMNIF_RX_BUFFERLEN / 2) - 1), ((MMNIF_RX_BUFFERLEN / 2) - 1), s) != ERR_OK) - goto out; - total_size += (MMNIF_RX_BUFFERLEN / 2) - 1; + if (mmnif_tx_bypass(mmnif_dev, (char*) data + i * ((MMNIF_RX_BUFFERLEN / 2) - CLINE_SIZE), ((MMNIF_RX_BUFFERLEN / 2) - CLINE_SIZE), s) != ERR_OK) + return total_size; + total_size += (MMNIF_RX_BUFFERLEN / 2) - CLINE_SIZE; } - if (mmnif_tx_bypass(mmnif_dev, data + (j - 1) * ((MMNIF_RX_BUFFERLEN / 2) - 1), k, s) == ERR_OK) + if (mmnif_tx_bypass(mmnif_dev, data + (j - 1) * ((MMNIF_RX_BUFFERLEN / 2) - CLINE_SIZE), k, s) == ERR_OK) total_size += k; } -out: - mmnif_trigger_irq(p->dest_ip, 1); + return total_size; } @@ -1290,7 +1285,6 @@ int mmnif_recv(int s, void *data, uint32_t len, int flags) { mmnif_t* mmnif = (mmnif_t *) mmnif_dev->state; bypass_rxdesc_t *p = mmnif_hashlookup(s); - int ret; if (p == 0) return lwip_recv(s, data, len, flags); @@ -1316,9 +1310,8 @@ int mmnif_recv(int s, void *data, uint32_t len, int flags) bp = mmnif_hashlookup(b->desc_table[rdesc].fast_sock); if (bp) { b->desc_table[rdesc].stat = MMNIF_STATUS_INPROC; - ret = mmnif_rx_bypass(mmnif_dev, s, data, len); irq_nested_enable(state); - return ret; + return mmnif_rx_bypass(mmnif_dev, s, data, len); } } } From 
ca1abe00f6d34c224958e6a8599c93e91e927d55 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Thu, 20 Oct 2011 23:23:16 -0700 Subject: [PATCH 08/17] add Laplace example and GFX code --- apps/Makefile | 2 +- apps/gfx_client.c | 146 +++++++++++++++++++++++ apps/gfx_client.h | 50 ++++++++ apps/gfx_generic.c | 138 ++++++++++++++++++++++ apps/gfx_generic.h | 84 +++++++++++++ apps/laplace.c | 288 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 707 insertions(+), 1 deletion(-) create mode 100644 apps/gfx_client.c create mode 100644 apps/gfx_client.h create mode 100644 apps/gfx_generic.c create mode 100644 apps/gfx_generic.h create mode 100644 apps/laplace.c diff --git a/apps/Makefile b/apps/Makefile index 94912228..e45d1a38 100644 --- a/apps/Makefile +++ b/apps/Makefile @@ -1,4 +1,4 @@ -C_source := tests.c echo.c netio.c +C_source := tests.c echo.c netio.c laplace.c gfx_client.c gfx_generic.c MODULE := apps include $(TOPDIR)/Makefile.inc diff --git a/apps/gfx_client.c b/apps/gfx_client.c new file mode 100644 index 00000000..5614849d --- /dev/null +++ b/apps/gfx_client.c @@ -0,0 +1,146 @@ +/* + * Copyright 2011 Sarah Fischer, Nicolas Berr, Pablo Reble + * Chair for Operating Systems, RWTH Aachen University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is part of MetalSVM. 
+ */ + +#include + +#ifdef CONFIG_LWIP + +#include "gfx_client.h" +#include + +static int myrank; +static int sockfd; + +#ifndef SINGLE_CONNECT +#define SINGLE_CONNECT 1 +#endif +#define SINGLE_CONNECT_RANK 0 +#define USE_GETHOSTBYNAME 0 + +//int gfx_init(int* pargc, char*** pargv, int rank){ +int gfx_init(char* ip_str, char* port_str, int rank) { + char* hostname; + int port; + struct sockaddr_in serveraddr; + struct hostent *server; + + //*pargc -=2; + myrank = rank; + +#if SINGLE_CONNECT + // currently only rank 0 will connect to the gfx-server + if (rank != SINGLE_CONNECT_RANK) { + return 0; + } +#endif + + /* hostname und port aus den parametern ermitteln) */ + //kprintf("pargc: %d\n", *pargc); + + hostname = ip_str; //(*pargv)[(*pargc)]; + port = atoi(port_str); //atoi((*pargv)[(*pargc)+1]); + + kprintf("gfx-client connecting to host: %s, port: %d\n", hostname, port); + + /* socket erzeugen */ + sockfd = socket(AF_INET, SOCK_STREAM, 0); + if (sockfd < 0) + return -1; + +#if 1 + /* Disable the Nagle (TCP No Delay) algorithm */ + int flag = 1; + if (setsockopt(sockfd, IPPROTO_TCP, TCP_NODELAY, (char *)&flag, sizeof(flag)) < 0) { + kprintf("Couldn't setsockopt(TCP_NODELAY)\n"); + return -1; + } +#endif + +#if USE_GETHOSTBYNAME + /* dns eintrag vom server ermitteln */ + serveraddr = gethostbyname(hostname); + if (server == NULL){ + fprintf(stderr, "%s: no such host\n", hostname); + return -2; + } +#endif + + /* adresse vom server ermitteln */ + memset((char *) &serveraddr, 0x00, sizeof(serveraddr)); + serveraddr.sin_family = AF_INET; +#if USE_GETHOSTBYNAME + bcopy((char *)server->h_addr, (char *)&serveraddr.sin_addr.s_addr, server->h_length); +#else + serveraddr.sin_addr.s_addr = inet_addr(hostname); +#endif + serveraddr.sin_port = htons(port); + + /* verbindung herstellen */ + if (connect(sockfd, (const struct sockaddr *) &serveraddr, sizeof(serveraddr)) < 0) + return -3; + + return 0; +} + +int gfx_send(char* buf, int size, int tag){ + int ret, pos = 0; + 
uint32_t u32size, u32tag; + +#if SINGLE_CONNECT + if (myrank != SINGLE_CONNECT_RANK) + return 0; +#endif + + u32size = size; + u32tag = tag; + +// printf("sending stuff...."); +// printf("tag: %d, size: %d\n", tag, size); +// fflush(stdout); + + ret = write(sockfd, &u32tag, sizeof(u32tag)); + if (ret != sizeof(tag)) + return -1; + ret = write(sockfd, &u32size, sizeof(u32size)); + if (ret != sizeof(size)) + return -2; + + + do{ + ret = write(sockfd, &buf[pos], size-pos); + pos += ret; + } while (pos < size); + + + return 0; +} + +int gfx_finalize(){ + // uint32_t u32tag = (uint32_t)(-1); + uint32_t u32tag = 1111; +#if SINGLE_CONNECT + if (myrank == SINGLE_CONNECT_RANK) { + write(sockfd, &u32tag, sizeof(u32tag)); + close(sockfd); + } +#endif + return 0; +} + +#endif diff --git a/apps/gfx_client.h b/apps/gfx_client.h new file mode 100644 index 00000000..c36743c7 --- /dev/null +++ b/apps/gfx_client.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011 Sarah Fischer, Nicolas Berr, Pablo Reble + * Chair for Operating Systems, RWTH Aachen University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is part of MetalSVM. 
+ */ + +#ifndef __GFX_CLIENT_H__ +#define __GFX_CLIENT_H__ + +#include +#include +#include + +#ifdef CONFIG_LWIP + +#define BUFSIZE 1024 + +/* + * grafik initialisieren, letzte beide argumente, ip und port, werden entfernt + * verbindung zu server aufbauen, kollektiver aufruf, erstmal nur rank 0 connecten +*/ +//int gfx_init(int* pargc, char*** pargv, int rank); +int gfx_init(char* ip_str, char* port_str, int rank); + +/* + * paket mit der geg. laenge wird an den server gesehendet, das tag soll auch uebertragen werden +*/ +int gfx_send(char* buf, int size, int tag); + +/* + * kollektiver aufruf, vorhandene verbindungen kontrolliert beenden +*/ +int gfx_finalize(); + +#endif + +#endif diff --git a/apps/gfx_generic.c b/apps/gfx_generic.c new file mode 100644 index 00000000..63b6e570 --- /dev/null +++ b/apps/gfx_generic.c @@ -0,0 +1,138 @@ +/* + * Copyright 2011 Sarah Fischer, Nicolas Berr, Pablo Reble + * Chair for Operating Systems, RWTH Aachen University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is part of MetalSVM. 
+ */ + +#include "gfx_client.h" +#include "gfx_generic.h" + +#ifdef CONFIG_LWIP + +int GFX_init(int* pargc, char*** pargv, int rank) +{ + return gfx_init(pargc, pargv, rank); +} + +int GFX_send(char* buf, int size, int tag) +{ + return gfx_send(buf, size, tag); +} + +int GFX_finalize() +{ + return gfx_finalize(); +} + +int GFX_update() +{ + return gfx_send(NULL, 0, GFX_UPDATE); +} + +int GFX_usleep(int sleep_time) +{ + int buf[1]; + buf[0] = sleep_time; + return gfx_send((char*)buf, sizeof(int), GFX_USLEEP); +} + +int GFX_set_rgb(int index, int r, int g, int b) +{ + int buf[4]; + buf[0] = index; + buf[1] = r; + buf[2] = g; + buf[3] = b; + return gfx_send((char*)buf, 4*sizeof(int), GFX_SET_RGB); +} + +int GFX_set_xy(int x, int y) +{ + int buf[2]; + buf[0] = x; + buf[1] = y; + return gfx_send((char*)buf, 2*sizeof(int), GFX_SET_XY); +} + +int GFX_set_hd(int height, int direction) +{ + int buf[2]; + buf[0] = height; + buf[1] = direction; + return gfx_send((char*)buf, 2*sizeof(int), GFX_SET_HD); +} + + +int GFX_draw_data(char *buf, int len) +{ + return gfx_send(buf, len, GFX_DRAW_DATA); +} + +int GFX_draw_pixel(int x, int y, int color) +{ + int buf[3]; + buf[0] = x; + buf[1] = y; + buf[2] = color; + return gfx_send((char*)buf, 3*sizeof(int), GFX_DRAW_PIXEL); +} + +int GFX_draw_line(int x1, int y1, int x2, int y2, int color) +{ + int buf[5]; + buf[0] = x1; + buf[1] = y1; + buf[2] = x2; + buf[3] = y2; + buf[4] = color; + return gfx_send((char*)buf, 5*sizeof(int), GFX_DRAW_LINE); +} + +int GFX_draw_box(int x, int y, int height, int width, int color) +{ + int buf[5]; + buf[0] = x; + buf[1] = y; + buf[2] = height; + buf[3] = width; + buf[4] = color; + return gfx_send((char*)buf, 5*sizeof(int), GFX_DRAW_BOX); +} + +int GFX_draw_text(int x, int y, int color, char *text) +{ + int buf[32]; + buf[0] = x; + buf[1] = y; + buf[2] = color; + strcpy((char*)&(buf[3]), text); + + return gfx_send((char*)buf, 32*sizeof(int), GFX_DRAW_TEXT); +} + +int GFX_draw_points(int* points, int num) 
+{ + return gfx_send((char*)points, num*3*sizeof(int), GFX_DRAW_POINTS); +} + +int GFX_draw_poly(int* points, int num, int color) +{ + int i; + for(i=0; i +#include +#include +#include + +#ifdef CONFIG_ROCKCREEK + +#include +#include + +#define _LAPLACE_SHMEM_ +//#define _USE_GFX + +#ifdef _LAPLACE_SHMEM_ +#define SINGLE_CONNECT 1 +#else +#define SINGLE_CONNECT 0 +#endif + +#ifdef _USE_GFX +#include "gfx_generic.h" +#include "gfx_client.h" +#endif + +#define ABS(a) (((a) < 0) ? -(a) : (a)) +#define MAX(a,b) (((a) < (b)) ? (b) : (a)) + +#define N 512 +#define M 512 + +#define TMAX 100*50 + +//#define DATA unsigned int +#define DATA double +//#define FIX 1024 +#define FIX 1 + +#define USE_SVM 0 +#define MEMTYPE SVM_STRONG +//#define MEMTYPE SVM_LAZYRELEASE + +static inline double pow(double a, int b) +{ + double base = a; + int i; + + for (i = 1; i < b; ++i) + a *= base; + + return a; +} + +int laplace(void *arg) +{ + //char* argv[] = {"/bin/laplace", "192.168.4.254", "12301", NULL}; + //int argc = 3; + uint32_t flags; +#ifdef _USE_GFX + uint32_t ret; +#endif + int t; + + int i, I, j, J; + int my_rank; + int num_ranks; + + int n; + int m; + + volatile DATA **NewValues; + volatile DATA **OldValues; + + volatile DATA **tmp; + + volatile char **BufValues; + + uint64_t start, end; + + flags = irq_nested_disable(); + my_rank = RCCE_ue(); + num_ranks = RCCE_num_ues(); + irq_nested_enable(flags); + +#ifdef _USE_GFX + kprintf("Laplace calls gfx_init\n"); + ret = gfx_init("192.168.4.254" /*&argc */ , "5000" /*&argv */ , my_rank); + kprintf("gfx_init: %d\n", ret); +#endif + + m = M; + J = 0; + + n = N / num_ranks; + + if (my_rank == num_ranks - 1) + n += N % num_ranks; + I = n * my_rank; + + kprintf("(%d) %d x %d / offsets: %d, %d / (%d x %d)\n", my_rank, N, M, I, J, n, m); + +#ifdef _USE_GFX + if (my_rank == 0) { + for (i = 0; i < 256; i++) { +//set color index, r, g, b + if (i < 64) + GFX_set_rgb(i, 0, i * 256 / 64, 255); + else if (i < 128) + GFX_set_rgb(i, 0, 255, 255 - 
((i - 64) * 256 / 64)); + else if (i < 192) + GFX_set_rgb(i, (i - 128) * 256 / 64, 255, 0); + else if (i < 256) + GFX_set_rgb(i, 255, 255 - ((i - 192) * 256 / 64), 0); + } +// TODO: move draw area to center + //GFX_set_xy(150, 150); + GFX_set_hd(N, 0); + } +#endif + + NewValues = (volatile DATA **)kmalloc((N + 2) * sizeof(DATA *)); +#if USE_SVM + NewValues[0] = (DATA *) svmmalloc((N + 2) * (M + 2) * sizeof(DATA), MEMTYPE); +#else + NewValues[0] = (DATA *) kmalloc((N + 2) * (M + 2) * sizeof(DATA)); +#endif + + OldValues = (volatile DATA **)kmalloc((N + 2) * sizeof(DATA *)); +#if USE_SVM + OldValues[0] = (DATA *) svmmalloc((N + 2) * (M + 2) * sizeof(DATA), MEMTYPE); +#else + OldValues[0] = (DATA *) kmalloc((N + 2) * (M + 2) * sizeof(DATA)); +#endif + + for (i = 1; i < N + 2; i++) { + NewValues[i] = NewValues[i - 1] + (M + 2); + OldValues[i] = OldValues[i - 1] + (M + 2); + } + + BufValues = (volatile char **)kmalloc((N) * sizeof(char *)); + BufValues[0] = (char *)kmalloc((N) * (M) * sizeof(char)); + + for (i = 1; i < N; i++) { + BufValues[i] = BufValues[i - 1] + (M); + } + + RCCE_barrier(&RCCE_COMM_WORLD); + + kprintf("(%d) Memory allocated!\n", my_rank); + + //while (1) + { + int height = N + 2; + int width = M + 2; + + if (my_rank == 0) { + for (i = 0; i < N + 2; i++) { + for (j = 0; j < M + 2; j++) { + + double X = (((double)j / (double)width) * 5.0) - 2.5; + double Y = (((double)i / (double)height) * 5.0) - 2.5; + double Z = 0.0; + + Z = pow((4 - (X + 1) * (X + 1) - 4 * Y * Y), 2) + pow(1.2 * (1 - X), 3) - 10; + + if (Z < 0.0) + Z = 1.0; + else if (Z > 0.0) + Z = 0.0; + + NewValues[i][j] = (DATA) ((Z) * 255.0) * FIX; + + //if(NewValues[i][j] < 0) NewValues[i][j] = 0; + } + } + + for (i = 0; i < N + 2; i++) { + for (j = 0; j < M + 2; j++) { + OldValues[i][j] = NewValues[i][j]; + } + } + } + + svm_flush(); + svm_invalidate(); + RCCE_barrier(&RCCE_COMM_WORLD); + + kprintf("(%d) Arrays initialized!\n", my_rank); + + start = rdtsc(); + start = rdtsc(); + +// START 
ITERATIONS LOOP + for (t = 0; t < TMAX; t++) { + + //kprintf("(%d): o:%u n:%u \n",my_rank,(unsigned int)(OldValues[I+1][J+1]), (unsigned int)(NewValues[I+1][J+1]) ); + +// over all collumns + for (i = 1; i < n + 1; i++) { +// over all rows + for (j = 1; j < m + 1; j++) { +#if 1 + NewValues[I + i][J + j] = + (OldValues[I + i - 1][J + j] + + OldValues[I + i + 1][J + j] + + OldValues[I + i][J + j - 1] + + OldValues[I + i][J + j + 1]) / 4; + //if ( NewValues[I+i][J+j] < 0.0 ) NewValues[I+i][J+j] = 0.0 * FIX; + //else if ( NewValues[I+i][J+j] > 255.0 ) NewValues[I+i][J+j] = 255.0 * FIX; +#else + NewValues[I + i][J + j] = 25 * (DATA) (my_rank + 1); +#endif + } + } + + svm_flush(); + svm_invalidate(); + + tmp = NewValues; + NewValues = OldValues; + OldValues = tmp; + + RCCE_barrier(&RCCE_COMM_WORLD); + +#ifdef _USE_GFX + if ((my_rank == 0) && (t % 50 == 0)) { + int diff, res = 0; + + for (i = 1; i < N + 1; i++) { + for (j = 1; j < M + 1; j++) { + + diff = ABS(NewValues[i][j] - OldValues[i][j]); + if (diff > res) + res = diff; + + BufValues[i - 1][j - 1] = (unsigned char)(NewValues[i][j] / FIX); + //GFX_draw_pixel(150+j, 150+my_rank*n+i, (unsigned char)NewValues[i+1][j+1]); + //GFX_update(); + } + } + + kprintf("Graphic UPDATE! 
(t=%d) residual:%u \n", t, res); + GFX_draw_data((char *)(BufValues[0]), (N) * (M)); + GFX_update(); + } +#endif +// END ITERATIONS LOOP + } + + RCCE_barrier(&RCCE_COMM_WORLD); + + end = rdtsc(); + + kprintf("Calculation time: %llu ms (%llu ticks)\n", (end-start)/(1000ULL*get_cpu_frequency()), end-start); + + svm_statistics(); + } + + kprintf("(%d) Algorithm completed!\n", my_rank); + + // TODO: Freeing memory regions + +#ifdef _USE_GFX + gfx_finalize(); +#endif + + return 0; +} + +#endif From dcfbb44213d9a314f311fc13f3b1e32c11151a78 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Fri, 21 Oct 2011 00:10:57 -0700 Subject: [PATCH 09/17] use a benchmark friendly problem size --- apps/laplace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/laplace.c b/apps/laplace.c index f1d9988d..10fd3d76 100644 --- a/apps/laplace.c +++ b/apps/laplace.c @@ -44,8 +44,8 @@ #define ABS(a) (((a) < 0) ? -(a) : (a)) #define MAX(a,b) (((a) < (b)) ? (b) : (a)) -#define N 512 -#define M 512 +#define N 510 +#define M 510 #define TMAX 100*50 From 5687380ea0124f7cd2d37e8cf98c55728cbe19b0 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Fri, 21 Oct 2011 14:16:39 -0700 Subject: [PATCH 10/17] remove bug in the calculation of I --- apps/laplace.c | 16 +++++++++++----- apps/tests.c | 9 ++++++--- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/apps/laplace.c b/apps/laplace.c index 10fd3d76..a892d61b 100644 --- a/apps/laplace.c +++ b/apps/laplace.c @@ -44,17 +44,17 @@ #define ABS(a) (((a) < 0) ? -(a) : (a)) #define MAX(a,b) (((a) < (b)) ? 
(b) : (a)) -#define N 510 +#define N 1022 #define M 510 -#define TMAX 100*50 +#define TMAX (100*50) //#define DATA unsigned int #define DATA double //#define FIX 1024 #define FIX 1 -#define USE_SVM 0 +#define USE_SVM 1 #define MEMTYPE SVM_STRONG //#define MEMTYPE SVM_LAZYRELEASE @@ -110,10 +110,10 @@ int laplace(void *arg) J = 0; n = N / num_ranks; + I = n * my_rank; if (my_rank == num_ranks - 1) n += N % num_ranks; - I = n * my_rank; kprintf("(%d) %d x %d / offsets: %d, %d / (%d x %d)\n", my_rank, N, M, I, J, n, m); @@ -199,8 +199,10 @@ int laplace(void *arg) } } +#if USE_SVM svm_flush(); svm_invalidate(); +#endif RCCE_barrier(&RCCE_COMM_WORLD); kprintf("(%d) Arrays initialized!\n", my_rank); @@ -230,14 +232,16 @@ int laplace(void *arg) #endif } } - +#if USE_SVM svm_flush(); svm_invalidate(); +#endif tmp = NewValues; NewValues = OldValues; OldValues = tmp; + //RCCE_TNS_barrier(&RCCE_COMM_WORLD); RCCE_barrier(&RCCE_COMM_WORLD); #ifdef _USE_GFX @@ -271,7 +275,9 @@ int laplace(void *arg) kprintf("Calculation time: %llu ms (%llu ticks)\n", (end-start)/(1000ULL*get_cpu_frequency()), end-start); +#if USE_SVM svm_statistics(); +#endif } kprintf("(%d) Algorithm completed!\n", my_rank); diff --git a/apps/tests.c b/apps/tests.c index e6bd093b..65f4f465 100644 --- a/apps/tests.c +++ b/apps/tests.c @@ -40,6 +40,8 @@ static sem_t consuming, producing; static mailbox_int32_t mbox; static int val = 0; +int laplace(void* arg); + static int consumer(void* arg) { int i, m = 0; @@ -310,16 +312,17 @@ int test_init(void) sem_init(&consuming, 0); mailbox_int32_init(&mbox); - create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO); - create_kernel_task(NULL, join_test, NULL, NORMAL_PRIO); + //create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO); + //create_kernel_task(NULL, join_test, NULL, NORMAL_PRIO); //create_kernel_task(NULL, producer, , NORMAL_PRIO); //create_kernel_task(NULL, consumer, NULL, NORMAL_PRIO); //create_kernel_task(NULL, mail_ping, NULL, NORMAL_PRIO); 
//create_kernel_task(NULL, mail_noise, NULL, NORMAL_PRIO); //create_kernel_task(NULL, svm_test, NULL, NORMAL_PRIO); //create_kernel_task(NULL, pi, NULL, NORMAL_PRIO); + create_kernel_task(NULL, laplace, NULL, NORMAL_PRIO); //create_user_task(NULL, "/bin/hello", argv); - create_user_task(NULL, "/bin/tests", argv); + //create_user_task(NULL, "/bin/tests", argv); //create_user_task(NULL, "/bin/jacobi", argv); //create_user_task(NULL, "/bin/mshell", argv); //create_user_task(NULL, "/bin/jacobi", argv); From 2db441bc176107ba2348c19a6361508a96a212c6 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Fri, 21 Oct 2011 14:17:54 -0700 Subject: [PATCH 11/17] add temporary workaround to distribute shared pages over all memory controllers --- arch/x86/mm/svm.c | 79 ++++++++++++++++++++++++++++++++++++---------- arch/x86/scc/icc.c | 12 +++---- 2 files changed, 69 insertions(+), 22 deletions(-) diff --git a/arch/x86/mm/svm.c b/arch/x86/mm/svm.c index a757bc2c..6a0449b2 100644 --- a/arch/x86/mm/svm.c +++ b/arch/x86/mm/svm.c @@ -49,7 +49,6 @@ static volatile uint8_t* page_owner = NULL; // helper array to convert a physical to a virtual address static size_t phys2virt[SHARED_PAGES] = {[0 ... SHARED_PAGES-1] = 0}; static size_t shmbegin = 0; -static int my_ue = 0; static uint32_t emit[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0}; static uint32_t request[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0}; static uint32_t forward[RCCE_MAXNP] = {[0 ... 
RCCE_MAXNP-1] = 0}; @@ -61,7 +60,6 @@ int svm_init(void) // iRCCE is not thread save => disable interrupts flags = irq_nested_disable(); - my_ue = RCCE_ue(); shmbegin = (size_t)RC_SHM_BUFFER_START(); phyaddr = (size_t) RCCE_shmalloc(OWNER_SIZE); irq_nested_enable(flags); @@ -84,7 +82,7 @@ int svm_init(void) } // per default is core 0 owner - if (!my_ue) + if (!RCCE_IAM) memset((void*)page_owner, 0x00, OWNER_SIZE); // iRCCE is not thread save => disable interrupts @@ -112,32 +110,29 @@ int svm_access_request(size_t addr) return -EINVAL; pageid = (phyaddr-shmbegin) >> PAGE_SHIFT; - //svm_flush(); - if (page_owner[pageid] == my_ue) + if (page_owner[pageid] == RCCE_IAM) return 0; remote_rank = page_owner[pageid]; - ((size_t*) payload)[0] = my_ue; + ((size_t*) payload)[0] = RCCE_IAM; ((size_t*) payload)[1] = phyaddr; - //kprintf("send access request to %d of 0x%x\n", remote_rank, phyaddr); /* send ping request */ iRCCE_mail_send(2*sizeof(size_t), SVM_REQUEST, 0, payload, remote_rank); request[remote_rank]++; - NOP8; icc_send_gic_irq(remote_rank); - /* check for incoming messages */ - icc_mail_check(); - - while (page_owner[pageid] != my_ue) { - check_workqueues(); + while (page_owner[pageid] != RCCE_IAM) { + icc_mail_check(); + NOP8; } return change_page_permissions(addr, addr+PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE); } +static atomic_int32_t size_counter = ATOMIC_INIT(0); + void* svmmalloc(size_t size, uint32_t consistency) { size_t phyaddr, viraddr, i; @@ -153,11 +148,61 @@ void* svmmalloc(size_t size, uint32_t consistency) // currently, we allocate memory in page size granulation size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); +#if 0 // Workaround for our MARC paper + // iRCCE is not thread save => disable interrupts + flags = irq_nested_disable(); + + kprintf("Entering shmmalloc: size 0x%x, owner_size 0x%x\n", size, OWNER_SIZE); + if (RCCE_IAM && (consistency & SVM_STRONG)) + map_flags |= MAP_NO_ACCESS; + + viraddr = vm_alloc(size >> PAGE_SHIFT, map_flags); + 
kprintf("vm_alloc returns 0x%x\n", viraddr); + + static uint32_t last = 0; + // get memory on MC0 + if (last) + phyaddr = last + size/4; + else + last = phyaddr = (size_t) RCCE_shmalloc(size/4); + map_region(viraddr, phyaddr, (size/4) >> PAGE_SHIFT, map_flags|MAP_REMAP); + for(i=0; i> PAGE_SHIFT] = viraddr + i; + kprintf("svmmalloc on MC0: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, viraddr, size); + + // get memory on MC1 + phyaddr = shmbegin + 0x1000000 + atomic_int32_read(&size_counter); + map_region(viraddr + size/4, phyaddr, (size/4) >> PAGE_SHIFT, map_flags|MAP_REMAP); + for(i=0; i> PAGE_SHIFT] = viraddr + size/4 + i; + kprintf("svmmalloc on MC1: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, viraddr+size/4, size); + + // get memory on MC2 + phyaddr = shmbegin + 0x2000000 + atomic_int32_read(&size_counter); + map_region(viraddr + 2 * size/4, phyaddr, (size/4) >> PAGE_SHIFT, map_flags|MAP_REMAP); + for(i=0; i> PAGE_SHIFT] = viraddr + 2 * size/4 + i; + kprintf("svmmalloc on MC2: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, viraddr+2*size/4, size); + + // get memory on MC3 + phyaddr = shmbegin + 0x3000000 + atomic_int32_read(&size_counter); + map_region(viraddr + 3 * size/4, phyaddr, (size/4) >> PAGE_SHIFT, map_flags|MAP_REMAP); + for(i=0; i> PAGE_SHIFT] = viraddr + 3 * size/4 + i; + kprintf("svmmalloc on MC3: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, viraddr+3*size/4, size); + + atomic_int32_add(&size_counter, size/4); + irq_nested_enable(flags); + + kprintf("shmmalloc returns 0x%x\n", viraddr); + + return (void*) viraddr; +#else // iRCCE is not thread save => disable interrupts flags = irq_nested_disable(); phyaddr = (size_t) RCCE_shmalloc(size); - if (RCCE_ue() && (consistency & SVM_STRONG)) + if (RCCE_IAM && (consistency & SVM_STRONG)) map_flags |= MAP_NO_ACCESS; irq_nested_enable(flags); @@ -175,6 +220,7 @@ void* svmmalloc(size_t size, uint32_t consistency) kprintf("svmmalloc: phyaddr 0x%x, viraddr 0x%x, size 0x%x\n", phyaddr, 
viraddr, size); return (void*) viraddr; +#endif } void svmfree(void* addr, size_t size) @@ -218,12 +264,12 @@ int svm_emit_page(size_t phyaddr, int ue) return -EINVAL; pageid = (phyaddr-shmbegin) >> PAGE_SHIFT; - if (page_owner[pageid] != my_ue) { + if (page_owner[pageid] != RCCE_IAM) { // Core is nor owner => forward request to new owner int remote_rank; uint8_t payload[iRCCE_MAIL_HEADER_PAYLOAD]; - kprintf("Ups, core %d is not owner of page 0x%x\n", my_ue, phyaddr); + kprintf("Ups, core %d is not owner of page 0x%x\n", RCCE_IAM, phyaddr); remote_rank = page_owner[pageid]; ((size_t*) payload)[0] = ue; @@ -258,6 +304,7 @@ void svm_flush(void) *(int *)RCCE_fool_write_combine_buffer = 1; flush_cache(); +#error Currently not supported #if 0 // try to flush L2 cache z = Z_PID(RC_COREID[my_ue]); diff --git a/arch/x86/scc/icc.c b/arch/x86/scc/icc.c index 81258250..97796658 100644 --- a/arch/x86/scc/icc.c +++ b/arch/x86/scc/icc.c @@ -161,7 +161,7 @@ static void icc_handler(struct state *s) /* empty mail queue */ while( iRCCE_mail_recv(&header) == iRCCE_SUCCESS ) { icc_mail_check_tag(header); - iRCCE_mail_release( &header ); + iRCCE_mail_release(&header); NOP8; NOP8; NOP8; @@ -331,7 +331,7 @@ int icc_mail_ping(void) /* leave function if not participating in pingpong */ if( (RCCE_IAM != CORE_A) && (RCCE_IAM != CORE_B) ) return -1; - kprintf( "my_ue = %d\n", RCCE_IAM); + kprintf( "my rank = %d\n", RCCE_IAM); kprintf( "Hello from mail_ping ... 
\n" ); kprintf( "rounds = %d\n", ROUNDS ); @@ -401,7 +401,7 @@ int icc_mail_ping_irq(void) int res; iRCCE_MAIL_HEADER* recv_header = NULL; - kprintf( "my_rank = %d\n", RCCE_IAM ); + kprintf( "my rank = %d\n", RCCE_IAM ); kprintf( "rem_rank = %d\n", CORE_B ); kprintf( "rounds = %d\n", ROUNDS ); @@ -503,9 +503,6 @@ void icc_mail_check(void) iRCCE_mail_check(iRCCE_MAILBOX_ALL); - /* enable interrupts */ - irq_nested_enable(flags); - /* empty mail queue */ while( iRCCE_mail_recv(&header) == iRCCE_SUCCESS ) { icc_mail_check_tag(header); @@ -514,6 +511,9 @@ void icc_mail_check(void) NOP8; NOP8; } + + /* enable interrupts */ + irq_nested_enable(flags); } #endif From 1f7f702ae37bf9ab415203af7f49de408e86d611 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Fri, 21 Oct 2011 14:19:05 -0700 Subject: [PATCH 12/17] cosmetic changes --- arch/x86/scc/iRCCE_mailbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/scc/iRCCE_mailbox.c b/arch/x86/scc/iRCCE_mailbox.c index b311970d..d63e6c4b 100644 --- a/arch/x86/scc/iRCCE_mailbox.c +++ b/arch/x86/scc/iRCCE_mailbox.c @@ -170,7 +170,7 @@ static int iRCCE_mail_fetch( iRCCE_MAIL_HEADER dummy_header = {0, 0, 0, NULL, 0, 0, 0, {[0 ... 
iRCCE_MAIL_HEADER_PAYLOAD-1] = 0} }; -static int iRCCE_mailbox_check() { +static int iRCCE_mailbox_check(void) { int i,j; uint32_t flags; From e54b0e132b0f1ec6529812f6852b8b61422cd88d Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Fri, 21 Oct 2011 14:21:39 -0700 Subject: [PATCH 13/17] add test and set barrier --- arch/x86/include/asm/RCCE.h | 1 + arch/x86/scc/RCCE_admin.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/arch/x86/include/asm/RCCE.h b/arch/x86/include/asm/RCCE.h index 128ba664..d88e5723 100644 --- a/arch/x86/include/asm/RCCE.h +++ b/arch/x86/include/asm/RCCE.h @@ -190,6 +190,7 @@ int RCCE_comm_size(RCCE_COMM, int *); int RCCE_comm_rank(RCCE_COMM, int *); void RCCE_fence(void); int RCCE_barrier(RCCE_COMM *); +int RCCE_TNS_barrier(RCCE_COMM* comm); int RCCE_error_string(int, char *, int *); int RCCE_debug_set(int); int RCCE_debug_unset(int); diff --git a/arch/x86/scc/RCCE_admin.c b/arch/x86/scc/RCCE_admin.c index 78b26b21..09e70166 100644 --- a/arch/x86/scc/RCCE_admin.c +++ b/arch/x86/scc/RCCE_admin.c @@ -48,6 +48,8 @@ // En-/ or disable debug prints... #define DEBUG 0 +#define Test_and_Set(a) ((*(virtual_lockaddress[a])) & 0x01) + //...................................................................................... // GLOBAL VARIABLES USED BY THE LIBRARY //...................................................................................... @@ -105,6 +107,38 @@ void RC_cache_invalidate() { return; } +int RCCE_TNS_barrier(RCCE_COMM* comm) { + +// two roundtrips to realize a barrier using a T&S Register for each core. + +// 1. search first free T&S Register to spin +// 2. last waiter wakes up first waiter and continues local wait +// 3. first waiter wakes up second waiter by releasing its lock ... +// At least every used T&S Register is 0 and no UE can overtake a barrier. 
+ int num = comm->size; + int step = 0; + //fprintf(stderr,"%d:\t enter barrier \n",id); + + while( !Test_and_Set(step) ) ++step; + // only one UE runs until T&S # num-1 + + //fprintf(stderr,"%d:\t step %d\n",id,step); + + if(step == num-1) { + //fprintf(stderr,"%d:\t I am the last one\n",id); + *(virtual_lockaddress[0]) = 0x0; + while(!Test_and_Set(step)) ; + *(virtual_lockaddress[step]) = 0x0; + } else { + while(!Test_and_Set(step)) ; + *(virtual_lockaddress[step]) = 0x0; + *(virtual_lockaddress[step+1]) = 0x0; + } + //fprintf(stderr,"released barrier! step: %d\n", step); + return RCCE_SUCCESS; +} + //-------------------------------------------------------------------------------------- // FUNCTION: RC_COMM_BUFFER_SIZE //-------------------------------------------------------------------------------------- From 97a648020a9dc39ab05016978042df01e889ff6d Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Sat, 22 Oct 2011 06:13:16 -0700 Subject: [PATCH 14/17] add prefetching for the sequential laplace --- apps/laplace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/apps/laplace.c b/apps/laplace.c index a892d61b..bd672299 100644 --- a/apps/laplace.c +++ b/apps/laplace.c @@ -219,6 +219,12 @@ int laplace(void *arg) for (i = 1; i < n + 1; i++) { // over all rows for (j = 1; j < m + 1; j++) { +#if !USE_SVM + if (j % CACHE_LINE == 1) { + asm volatile ("movl %0, %%eax" :: "r"(&(NewValues[I + i][J + j])) : "%eax"); + } +#endif + #if 1 NewValues[I + i][J + j] = (OldValues[I + i - 1][J + j] + @@ -241,8 +247,8 @@ int laplace(void *arg) NewValues = OldValues; OldValues = tmp; - //RCCE_TNS_barrier(&RCCE_COMM_WORLD); - RCCE_barrier(&RCCE_COMM_WORLD); + RCCE_TNS_barrier(&RCCE_COMM_WORLD); + //RCCE_barrier(&RCCE_COMM_WORLD); #ifdef _USE_GFX if ((my_rank == 0) && (t % 50 == 0)) { From a36cc3f3a9b93f98bd57eb50c48f65f2ff94a302 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Mon, 24 Oct 2011 01:24:37 -0700 Subject: [PATCH 15/17] redesign of the init
code => now, all cores initialize the arrays --- apps/laplace.c | 50 +++++++++++++++++++++---------------------- apps/scc_bootinfo.asm | 0 2 files changed, 24 insertions(+), 26 deletions(-) create mode 100644 apps/scc_bootinfo.asm diff --git a/apps/laplace.c b/apps/laplace.c index bd672299..f017f89b 100644 --- a/apps/laplace.c +++ b/apps/laplace.c @@ -54,9 +54,12 @@ //#define FIX 1024 #define FIX 1 -#define USE_SVM 1 -#define MEMTYPE SVM_STRONG -//#define MEMTYPE SVM_LAZYRELEASE +#define USE_STRONG 1 +#define USE_LAZYRELEASE 0 + +#if USE_STRONG && USE_LAZYRELEASE +#error Please, use only one memory model +#endif static inline double pow(double a, int b) { @@ -137,15 +140,19 @@ int laplace(void *arg) #endif NewValues = (volatile DATA **)kmalloc((N + 2) * sizeof(DATA *)); -#if USE_SVM - NewValues[0] = (DATA *) svmmalloc((N + 2) * (M + 2) * sizeof(DATA), MEMTYPE); +#if USE_STRONG + NewValues[0] = (DATA *) svmmalloc((N + 2) * (M + 2) * sizeof(DATA), SVM_STRONG); +#elif USE_LATYRELEASE + NewValues[0] = (DATA *) svmmalloc((N + 2) * (M + 2) * sizeof(DATA), SVM_LAZYRELEASE); #else NewValues[0] = (DATA *) kmalloc((N + 2) * (M + 2) * sizeof(DATA)); #endif OldValues = (volatile DATA **)kmalloc((N + 2) * sizeof(DATA *)); -#if USE_SVM - OldValues[0] = (DATA *) svmmalloc((N + 2) * (M + 2) * sizeof(DATA), MEMTYPE); +#if USE_STRONG + OldValues[0] = (DATA *) svmmalloc((N + 2) * (M + 2) * sizeof(DATA), SVM_STRONG); +#elif USE_LATYRELEASE + OldValues[0] = (DATA *) svmmalloc((N + 2) * (M + 2) * sizeof(DATA), SVM_LAZYRELEASE); #else OldValues[0] = (DATA *) kmalloc((N + 2) * (M + 2) * sizeof(DATA)); #endif @@ -171,8 +178,11 @@ int laplace(void *arg) int height = N + 2; int width = M + 2; - if (my_rank == 0) { + /*if (my_rank == 0) { for (i = 0; i < N + 2; i++) { + for (j = 0; j < M + 2; j++) {*/ + { + for (i = I; i < I + n + 2; i++) { for (j = 0; j < M + 2; j++) { double X = (((double)j / (double)width) * 5.0) - 2.5; @@ -186,20 +196,14 @@ int laplace(void *arg) else if (Z > 0.0) Z = 
0.0; - NewValues[i][j] = (DATA) ((Z) * 255.0) * FIX; + OldValues[i][j] = NewValues[i][j] = (DATA) ((Z) * 255.0) * FIX; //if(NewValues[i][j] < 0) NewValues[i][j] = 0; } } - - for (i = 0; i < N + 2; i++) { - for (j = 0; j < M + 2; j++) { - OldValues[i][j] = NewValues[i][j]; - } - } } -#if USE_SVM +#if USE_LAZYRELEASE svm_flush(); svm_invalidate(); #endif @@ -219,12 +223,6 @@ int laplace(void *arg) for (i = 1; i < n + 1; i++) { // over all rows for (j = 1; j < m + 1; j++) { -#if !USE_SVM - if (j % CACHE_LINE == 1) { - asm volatile ("movl %0, %%eax" :: "r"(&(NewValues[I + i][J + j])) : "%eax"); - } -#endif - #if 1 NewValues[I + i][J + j] = (OldValues[I + i - 1][J + j] + @@ -238,7 +236,7 @@ int laplace(void *arg) #endif } } -#if USE_SVM +#if USE_LAZYRELEASE svm_flush(); svm_invalidate(); #endif @@ -247,8 +245,8 @@ int laplace(void *arg) NewValues = OldValues; OldValues = tmp; - RCCE_TNS_barrier(&RCCE_COMM_WORLD); - //RCCE_barrier(&RCCE_COMM_WORLD); + //RCCE_TNS_barrier(&RCCE_COMM_WORLD); + RCCE_barrier(&RCCE_COMM_WORLD); #ifdef _USE_GFX if ((my_rank == 0) && (t % 50 == 0)) { @@ -281,7 +279,7 @@ int laplace(void *arg) kprintf("Calculation time: %llu ms (%llu ticks)\n", (end-start)/(1000ULL*get_cpu_frequency()), end-start); -#if USE_SVM +#if USE_STRONG || USE_LAZYRELEASE svm_statistics(); #endif } diff --git a/apps/scc_bootinfo.asm b/apps/scc_bootinfo.asm new file mode 100644 index 00000000..e69de29b From 63b9e2e177e9efedd89748f00f19dc6cd58a56fc Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Mon, 24 Oct 2011 16:20:29 +0200 Subject: [PATCH 16/17] switch back to default test cases --- apps/tests.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/tests.c b/apps/tests.c index 65f4f465..d6b8a5eb 100644 --- a/apps/tests.c +++ b/apps/tests.c @@ -312,17 +312,17 @@ int test_init(void) sem_init(&consuming, 0); mailbox_int32_init(&mbox); - //create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO); - //create_kernel_task(NULL, join_test, NULL, 
NORMAL_PRIO); + create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO); + create_kernel_task(NULL, join_test, NULL, NORMAL_PRIO); //create_kernel_task(NULL, producer, , NORMAL_PRIO); //create_kernel_task(NULL, consumer, NULL, NORMAL_PRIO); //create_kernel_task(NULL, mail_ping, NULL, NORMAL_PRIO); //create_kernel_task(NULL, mail_noise, NULL, NORMAL_PRIO); //create_kernel_task(NULL, svm_test, NULL, NORMAL_PRIO); //create_kernel_task(NULL, pi, NULL, NORMAL_PRIO); - create_kernel_task(NULL, laplace, NULL, NORMAL_PRIO); + //create_kernel_task(NULL, laplace, NULL, NORMAL_PRIO); //create_user_task(NULL, "/bin/hello", argv); - //create_user_task(NULL, "/bin/tests", argv); + create_user_task(NULL, "/bin/tests", argv); //create_user_task(NULL, "/bin/jacobi", argv); //create_user_task(NULL, "/bin/mshell", argv); //create_user_task(NULL, "/bin/jacobi", argv); From 610f8177d805c807b924ae253d68b576a8d0d9b2 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Mon, 24 Oct 2011 16:23:25 +0200 Subject: [PATCH 17/17] remove compiling problems on non-SCC platforms --- apps/netio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/apps/netio.c b/apps/netio.c index 088fe6b0..9a6c374c 100644 --- a/apps/netio.c +++ b/apps/netio.c @@ -47,20 +47,24 @@ /* See http://www.nwlab.net/art/netio/netio.html to get the netio tool */ #ifdef CONFIG_LWIP +#ifdef CONFIG_ROCKCREEK #if USE_SOCKET_BYPASSING // for socket bypassing #include #undef LWIP_COMPAT_SOCKETS #endif +#endif #include #include #include +#ifdef CONFIG_ROCKCREEK #if USE_SOCKET_BYPASSING // for socket bypassing #include #undef AF_INET #define AF_INET AF_MMNIF_NET #endif +#endif typedef struct {