From f556608010601739a603c9d55ed3f666fa7f2e09 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Tue, 19 Apr 2011 07:17:07 -0700 Subject: [PATCH 01/13] - add iRCCE code --- arch/x86/include/asm/iRCCE.h | 154 ++++++++++++ arch/x86/include/asm/iRCCE_lib.h | 39 +++ arch/x86/scc/Makefile | 2 +- arch/x86/scc/iRCCE_admin.c | 67 ++++++ arch/x86/scc/iRCCE_get.c | 85 +++++++ arch/x86/scc/iRCCE_irecv.c | 393 +++++++++++++++++++++++++++++++ arch/x86/scc/iRCCE_isend.c | 355 ++++++++++++++++++++++++++++ arch/x86/scc/iRCCE_put.c | 87 +++++++ arch/x86/scc/iRCCE_recv.c | 190 +++++++++++++++ arch/x86/scc/iRCCE_send.c | 165 +++++++++++++ arch/x86/scc/iRCCE_synch.c | 127 ++++++++++ 11 files changed, 1663 insertions(+), 1 deletion(-) create mode 100644 arch/x86/include/asm/iRCCE.h create mode 100644 arch/x86/include/asm/iRCCE_lib.h create mode 100644 arch/x86/scc/iRCCE_admin.c create mode 100644 arch/x86/scc/iRCCE_get.c create mode 100644 arch/x86/scc/iRCCE_irecv.c create mode 100644 arch/x86/scc/iRCCE_isend.c create mode 100644 arch/x86/scc/iRCCE_put.c create mode 100644 arch/x86/scc/iRCCE_recv.c create mode 100644 arch/x86/scc/iRCCE_send.c create mode 100644 arch/x86/scc/iRCCE_synch.c diff --git a/arch/x86/include/asm/iRCCE.h b/arch/x86/include/asm/iRCCE.h new file mode 100644 index 00000000..8b878bfd --- /dev/null +++ b/arch/x86/include/asm/iRCCE.h @@ -0,0 +1,154 @@ +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// [2010-10-25] added support for non-blocking send/recv operations +// - iRCCE_isend(), ..._test(), ..._wait(), ..._push() +// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push() +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +// [2010-11-12] extracted non-blocking code into separate library +// by Carsten Scholtes, University of Bayreuth +// +// [2010-12-09] added functions for a convenient handling of multiple +// pending non-blocking requests +// by Jacek Galowicz, Chair for Operating Systems +// RWTH Aachen University +// +#ifndef IRCCE_H +#define IRCCE_H + +#include + +#define iRCCE_SUCCESS RCCE_SUCCESS +#define iRCCE_PENDING -1 +#define iRCCE_RESERVED -2 +#define iRCCE_NOT_ENQUEUED -3 + +typedef struct _iRCCE_SEND_REQUEST { + char *privbuf; // source buffer in local private memory (send buffer) + t_vcharp combuf; // intermediate buffer in MPB + size_t chunk; // size of MPB available for this message (bytes) + RCCE_FLAG *ready; // flag indicating whether receiver is ready + RCCE_FLAG *sent; // flag indicating whether message has been sent by source + size_t size; // size of message (bytes) + int dest; // UE that will receive the message + + size_t wsize; // offset within send buffer when putting in "chunk" bytes + size_t remainder; // bytes remaining to be sent + size_t nbytes; // number of bytes to be sent in single iRCCE_put call + char *bufptr; // running pointer inside privbuf for current location + + int label; // jump/goto label for the reentrance of the respective poll function + int finished; // flag that indicates whether the request has already been finished + + struct _iRCCE_SEND_REQUEST *next; +} iRCCE_SEND_REQUEST; + + +typedef struct _iRCCE_RECV_REQUEST { + char *privbuf; // destination buffer in local private memory (recv buffer) + t_vcharp combuf; // intermediate buffer in MPB + size_t chunk; // size of MPB available for this message (bytes) + RCCE_FLAG *ready; // flag indicating whether receiver is ready +
RCCE_FLAG *sent; // flag indicating whether message has been sent by source + size_t size; // size of message (bytes) + int source; // UE that will send the message + + size_t wsize; // offset within recv buffer when getting "chunk" bytes + size_t remainder; // bytes remaining to be received + size_t nbytes; // number of bytes to be received in single iRCCE_get call + char *bufptr; // running pointer inside privbuf for current location + + int label; // jump/goto label for the reentrance of the respective poll function + int finished; // flag that indicates whether the request has already been finished + int started; // flag that indicates whether message parts have already been received + + struct _iRCCE_RECV_REQUEST *next; +} iRCCE_RECV_REQUEST; + +#define iRCCE_WAIT_LIST_RECV_TYPE 0 +#define iRCCE_WAIT_LIST_SEND_TYPE 1 + +typedef struct _iRCCE_WAIT_LISTELEM { + int type; + struct _iRCCE_WAIT_LISTELEM * next; + void * req; +} iRCCE_WAIT_LISTELEM; + +typedef struct _iRCCE_WAIT_LIST { + iRCCE_WAIT_LISTELEM * first; + iRCCE_WAIT_LISTELEM * last; +} iRCCE_WAIT_LIST; + + +/////////////////////////////////////////////////////////////// +// +// THE iRCCE API: +// +// Initialize function: +int iRCCE_init(void); +// +// Non-blocking send/recv functions: +int iRCCE_isend(char *, size_t, int, iRCCE_SEND_REQUEST *); +int iRCCE_isend_test(iRCCE_SEND_REQUEST *, int *); +int iRCCE_isend_wait(iRCCE_SEND_REQUEST *); +int iRCCE_isend_push(void); +int iRCCE_irecv(char *, size_t, int, iRCCE_RECV_REQUEST *); +int iRCCE_irecv_test(iRCCE_RECV_REQUEST *, int *); +int iRCCE_irecv_wait(iRCCE_RECV_REQUEST *); +int iRCCE_irecv_push(void); +// +// Blocking but pipelined send/recv functions: +int iRCCE_send(char *, size_t, int); +int iRCCE_recv(char *, size_t, int); +// +// SCC-customized put/get and memcpy functions: +int iRCCE_put(t_vcharp, t_vcharp, int, int); +int iRCCE_get(t_vcharp, t_vcharp, int, int); +void* iRCCE_memcpy_put(void*, const void*, size_t); +void* iRCCE_memcpy_get(void*, const
void*, size_t); +// +// Wait/test-all/any functions: +void iRCCE_init_wait_list(iRCCE_WAIT_LIST*); +void iRCCE_add_to_wait_list(iRCCE_WAIT_LIST*, iRCCE_SEND_REQUEST *, iRCCE_RECV_REQUEST *); +int iRCCE_test_all(iRCCE_WAIT_LIST*, int *); +int iRCCE_wait_all(iRCCE_WAIT_LIST*); +int iRCCE_test_any(iRCCE_WAIT_LIST*, iRCCE_SEND_REQUEST **, iRCCE_RECV_REQUEST **); +int iRCCE_wait_any(iRCCE_WAIT_LIST*, iRCCE_SEND_REQUEST **, iRCCE_RECV_REQUEST **); +// +// Cancel functions for non-blocking requests that have not started yet: +int iRCCE_isend_cancel(iRCCE_SEND_REQUEST *, int *); +int iRCCE_irecv_cancel(iRCCE_RECV_REQUEST *, int *); +// +/////////////////////////////////////////////////////////////// +// +// Just for convenience: +#if 1 +#define RCCE_isend iRCCE_isend +#define RCCE_isend_test iRCCE_isend_test +#define RCCE_isend_wait iRCCE_isend_wait +#define RCCE_isend_push iRCCE_isend_push +#define RCCE_irecv iRCCE_irecv +#define RCCE_irecv_test iRCCE_irecv_test +#define RCCE_irecv_wait iRCCE_irecv_wait +#define RCCE_irecv_push iRCCE_irecv_push +#define RCCE_SEND_REQUEST iRCCE_SEND_REQUEST +#define RCCE_RECV_REQUEST iRCCE_RECV_REQUEST +#endif +/////////////////////////////////////////////////////////////// + +#endif + diff --git a/arch/x86/include/asm/iRCCE_lib.h b/arch/x86/include/asm/iRCCE_lib.h new file mode 100644 index 00000000..0d8b4e16 --- /dev/null +++ b/arch/x86/include/asm/iRCCE_lib.h @@ -0,0 +1,39 @@ +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-10-25] added support for non-blocking send/recv operations +// - iRCCE_isend(), ..._test(), ..._wait(), ..._push() +// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push() +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +// [2010-11-12] extracted non-blocking code into separate library +// by Carsten Scholtes +// +#ifndef IRCCE_LIB_H +#define IRCCE_LIB_H +#include +#include + +extern iRCCE_SEND_REQUEST* iRCCE_isend_queue; +extern iRCCE_RECV_REQUEST* iRCCE_irecv_queue[RCCE_MAXNP]; +#ifdef _OPENMP +#pragma omp threadprivate (iRCCE_isend_queue, iRCCE_irecv_queue) +#endif + +int iRCCE_test_flag(RCCE_FLAG, RCCE_FLAG_STATUS, int *); + +#endif + diff --git a/arch/x86/scc/Makefile b/arch/x86/scc/Makefile index d4866f2b..f32bc11f 100644 --- a/arch/x86/scc/Makefile +++ b/arch/x86/scc/Makefile @@ -1,4 +1,4 @@ -C_source := scc_init.c SCC_API.c RCCE_malloc.c RCCE_shmalloc.c RCCE_debug.c RCCE_qsort.c RCCE_DCMflush.c RCCE_send.c RCCE_recv.c RCCE_flags.c RCCE_comm.c RCCE_put.c RCCE_get.c RCCE_synch.c RCCE_bcast.c RCCE_admin.c # RCCE_power_management.c +C_source := scc_init.c SCC_API.c iRCCE_admin.c iRCCE_send.c iRCCE_isend.c iRCCE_irecv.c iRCCE_recv.c iRCCE_get.c iRCCE_put.c iRCCE_synch.c RCCE_malloc.c RCCE_shmalloc.c RCCE_debug.c RCCE_qsort.c RCCE_DCMflush.c RCCE_send.c RCCE_recv.c RCCE_flags.c RCCE_comm.c RCCE_put.c RCCE_get.c RCCE_synch.c RCCE_bcast.c RCCE_admin.c # RCCE_power_management.c ASM_source := MODULE := arch_x86_scc diff --git a/arch/x86/scc/iRCCE_admin.c b/arch/x86/scc/iRCCE_admin.c new file mode 100644 index 00000000..c61d66b9 --- /dev/null +++ b/arch/x86/scc/iRCCE_admin.c @@ -0,0 +1,67 @@ +//*************************************************************************************** +// Administrative routines. 
+//*************************************************************************************** +// +// Author: Rob F. Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-10-25] added support for non-blocking send/recv operations +// - iRCCE_isend(), ..._test(), ..._wait(), ..._push() +// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push() +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +// [2010-11-12] extracted non-blocking code into separate library +// by Carsten Scholtes +// +// [2011-02-21] added support for multiple incoming queues +// (one recv queue per remote rank) +// + +#include + +#ifdef CONFIG_ROCKCREEK + +#include + +// send request queue +iRCCE_SEND_REQUEST* iRCCE_isend_queue; +// recv request queue +iRCCE_RECV_REQUEST* iRCCE_irecv_queue[RCCE_MAXNP]; + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_init +//-------------------------------------------------------------------------------------- +// initialize the library +//-------------------------------------------------------------------------------------- +int iRCCE_init(void) { + int i; + + for(i=0; i +#include + +#ifdef CONFIG_ROCKCREEK + +#include + +#ifdef COPPERRIDGE +#include 
"scc_memcpy.h" +#endif + +void* iRCCE_memcpy_get(void *dest, const void *src, size_t count) +{ +#ifdef COPPERRIDGE + return memcpy_from_mpb(dest, src, count); +#else + return memcpy(dest, src, count); +#endif +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_get +//-------------------------------------------------------------------------------------- +// copy data from address "source" in the remote MPB to address "target" in either the +// local MPB, or in the calling UE's private memory. We do not test to see if a move +// into the calling UE's private memory stays within allocated memory * +//-------------------------------------------------------------------------------------- +int iRCCE_get( + t_vcharp target, // target buffer, MPB or private memory + t_vcharp source, // source buffer, MPB + int num_bytes, // number of bytes to copy (must be multiple of cache line size + int ID // rank of source UE + ) { + + // in non-GORY mode we only need to retain the MPB source shift; we + // already know the source is in the MPB, not private memory + source = RCCE_comm_buffer[ID]+(source-RCCE_comm_buffer[RCCE_IAM]); + + // do the actual copy, making sure we copy fresh data +#ifdef _OPENMP + #pragma omp flush +#endif + RC_cache_invalidate(); + + iRCCE_memcpy_get((void *)target, (void *)source, num_bytes); + + // flush data to make sure it is visible to all threads; cannot use a flush list + // because it concerns malloced space +#ifdef _OPENMP + #pragma omp flush +#endif + return(iRCCE_SUCCESS); +} + +#endif diff --git a/arch/x86/scc/iRCCE_irecv.c b/arch/x86/scc/iRCCE_irecv.c new file mode 100644 index 00000000..e7d5ad1d --- /dev/null +++ b/arch/x86/scc/iRCCE_irecv.c @@ -0,0 +1,393 @@ +//*************************************************************************************** +// Synchronized receive routines. 
+//*************************************************************************************** +// +// Author: Rob F. Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-10-25] added support for non-blocking send/recv operations +// - iRCCE_isend(), ..._test(), ..._wait(), ..._push() +// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push() +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +// [2010-11-12] extracted non-blocking code into separate library +// by Carsten Scholtes +// +// [2010-12-09] added cancel functions for non-blocking send/recv requests +// by Carsten Clauss +// +// [2011-02-21] added support for multiple incoming queues +// (one recv queue per remote rank) +// + +#include +#include + +#ifdef CONFIG_ROCKCREEK + +#include + +static int iRCCE_push_recv_request(iRCCE_RECV_REQUEST *request) { + + char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size + int test; // flag for calling iRCCE_test_flag() + + if(request->finished) return(iRCCE_SUCCESS); + + if(request->label == 1) goto label1; + if(request->label == 2) goto label2; + if(request->label == 3) goto label3; + + // receive data in units of available chunk size of MPB + for (; request->wsize < (request->size / 
request->chunk) * request->chunk; request->wsize += request->chunk) { + request->bufptr = request->privbuf + request->wsize; + request->nbytes = request->chunk; +label1: + iRCCE_test_flag(*(request->sent), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 1; + return(iRCCE_PENDING); + } + request->started = 1; + + RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM); + // copy data from source's MPB space to private memory + iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source); + + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source); + } + + request->remainder = request->size % request->chunk; + // if nothing is left over, we are done + if (!request->remainder) { + request->finished = 1; + return(iRCCE_SUCCESS); + } + + // receive remainder of data--whole cache lines + request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk; + request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE; + if (request->nbytes) { +label2: + iRCCE_test_flag(*(request->sent), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 2; + return(iRCCE_PENDING); + } + request->started = 1; + + RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM); + // copy data from source's MPB space to private memory + iRCCE_get((t_vcharp)request->bufptr, request->combuf, request->nbytes, request->source); + + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source); + } + + request->remainder = request->size % request->chunk; + request->remainder = request->remainder % RCCE_LINE_SIZE; + if (!request->remainder) { + request->finished = 1; + return(iRCCE_SUCCESS); + } + + // remainder is less than cache line. 
This must be copied into appropriately sized + // intermediate space before exact number of bytes get copied to the final destination + request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes; + request->nbytes = RCCE_LINE_SIZE; +label3: + iRCCE_test_flag(*(request->sent), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 3; + return(iRCCE_PENDING); + } + request->started = 1; + + RCCE_flag_write(request->sent, RCCE_FLAG_UNSET, RCCE_IAM); + // copy data from source's MPB space to private memory + iRCCE_get((t_vcharp)padline, request->combuf, request->nbytes, request->source); + memcpy(request->bufptr,padline,request->remainder); + + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(request->ready, RCCE_FLAG_SET, request->source); + + request->finished = 1; + return(iRCCE_SUCCESS); +} + +static void iRCCE_init_recv_request( + char *privbuf, // destination buffer in local private memory (recv buffer) + t_vcharp combuf, // intermediate buffer in MPB + size_t chunk, // size of MPB available for this message (bytes) + RCCE_FLAG *ready, // flag indicating whether receiver is ready + RCCE_FLAG *sent, // flag indicating whether message has been sent by source + size_t size, // size of message (bytes) + int source, // UE that will send the message + iRCCE_RECV_REQUEST *request // request object to be initialized + ) { + + request->privbuf = privbuf; + request->combuf = combuf; + request->chunk = chunk; + request->ready = ready; + request->sent = sent; + request->size = size; + request->source = source; + + request->wsize = 0; + request->remainder = 0; + request->nbytes = 0; + request->bufptr = NULL; + + request->label = 0; + request->finished = 0; + request->started = 0; + + request->next = NULL; + + return; +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_irecv +//-------------------------------------------------------------------------------------- +//
non-blocking recv function; returns an handle of type iRCCE_RECV_REQUEST +//-------------------------------------------------------------------------------------- +static iRCCE_RECV_REQUEST blocking_irecv_request; +int iRCCE_irecv(char *privbuf, size_t size, int source, iRCCE_RECV_REQUEST *request) { + + if(request == NULL) request = &blocking_irecv_request; + + if (source<0 || source >= RCCE_NP) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID)); + else { + iRCCE_init_recv_request(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source], + size, source, request); + + if(iRCCE_irecv_queue[source] == NULL) { + + if(iRCCE_push_recv_request(request) == iRCCE_SUCCESS) { + return(iRCCE_SUCCESS); + } + else { + iRCCE_irecv_queue[source] = request; + + if(request == &blocking_irecv_request) { + iRCCE_irecv_wait(request); + return(iRCCE_SUCCESS); + } + + return(iRCCE_PENDING); + } + } + else { + if(iRCCE_irecv_queue[source]->next == NULL) { + iRCCE_irecv_queue[source]->next = request; + } + else { + iRCCE_RECV_REQUEST *run = iRCCE_irecv_queue[source]; + while(run->next != NULL) run = run->next; + run->next = request; + } + + if(request == &blocking_irecv_request) { + iRCCE_irecv_wait(request); + return(iRCCE_SUCCESS); + } + + return(iRCCE_RESERVED); + } + } +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_irecv_test +//-------------------------------------------------------------------------------------- +// test function for completion of the requested non-blocking recv operation +// Just provide NULL instead of the testvar if you don't need it +//-------------------------------------------------------------------------------------- +int iRCCE_irecv_test(iRCCE_RECV_REQUEST *request, int *test) { + + int source; + + if(request == NULL) { + + if(iRCCE_irecv_push() == iRCCE_SUCCESS) { + if (test) (*test) = 1; + return(iRCCE_SUCCESS); + } + else { + if (test) (*test) =
0; + return(iRCCE_PENDING); + } + } + + source = request->source; + + if(request->finished) { + if (test) (*test) = 1; + return(iRCCE_SUCCESS); + } + + if(iRCCE_irecv_queue[source] != request) { + if (test) (*test) = 0; + return(iRCCE_RESERVED); + } + + iRCCE_push_recv_request(request); + + if(request->finished) { + iRCCE_irecv_queue[source] = request->next; + + if (test) (*test) = 1; + return(iRCCE_SUCCESS); + } + + if (test) (*test) = 0; + return(iRCCE_PENDING); +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_irecv_push +//-------------------------------------------------------------------------------------- +// progress function for pending requests in the irecv queue +//-------------------------------------------------------------------------------------- +static int iRCCE_irecv_push_source(int source) { + + iRCCE_RECV_REQUEST *request = iRCCE_irecv_queue[source]; + + if(request == NULL) { + return(iRCCE_SUCCESS); + } + + if(request->finished) { + return(iRCCE_SUCCESS); + } + + iRCCE_push_recv_request(request); + + if(request->finished) { + iRCCE_irecv_queue[source] = request->next; + return(iRCCE_SUCCESS); + } + + return(iRCCE_PENDING); +} + +int iRCCE_irecv_push(void) { + + int i, j; + int retval = iRCCE_SUCCESS; + + for(i=0; ifinished) { + iRCCE_irecv_push(); + iRCCE_isend_push(); + } + } + else { + do { + iRCCE_isend_push(); + } + while( iRCCE_irecv_push() != iRCCE_SUCCESS ); + } + + return(iRCCE_SUCCESS); +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_irecv_cancel +//-------------------------------------------------------------------------------------- +// try to cancel a pending non-blocking recv request +//-------------------------------------------------------------------------------------- +int iRCCE_irecv_cancel(iRCCE_RECV_REQUEST *request, int *test) { + + int source; + iRCCE_RECV_REQUEST *run; + + if( (request == 
NULL) || (request->finished) ) { + if (test) (*test) = 0; + return iRCCE_NOT_ENQUEUED; + } + + source = request->source; + + if(iRCCE_irecv_queue[source] == NULL) { + if (test) (*test) = 0; + return iRCCE_NOT_ENQUEUED; + } + + if(iRCCE_irecv_queue[source] == request) { + + // have parts of the message already been received? + if(request->started) { + if (test) (*test) = 0; + return iRCCE_PENDING; + } + else { + // no, thus request can be canceled just in time: + iRCCE_irecv_queue[source] = request->next; + if (test) (*test) = 1; + return iRCCE_SUCCESS; + } + } + + for(run = iRCCE_irecv_queue[source]; run->next != NULL; run = run->next) { + + // request found --> remove it from recv queue: + if(run->next == request) { + + run->next = run->next->next; + + if (test) (*test) = 1; + return iRCCE_SUCCESS; + } + } + + if (test) (*test) = 0; + return iRCCE_NOT_ENQUEUED; +} + +#endif diff --git a/arch/x86/scc/iRCCE_isend.c b/arch/x86/scc/iRCCE_isend.c new file mode 100644 index 00000000..18c9dca0 --- /dev/null +++ b/arch/x86/scc/iRCCE_isend.c @@ -0,0 +1,355 @@ +//*************************************************************************************** +// Non-blocking send routines. +//*************************************************************************************** +// +// Author: Rob F. Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-10-25] added support for non-blocking send/recv operations +// - iRCCE_isend(), ..._test(), ..._wait(), ..._push() +// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push() +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +// [2010-11-12] extracted non-blocking code into separate library +// by Carsten Scholtes +// +// [2010-12-09] added cancel functions for non-blocking send/recv requests +// by Carsten Clauss +// + +#include +#include + +#ifdef CONFIG_ROCKCREEK + +#include + +static int iRCCE_push_send_request(iRCCE_SEND_REQUEST *request) { + + char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size + int test; // flag for calling iRCCE_test_flag() + + if(request->finished) return(iRCCE_SUCCESS); + + if(request->label == 1) goto label1; + if(request->label == 2) goto label2; + if(request->label == 3) goto label3; + + // send data in units of available chunk size of comm buffer + for (; request->wsize< (request->size / request->chunk) * request->chunk; request->wsize += request->chunk) { + request->bufptr = request->privbuf + request->wsize; + request->nbytes = request->chunk; + // copy private data to own comm buffer + iRCCE_put(request->combuf, (t_vcharp) request->bufptr, request->nbytes, RCCE_IAM); + RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest); + // wait for the destination to be ready to receive a message +label1: + iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 1; + return(iRCCE_PENDING); + } + RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM); + } + + request->remainder = request->size % request->chunk; + // if nothing is left over, we are done + if (!request->remainder) { + request->finished = 1; + return(iRCCE_SUCCESS); + } + + // send remainder of data--whole cache lines + request->bufptr = 
request->privbuf + (request->size / request->chunk) * request->chunk; + request->nbytes = request->remainder - request->remainder % RCCE_LINE_SIZE; + if (request->nbytes) { + // copy private data to own comm buffer + iRCCE_put(request->combuf, (t_vcharp)request->bufptr, request->nbytes, RCCE_IAM); + RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest); + // wait for the destination to be ready to receive a message +label2: + iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 2; + return(iRCCE_PENDING); + } + RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM); + } + + request->remainder = request->size % request->chunk; + request->remainder = request->remainder%RCCE_LINE_SIZE; + // if nothing is left over, we are done + if (!request->remainder) + { + request->finished = 1; + return(iRCCE_SUCCESS); + } + + // remainder is less than a cache line. This must be copied into appropriately sized + // intermediate space before it can be sent to the receiver + request->bufptr = request->privbuf + (request->size / request->chunk) * request->chunk + request->nbytes; + request->nbytes = RCCE_LINE_SIZE; + // copy private data to own comm buffer + memcpy(padline,request->bufptr,request->remainder); + iRCCE_put(request->combuf, (t_vcharp)padline, request->nbytes, RCCE_IAM); + RCCE_flag_write(request->sent, RCCE_FLAG_SET, request->dest); + // wait for the destination to be ready to receive a message +label3: + iRCCE_test_flag(*(request->ready), RCCE_FLAG_SET, &test); + if(!test) { + request->label = 3; + return(iRCCE_PENDING); + } + RCCE_flag_write(request->ready, RCCE_FLAG_UNSET, RCCE_IAM); + + request->finished = 1; + return(iRCCE_SUCCESS); +} + +static void iRCCE_init_send_request( + char *privbuf, // source buffer in local private memory (send buffer) + t_vcharp combuf, // intermediate buffer in MPB + size_t chunk, // size of MPB available for this message (bytes) + RCCE_FLAG *ready, // flag indicating whether receiver is 
ready + RCCE_FLAG *sent, // flag indicating whether message has been sent by source + size_t size, // size of message (bytes) + int dest, // UE that will receive the message + iRCCE_SEND_REQUEST *request + ) { + + request->privbuf = privbuf; + request->combuf = combuf; + request->chunk = chunk; + request->ready = ready; + request->sent = sent; + request->size = size; + request->dest = dest; + + request->wsize = 0; + request->remainder = 0; + request->nbytes = 0; + request->bufptr = NULL; + + request->label = 0; + + request->finished = 0; + + request->next = NULL; + + return; +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_isend +//-------------------------------------------------------------------------------------- +// non-blocking send function; returns a handle of type iRCCE_SEND_REQUEST +//-------------------------------------------------------------------------------------- +static iRCCE_SEND_REQUEST blocking_isend_request; +int iRCCE_isend(char *privbuf, size_t size, int dest, iRCCE_SEND_REQUEST *request) { + + if(request == NULL) request = &blocking_isend_request; + + if (dest<0 || dest >= RCCE_NP) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID)); + else { + iRCCE_init_send_request(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM], + size, dest, request); + + if(iRCCE_isend_queue == NULL) { + + if(iRCCE_push_send_request(request) == iRCCE_SUCCESS) { + return(iRCCE_SUCCESS); + } + else { + iRCCE_isend_queue = request; + + if(request == &blocking_isend_request) { + iRCCE_isend_wait(request); + return(iRCCE_SUCCESS); + } + + return(iRCCE_PENDING); + } + } + else { + if(iRCCE_isend_queue->next == NULL) { + iRCCE_isend_queue->next = request; + } + else { + iRCCE_SEND_REQUEST *run = iRCCE_isend_queue; + while(run->next != NULL) run = run->next; + run->next = request; + } + + if(request == &blocking_isend_request) { + iRCCE_isend_wait(request); + 
return(iRCCE_SUCCESS); + } + + return(iRCCE_RESERVED); + } + } +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_isend_test +//-------------------------------------------------------------------------------------- +// test function for completion of the requested non-blocking send operation +// Just provide NULL instead of testvar if you don't need it +//-------------------------------------------------------------------------------------- +int iRCCE_isend_test(iRCCE_SEND_REQUEST *request, int *test) { + + if(request == NULL) { + + iRCCE_isend_push(); + + if(iRCCE_isend_queue == NULL) { + if (test) (*test) = 1; + return(iRCCE_SUCCESS); + } + else { + if (test) (*test) = 0; + return(iRCCE_PENDING); + } + } + + if(request->finished) { + if (test) (*test) = 1; + return(iRCCE_SUCCESS); + } + + if(iRCCE_isend_queue != request) { + if (test) (*test) = 0; + return(iRCCE_RESERVED); + } + + iRCCE_push_send_request(request); + + if(request->finished) { + iRCCE_isend_queue = request->next; + + if (test) (*test) = 1; + return(iRCCE_SUCCESS); + } + + if (test) (*test) = 0; + return(iRCCE_PENDING); +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_isend_push +//-------------------------------------------------------------------------------------- +// progress function for pending requests in the isend queue (advances only the request at the head of the queue) +//-------------------------------------------------------------------------------------- +int iRCCE_isend_push(void) { + + iRCCE_SEND_REQUEST *request = iRCCE_isend_queue; + + if(request == NULL) { + return(iRCCE_SUCCESS); + } + + if(request->finished) { + return(iRCCE_SUCCESS); + } + + iRCCE_push_send_request(request); + + if(request->finished) { + iRCCE_isend_queue = request->next; + return(iRCCE_SUCCESS); + } + + return(iRCCE_PENDING); +} + + +//-------------------------------------------------------------------------------------- +//
FUNCTION: iRCCE_isend_wait +//-------------------------------------------------------------------------------------- +// wait for completion of the requested non-blocking send operation (request == NULL: wait until the isend queue is empty); pending irecv requests are progressed as well +//-------------------------------------------------------------------------------------- +int iRCCE_isend_wait(iRCCE_SEND_REQUEST *request) { + + if(request != NULL) { + + while(!request->finished) { + + iRCCE_isend_push(); + iRCCE_irecv_push(); + } + } + else { + + while(iRCCE_isend_queue != NULL) { + + iRCCE_isend_push(); + iRCCE_irecv_push(); + } + } + + return(iRCCE_SUCCESS); +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_isend_cancel +//-------------------------------------------------------------------------------------- +// try to cancel a pending non-blocking send request; the request at the head of the queue cannot be cancelled (iRCCE_PENDING is returned) +//-------------------------------------------------------------------------------------- +int iRCCE_isend_cancel(iRCCE_SEND_REQUEST *request, int *test) { + + iRCCE_SEND_REQUEST *run; + + if( (request == NULL) || (request->finished) ) { + if (test) (*test) = 0; + return iRCCE_NOT_ENQUEUED; + } + + if(iRCCE_isend_queue == NULL) { + if (test) (*test) = 0; + return iRCCE_NOT_ENQUEUED; + } + + if(iRCCE_isend_queue == request) { + if (test) (*test) = 0; + return iRCCE_PENDING; + } + + for(run = iRCCE_isend_queue; run->next != NULL; run = run->next) { + + // request found --> remove it from send queue: + if(run->next == request) { + + run->next = run->next->next; + + if (test) (*test) = 1; + return iRCCE_SUCCESS; + } + } + + if (test) (*test) = 0; + return iRCCE_NOT_ENQUEUED; +} + +#endif diff --git a/arch/x86/scc/iRCCE_put.c b/arch/x86/scc/iRCCE_put.c new file mode 100644 index 00000000..93cea070 --- /dev/null +++ b/arch/x86/scc/iRCCE_put.c @@ -0,0 +1,87 @@ +//*************************************************************************************** +// Put data into communication buffer. 
+//*************************************************************************************** +// +// Author: Rob F. Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-11-03] switched to SCC-optimized memcpy() functions in scc_memcpy.h: +// - memcpy_to_mpb() +// - memcpy_from_mpb() +// by Stefan Lankes, Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +#include +#include + +#ifdef CONFIG_ROCKCREEK + +#include + +#ifdef COPPERRIDGE +#include "scc_memcpy.h" +#endif + +void* iRCCE_memcpy_put(void *dest, const void *src, size_t count) +{ +#ifdef COPPERRIDGE + return memcpy_to_mpb(dest, src, count); +#else + return memcpy(dest, src, count); +#endif +} + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_put +//-------------------------------------------------------------------------------------- +// copy data from address "source" in the local MPB or the calling UE's private memory +// to address "target" in the remote MPB. 
We do not test to see if a move from the +// calling UE's private memory stays within allocated memory +//-------------------------------------------------------------------------------------- +int iRCCE_put( + t_vcharp target, // target buffer, MPB + t_vcharp source, // source buffer, MPB or private memory + int num_bytes, // number of bytes to copy + int ID // UE whose MPB receives the data + ) { + + // in non-GORY mode we only need to retain the MPB target shift; we + // already know the target is in the MPB, not private memory + target = RCCE_comm_buffer[ID]+(target-RCCE_comm_buffer[RCCE_IAM]); + + // make sure that any data that has been put in our MPB by another UE is visible +#ifdef _OPENMP + #pragma omp flush +#endif + + // do the actual copy + RC_cache_invalidate(); + + iRCCE_memcpy_put((void *)target, (void *)source, num_bytes); + + // flush data to make it visible to all threads; cannot use flush list because it + // concerns malloced space +#ifdef _OPENMP + #pragma omp flush +#endif + return(iRCCE_SUCCESS); +} + +#endif diff --git a/arch/x86/scc/iRCCE_recv.c b/arch/x86/scc/iRCCE_recv.c new file mode 100644 index 00000000..17beccfd --- /dev/null +++ b/arch/x86/scc/iRCCE_recv.c @@ -0,0 +1,190 @@ +//*************************************************************************************** +// Blocking (pipelined) receive routines. +//*************************************************************************************** +// +// Author: Rob F. Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-11-26] added a _pipelined_ version of blocking send/recv +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +#include +#include + +#ifdef CONFIG_ROCKCREEK + +#include + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_recv_general +//-------------------------------------------------------------------------------------- +// pipelined receive function +//-------------------------------------------------------------------------------------- +static int iRCCE_recv_general( + char *privbuf, // destination buffer in local private memory (receive buffer) + t_vcharp combuf, // intermediate buffer in MPB + size_t chunk, // size of MPB available for this message (bytes) + RCCE_FLAG *ready, // flag indicating whether receiver is ready + RCCE_FLAG *sent, // flag indicating whether message has been sent by source + size_t size, // size of message (bytes) + int source, // UE that sent the message + int *test // if 1 upon entry, do nonblocking receive; if message available + // set to 1, otherwise to 0 + ) { + + char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size + size_t wsize, // offset within receive buffer when pulling in "chunk" bytes + remainder, // bytes remaining to be received + nbytes; // number of bytes to be received in single iRCCE_get call + int first_test; // only use first chunk to determine if message has been received yet + char *bufptr; // running pointer inside privbuf for current location + + 
first_test = 1; + +#if 0 + // receive data in units of available chunk size of MPB + for (wsize=0; wsize< (size/chunk)*chunk; wsize+=chunk) { + bufptr = privbuf + wsize; + nbytes = chunk; + // if function is called in test mode, check if first chunk has been sent already. + // If so, proceed as usual. If not, exit immediately + if (*test && first_test) { + first_test = 0; + if (!(*test = RCCE_probe(*sent))) return(iRCCE_SUCCESS); + } + RCCE_wait_until(*sent, RCCE_FLAG_SET); + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + // copy data from local MPB space to private memory + iRCCE_get((t_vcharp)bufptr, combuf, nbytes, source); + + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + } +#else + { // pipelined version of send/recv: + + size_t subchunk1 = chunk / 2; + size_t subchunk2 = chunk - subchunk1; + + for (wsize=0; wsize < (size/chunk)*chunk; wsize+=chunk) { + + if (*test && first_test) { + first_test = 0; + if (!(*test = RCCE_probe(*sent))) return(iRCCE_SUCCESS); + } + + bufptr = privbuf + wsize; + nbytes = subchunk1; + + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); + iRCCE_get((t_vcharp)bufptr, combuf, nbytes, source); + + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + + bufptr = privbuf + wsize + subchunk1; + nbytes = subchunk2; + + RCCE_wait_until(*sent, RCCE_FLAG_SET); + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + iRCCE_get((t_vcharp)bufptr, combuf + subchunk1, nbytes, source); + + RCCE_flag_write(sent, RCCE_FLAG_SET, source); + } + } +#endif + + remainder = size%chunk; + // if nothing is left over, we are done + if (!remainder) return(iRCCE_SUCCESS); + + // receive remainder of data--whole cache lines + bufptr = privbuf + (size/chunk)*chunk; + nbytes = remainder - remainder % RCCE_LINE_SIZE; + if (nbytes) { + // if function is called in test mode, check if first chunk has been sent already. + // If so, proceed as usual. 
If not, exit immediately + if (*test && first_test) { + first_test = 0; + if (!(*test = RCCE_probe(*sent))) return(iRCCE_SUCCESS); + } + RCCE_wait_until(*sent, RCCE_FLAG_SET); + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + // copy data from local MPB space to private memory + iRCCE_get((t_vcharp)bufptr, combuf, nbytes, source); + + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + } + + remainder = remainder % RCCE_LINE_SIZE; + if (!remainder) return(iRCCE_SUCCESS); + + // remainder is less than cache line. This must be copied into appropriately sized + // intermediate space before exact number of bytes get copied to the final destination + bufptr = privbuf + (size/chunk)*chunk + nbytes; + nbytes = RCCE_LINE_SIZE; + + // if function is called in test mode, check if first chunk has been sent already. + // If so, proceed as usual. If not, exit immediately + if (*test && first_test) { + first_test = 0; + if (!(*test = RCCE_probe(*sent))) return(iRCCE_SUCCESS); + } + RCCE_wait_until(*sent, RCCE_FLAG_SET); + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + + // copy data from local MPB space to private memory + iRCCE_get((t_vcharp)padline, combuf, nbytes, source); + memcpy(bufptr,padline,remainder); + + // tell the source I have moved data out of its comm buffer + RCCE_flag_write(ready, RCCE_FLAG_SET, source); + + return(iRCCE_SUCCESS); +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_recv +//-------------------------------------------------------------------------------------- +// pipelined recv function (blocking!) 
+//-------------------------------------------------------------------------------------- +int iRCCE_recv(char *privbuf, size_t size, int source) { + int ignore; + + // validate source first: it indexes iRCCE_irecv_queue[] and RCCE_sent_flag[] below + if (source<0 || source >= RCCE_NP) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID)); + + while(iRCCE_irecv_queue[source] != NULL) { + // wait for completion of pending non-blocking requests + iRCCE_irecv_push(); + iRCCE_isend_push(); + } + + // ignore == 0 keeps iRCCE_recv_general out of test mode, i.e. the call blocks + ignore = 0; + return(iRCCE_recv_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[RCCE_IAM], &RCCE_sent_flag[source], + size, source, &ignore)); +} + +#endif diff --git a/arch/x86/scc/iRCCE_send.c b/arch/x86/scc/iRCCE_send.c new file mode 100644 index 00000000..ad1582b3 --- /dev/null +++ b/arch/x86/scc/iRCCE_send.c @@ -0,0 +1,165 @@ +//*************************************************************************************** +// Blocking (pipelined) send routines. +//*************************************************************************************** +// +// Author: Rob F. Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//*************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// [2010-11-26] added a _pipelined_ version of blocking send/recv +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +#include +#include + +#ifdef CONFIG_ROCKCREEK + +#include + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_send_general +//-------------------------------------------------------------------------------------- +// pipelined send function +//-------------------------------------------------------------------------------------- +static int iRCCE_send_general( + char *privbuf, // source buffer in local private memory (send buffer) + t_vcharp combuf, // intermediate buffer in MPB + size_t chunk, // size of MPB available for this message (bytes) + RCCE_FLAG *ready, // flag indicating whether receiver is ready + RCCE_FLAG *sent, // flag indicating whether message has been sent by source + size_t size, // size of message (bytes) + int dest // UE that will receive the message + ) { + + char padline[RCCE_LINE_SIZE]; // copy buffer, used if message not multiple of line size + size_t wsize, // offset within send buffer when putting in "chunk" bytes + remainder, // bytes remaining to be sent + nbytes; // number of bytes to be sent in single iRCCE_put call + char *bufptr; // running pointer inside privbuf for current location + +#if 0 + // send data in units of available chunk size of comm buffer + for (wsize=0; wsize< (size/chunk)*chunk; wsize+=chunk) { + bufptr = privbuf + wsize; + nbytes = chunk; + // copy private data to own comm buffer + iRCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM); + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); + // wait for the destination to be ready to receive a message + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); + } +#else + { // pipelined version of send/recv: + size_t subchunk1 = chunk / 2; + size_t subchunk2 = chunk - subchunk1; + + wsize = 0; + + for (; wsize < 
(size/chunk)*chunk; wsize+=chunk) { + + bufptr = privbuf + wsize; + nbytes = subchunk1; + + iRCCE_put(combuf, (t_vcharp) bufptr, nbytes, RCCE_IAM); + RCCE_flag_write(ready, RCCE_FLAG_SET, dest); + + if(wsize>0) + { + RCCE_wait_until(*sent, RCCE_FLAG_SET); + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + } + + bufptr = privbuf + wsize + subchunk1; + nbytes = subchunk2; + + iRCCE_put(combuf + subchunk1, (t_vcharp) bufptr, nbytes, RCCE_IAM); + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); + + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); + } + + if(wsize>0) { + RCCE_wait_until(*sent, RCCE_FLAG_SET); + RCCE_flag_write(sent, RCCE_FLAG_UNSET, RCCE_IAM); + } + } +#endif + + remainder = size%chunk; + // if nothing is left over, we are done + if (!remainder) return(iRCCE_SUCCESS); + + // send remainder of data--whole cache lines + bufptr = privbuf + (size/chunk)*chunk; + nbytes = remainder - remainder%RCCE_LINE_SIZE; + if (nbytes) { + // copy private data to own comm buffer + iRCCE_put(combuf, (t_vcharp)bufptr, nbytes, RCCE_IAM); + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); + // wait for the destination to be ready to receive a message + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); + } + + remainder = remainder%RCCE_LINE_SIZE; + if (!remainder) return(iRCCE_SUCCESS); + + // remainder is less than a cache line. 
This must be copied into appropriately sized + // intermediate space before it can be sent to the receiver + bufptr = privbuf + (size/chunk)*chunk + nbytes; + nbytes = RCCE_LINE_SIZE; + + // copy private data to own comm buffer + memcpy(padline,bufptr,remainder); + iRCCE_put(combuf, (t_vcharp)padline, nbytes, RCCE_IAM); + RCCE_flag_write(sent, RCCE_FLAG_SET, dest); + + // wait for the destination to be ready to receive a message + RCCE_wait_until(*ready, RCCE_FLAG_SET); + RCCE_flag_write(ready, RCCE_FLAG_UNSET, RCCE_IAM); + + return(iRCCE_SUCCESS); +} + + +//-------------------------------------------------------------------------------------- +// FUNCTION: iRCCE_send +//-------------------------------------------------------------------------------------- +// pipelined send function (blocking!) +//-------------------------------------------------------------------------------------- +int iRCCE_send(char *privbuf, size_t size, int dest) { + + while(iRCCE_isend_queue != NULL) { + // wait for completion of pending non-blocking requests + iRCCE_isend_push(); + iRCCE_irecv_push(); + } + + if (dest<0 || dest >= RCCE_NP) + return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID)); + else + return(iRCCE_send_general(privbuf, RCCE_buff_ptr, RCCE_chunk, + &RCCE_ready_flag[dest], &RCCE_sent_flag[RCCE_IAM], + size, dest)); +} + +#endif diff --git a/arch/x86/scc/iRCCE_synch.c b/arch/x86/scc/iRCCE_synch.c new file mode 100644 index 00000000..5512728b --- /dev/null +++ b/arch/x86/scc/iRCCE_synch.c @@ -0,0 +1,127 @@ +///************************************************************************************* +// Synchronization functions. +// Single-bit and whole-cache-line flags are sufficiently different that we provide +// separate implementations of the synchronization routines for each case +//************************************************************************************** +// +// Author: Rob F. 
Van der Wijngaart +// Intel Corporation +// Date: 008/30/2010 +// +//************************************************************************************** +// +// Copyright 2010 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// [2010-10-25] added support for non-blocking send/recv operations +// - iRCCE_isend(), ..._test(), ..._wait(), ..._push() +// - iRCCE_irecv(), ..._test(), ..._wait(), ..._push() +// by Carsten Clauss, Chair for Operating Systems, +// RWTH Aachen University +// +// [2010-11-12] extracted non-blocking code into separate library +// by Carsten Scholtes +// +// [2011-01-21] updated the datatype of RCCE_FLAG according to the +// recent version of RCCE +// +// [2011-04-12] added macro test for rcce version +// +#include +#include + +#ifdef CONFIG_ROCKCREEK + +#include + +#ifdef SINGLEBITFLAGS + +int iRCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) { + + t_vcharp cflag; + +#ifdef RCCE_VERSION + // this is a newer version than V1.0.13 + t_vcharp flaga; +#endif + + cflag = flag.line_address; + +#ifdef RCCE_VERSION + // this is a newer version than V1.0.13 + flaga = flag.flag_addr; +#endif + + // always flush/invalidate to ensure we read the most recent value of *flag + // the flag is read once (no busy-waiting); *result is 1 if it equals val, else 0 + +#ifdef _OPENMP +#pragma omp flush +#endif + RC_cache_invalidate(); + +#ifdef RCCE_VERSION + // this is a newer version than V1.0.13 + if(RCCE_bit_value(flaga, 
(flag.location)%RCCE_FLAGS_PER_BYTE) != val) { +#else + if(RCCE_bit_value(cflag, flag.location) != val) { +#endif + (*result) = 0; + } + else { + (*result) = 1; + } + + return(iRCCE_SUCCESS); +} + +#else + +////////////////////////////////////////////////////////////////// +// LOCKLESS SYNCHRONIZATION USING ONE WHOLE CACHE LINE PER FLAG // +////////////////////////////////////////////////////////////////// + +int iRCCE_test_flag(RCCE_FLAG flag, RCCE_FLAG_STATUS val, int *result) { + +#ifdef RCCE_VERSION + // this is a newer version than V1.0.13 + t_vcharp flaga = flag.flag_addr; +#endif + + // always flush/invalidate to ensure we read the most recent value of *flag; + // the flag is read once (no busy-waiting) and *result reports the match. We only need to read the + // first int of the MPB cache line containing the flag +#ifdef _OPENMP +#pragma omp flush +#endif + RC_cache_invalidate(); + +#ifdef RCCE_VERSION + // this is a newer version than V1.0.13 + if((RCCE_FLAG_STATUS)(*flaga) != val) { +#else + if((*flag) != val) { +#endif + (*result) = 0; + } + else { + (*result) = 1; + } + + return(iRCCE_SUCCESS); +} + +#endif + +#endif From 622b403f08025ad1e178fb8dc849e09ac10be5d6 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Tue, 19 Apr 2011 07:17:51 -0700 Subject: [PATCH 02/13] add code to initialize (i)RCCE and to test RCCE --- arch/x86/scc/scc_init.c | 52 +++++++++++++++++++++++++------ include/metalsvm/config.h.example | 2 +- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/arch/x86/scc/scc_init.c b/arch/x86/scc/scc_init.c index 3279c967..33645aaa 100644 --- a/arch/x86/scc/scc_init.c +++ b/arch/x86/scc/scc_init.c @@ -17,7 +17,8 @@ #include #include #include -#include +#include +#include #include #ifdef CONFIG_ROCKCREEK @@ -35,6 +36,9 @@ bootinfo_t* bootinfo = (bootinfo_t*) SCC_BOOTINFO; static int scc_clear(void) { int tmp, x, y, z, offset; + + // Initialize API + InitAPI(0); // Find out who I am... 
tmp=ReadConfigReg(CRB_OWN+MYTILEID); @@ -57,6 +61,9 @@ static int scc_clear(void) // Clear test&set register write. Next read-access will read "1" (lock granted). SetConfigReg(CRB_ADDR(x,y)+((z)?LOCK1:LOCK0), 1); + // frees Message Passing Buffer + MPBunalloc(&MPB); + return 0; } @@ -64,31 +71,58 @@ int scc_init(void) { int num_ranks; int i, my_rank; + uint64_t start, end, ticks, freq = 533; + uint32_t cr4, msg = 0; + + kputs("Initialize Rock Creek!\n"); + + /* Enable Messagepassing in CR4 */ + cr4 = read_cr4(); + cr4 = cr4 | _CR4_MPE; + write_cr4(cr4); - kprintf("Initialize Rock Creek!\n"); kprintf("address of the initrd: 0x%x\n", bootinfo->addr); kprintf("size of the initrd: %d\n", bootinfo->size); kprintf("rcce argc = %d\n", bootinfo->argc); for(i=0; iargc; i++) kprintf("rcce argv[%d] = %s\n", i, bootinfo->argv[i]); + if (bootinfo->argc >= 3) + freq = atoi(bootinfo->argv[2]); + + kputs("Reset SCC!\n"); + scc_clear(); + kputs("Wait some time...\n"); + mb(); + start = rdtsc(); + do { + mb(); + end = rdtsc(); + ticks = end > start ? end - start : start - end; + } while(ticks*TIMER_FREQ < 1000ULL*freq*1000000ULL); + kprintf("ticks %llu\n", ticks); + if (RCCE_init(&bootinfo->argc, &bootinfo->argv) != RCCE_SUCCESS) return -ENODEV; + if (iRCCE_init() != iRCCE_SUCCESS) + return -ENODEV; my_rank = RCCE_ue(); num_ranks = RCCE_num_ues(); kprintf("Got rank %d of %d ranks\n", my_rank, num_ranks); - /* Enable Messagepassing in CR4 */ - uint32_t cr4 = read_cr4(); - cr4 = cr4 | _CR4_MPE; - write_cr4(cr4); - i = ReadConfigReg(CRB_OWN+GLCFG0); kprintf("glcfg0 0x%x\n", i); - /* synchronize before starting MetalSVM: */ - //RCCE_barrier(&RCCE_COMM_WORLD); + RCCE_barrier(&RCCE_COMM_WORLD); + + kputs("RCCE test...\t"); + if (my_rank == 0) + msg = 0x4711; + if (RCCE_bcast((char*) &msg, sizeof(msg), 0, RCCE_COMM_WORLD) == RCCE_SUCCESS) + kprintf("successfull! (0x%x)\n", msg); + else + kprintf("failed! 
(0x%x)\n", msg); kputs("Now, the SCC is initialized!\n"); diff --git a/include/metalsvm/config.h.example b/include/metalsvm/config.h.example index 31c02682..b5c0536e 100644 --- a/include/metalsvm/config.h.example +++ b/include/metalsvm/config.h.example @@ -58,7 +58,7 @@ extern "C" { // RCCE specific flags #define SCC #define MS_BAREMETAL -#define GORY +//#define GORY //#define SHMADD //#define SHMADD_CACHEABLE /* default values for 16 GB system */ From 52c924f62a631f1b9c45bfc89ef8a02a6e66e287 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Tue, 19 Apr 2011 07:18:35 -0700 Subject: [PATCH 03/13] add fallback code for memcpy --- libkern/string.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libkern/string.c b/libkern/string.c index acf76be9..7d6a5220 100644 --- a/libkern/string.c +++ b/libkern/string.c @@ -8,9 +8,9 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ -#include +#include -#ifndef HAVE_ARCH_MEMCPY +//#ifndef HAVE_ARCH_MEMCPY void *memcpy(void *dest, const void *src, size_t count) { size_t i; @@ -23,7 +23,7 @@ void *memcpy(void *dest, const void *src, size_t count) return dest; } -#endif +//#endif #ifndef HAVE_ARCH_MEMSET void *memset(void *dest, int val, size_t count) From ed6aa2eca8a443e763d4fa6a528bcd40996bc18c Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Tue, 19 Apr 2011 07:39:53 -0700 Subject: [PATCH 04/13] add Intel's bin2obj tool - this is part of http://marcbug.scc-dc.com/svn/repository/trunk/linuxkernel/bin2obj --- tools/Makefile | 9 ++- tools/bin2obj.c | 147 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 153 insertions(+), 3 deletions(-) create mode 100644 tools/bin2obj.c diff --git a/tools/Makefile b/tools/Makefile index e0375ae0..fb77c026 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -41,16 +41,19 @@ scc_bootinfo.asm: bootinfo.sh scc_bootinfo.bin: scc_bootinfo.asm $(NASM) $(NASMFLAGS) -o $@ $< -SCC: scc_bootinfo.bin scc_setup.bin reset_vector.bin 
initrd.img +bin2obj: bin2obj.c + $(CC) $(CFLAGS) -o $@ $< + +SCC: scc_bootinfo.bin scc_setup.bin reset_vector.bin initrd.img bin2obj cp ../metalsvm.elf . $(CROSS_OBJCOPY) -j .mboot -j .text -j .data -j .rodata -j .bss -O binary metalsvm.elf metalsvm.bin chmod a-x *.bin . ./prepare.sh - /home/lankes/tools/bin2obj -m load.map -o metalsvm.obj + ./bin2obj -m load.map -o metalsvm.obj sccMerge -noimage -m 8 -n 12 -force ./metalsvm.mt clean: - $(RM) -rf *.o *~ make_initrd initrd.img *.bin *.obj *.hex *.elf obj + $(RM) -rf *.o *~ bin2obj make_initrd initrd.img *.bin *.obj *.hex *.elf obj depend: $(CC) -MM $(CFLAGS) *.c > Makefile.dep diff --git a/tools/bin2obj.c b/tools/bin2obj.c new file mode 100644 index 00000000..45c07caa --- /dev/null +++ b/tools/bin2obj.c @@ -0,0 +1,147 @@ +#include +#include +#include + + +const char BIN2OBJIDSTRING[] = "$Id: bin2obj.c 8016 2007-11-01 14:24:42Z tlehnig $"; + + +long long convertToHex(char *fn, unsigned long origin, FILE *outfile) { + FILE *datafile; + unsigned char data1, data2, data3, data4; + int res = 0; + long long count = 0; + + datafile = fopen(fn, "rb"); // raw image data: open in binary mode + if (!datafile) { + printf("Datafile >%s< could not be opened, not writing data for this file\n", fn); + return -1; + } + + printf("Converting file >%s< to .32.obj format at origin 0x%08lx (0x%08lx) ... 
", + fn, origin >> 2, origin); + fprintf(outfile, "/origin %08lx\n", origin >> 2); + + do { + data1 = 0; + data2 = 0; + data3 = 0; + data4 = 0; + + res = fscanf(datafile, "%c%c%c%c", &data1, &data2, &data3, &data4); + if (res > 0) { + count += res; + + fprintf(outfile, "%02x%02x%02x%02x", data4, data3, data2, data1); + + if ((count % 16) == 0) + fprintf(outfile, "\n"); + else + fprintf(outfile, " "); + } + + } while (res > 0); + + if ((count % 16) != 0) fprintf(outfile, "\n"); + + printf("done with %lli Bytes.\n", count); + + fclose(datafile); + + return count; + +} + + +void print_help() { + printf("Usage: bin2obj [FLAGS] [OPTIONS]\n"); + printf("\nFLAGS: -h, -v\n"); + printf("-h Print this help\n"); + printf("-v Print Version ID\n"); + printf("\nOPTIONS: -m, -o\n"); + printf("-m Defines mapfile to use for bin2obj\n"); + printf("-o Defines output file to use for bin2obj\n"); + printf("\nbin2obj converts the binary files defined in the mapfile to a hex based textfile\n"); + printf("used by MCEMU\n"); +} + + +int main(int argc, char **argv) { + + FILE *mapfile = NULL, *outfile = NULL; + unsigned long origin; + char datafn[255]; + char outfn[255] = "output.obj"; + char mapfn[255] = "load.map"; + int res = 0; + unsigned long long count = 0; + long long thiscount = 0; + int retval = 0; + + int c, doOptLoop = 1; + + while (doOptLoop) { + c = getopt(argc, argv, "m:o:hv"); + + if (c == -1) { + doOptLoop = 0; + break; + } + + switch (c) { + case 'h': + print_help(); + return 0; + break; + case 'v': + printf("%s %s\n", argv[0], BIN2OBJIDSTRING); + return 0; + break; + case 'm': + printf("Mapfile: >%s<\n", optarg); + snprintf(mapfn, sizeof(mapfn), "%s", optarg); // always NUL-terminated, unlike strncpy + break; + case 'o': + printf("Outfile: >%s<\n", optarg); + snprintf(outfn, sizeof(outfn), "%s", optarg); // always NUL-terminated, unlike strncpy + break; + default: + print_help(); + return 0; + } + } + + + mapfile = fopen(mapfn, "r"); + if (!mapfile) { + printf("Mapfile >%s< not found, 
exiting.\n", mapfn); + return -1; + } + + outfile = fopen(outfn, "w"); + if (!outfile) { + 
printf("Outputfile >%s< could not be created, exiting\n", outfn); + return -1; + } + + // res = fscanf(mapfile, "%lx %s\n", &origin, datafn); + + while ((res = fscanf(mapfile, "%lx %254s\n", &origin, datafn)) == 2) { // %254s: datafn holds 255 bytes + //printf("ReadMapFile origin: 0x%08lx, filename: >%s<\n", origin, datafn); + + thiscount = convertToHex(datafn, origin, outfile); + if (thiscount < 0) { + retval = -1; + } + else count += thiscount; + + } + + fprintf(outfile, "/eof\n"); + fclose(mapfile); + fclose (outfile); + + printf("Total conversion: %llu Bytes\n", count); + + return retval; +} From 7d36bb7ed104ff698b7ec052c2bf9e99895ec82b Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Tue, 19 Apr 2011 08:36:00 -0700 Subject: [PATCH 05/13] define that we also load metalsvm.obj to pid 1 --- tools/metalsvm.mt | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/metalsvm.mt b/tools/metalsvm.mt index 9d40eaab..e08b520d 100644 --- a/tools/metalsvm.mt +++ b/tools/metalsvm.mt @@ -1,2 +1,3 @@ # pid mch-route mch-dest-id mch-offset-base testcase 0x00 0x00 6 0x00 metalsvm.obj +0x01 0x00 6 0x01 metalsvm.obj From f3972a36c4b7796ee37d2501d587b2049f926837 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Tue, 19 Apr 2011 08:36:58 -0700 Subject: [PATCH 06/13] enables runtime debug messages for RCCE library calls --- arch/x86/scc/RCCE_admin.c | 2 +- arch/x86/scc/scc_init.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/scc/RCCE_admin.c b/arch/x86/scc/RCCE_admin.c index 8b4cbccc..34f0e677 100644 --- a/arch/x86/scc/RCCE_admin.c +++ b/arch/x86/scc/RCCE_admin.c @@ -45,7 +45,7 @@ // #include // En-/ or disable debug prints... -#define DEBUG 1 +#define DEBUG 0 //...................................................................................... 
// GLOBAL VARIABLES USED BY THE LIBRARY diff --git a/arch/x86/scc/scc_init.c b/arch/x86/scc/scc_init.c index 33645aaa..d9f2628a 100644 --- a/arch/x86/scc/scc_init.c +++ b/arch/x86/scc/scc_init.c @@ -99,14 +99,16 @@ int scc_init(void) mb(); end = rdtsc(); ticks = end > start ? end - start : start - end; - } while(ticks*TIMER_FREQ < 1000ULL*freq*1000000ULL); - kprintf("ticks %llu\n", ticks); + } while(ticks*TIMER_FREQ < 300ULL*freq*1000000ULL); if (RCCE_init(&bootinfo->argc, &bootinfo->argv) != RCCE_SUCCESS) return -ENODEV; if (iRCCE_init() != iRCCE_SUCCESS) return -ENODEV; + // enable additional outputs + RCCE_debug_set(RCCE_DEBUG_ALL); + my_rank = RCCE_ue(); num_ranks = RCCE_num_ues(); kprintf("Got rank %d of %d ranks\n", my_rank, num_ranks); From 6a1fdca0ac5273ee78373d06a8a980fe1ba2b5c7 Mon Sep 17 00:00:00 2001 From: Jacek Galowicz Date: Tue, 19 Apr 2011 18:51:59 +0200 Subject: [PATCH 07/13] Encapsulated IDT-/GDT-descriptor configuring code into helper functions. --- arch/x86/include/asm/gdt.h | 10 ++++++++++ arch/x86/include/asm/idt.h | 10 ++++++++++ arch/x86/kernel/gdt.c | 22 +++++++++++++++------- arch/x86/kernel/idt.c | 19 ++++++++++++++----- 4 files changed, 49 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/gdt.h b/arch/x86/include/asm/gdt.h index cdb65d46..fb4bd4c1 100644 --- a/arch/x86/include/asm/gdt.h +++ b/arch/x86/include/asm/gdt.h @@ -115,6 +115,16 @@ typedef struct { */ void gdt_install(void); +/** @brief Configures and returns a GDT descriptor with chosen attributes + * + * Just feed this function with address, limit and the flags + * you have seen in idt.h + * + * @return a preconfigured gdt descriptor + */ +gdt_entry_t configure_gdt_entry(unsigned long base, unsigned long limit, + unsigned char access, unsigned char gran); + #ifdef __cplusplus } #endif diff --git a/arch/x86/include/asm/idt.h b/arch/x86/include/asm/idt.h index a43d4383..28fde65a 100644 --- a/arch/x86/include/asm/idt.h +++ b/arch/x86/include/asm/idt.h @@ -116,6 
+116,16 @@ void idt_install(void); void idt_set_gate(unsigned char num, unsigned long base, unsigned short sel, unsigned char flags); +/** @brief Configures and returns an IDT entry with chosen attributes + * + * Just feed this function with base, selector and the flags + * you have seen in idt.h + * + * @return a preconfigured idt descriptor + */ +idt_entry_t configure_idt_entry(unsigned long base, unsigned short sel, + unsigned char flags); + #ifdef __cplusplus } #endif diff --git a/arch/x86/kernel/gdt.c b/arch/x86/kernel/gdt.c index 33b136ee..5be409c6 100644 --- a/arch/x86/kernel/gdt.c +++ b/arch/x86/kernel/gdt.c @@ -157,19 +157,27 @@ int create_default_frame(task_t* task, entry_point_t ep, void* arg) static void gdt_set_gate(int num, unsigned long base, unsigned long limit, unsigned char access, unsigned char gran) { + gdt[num] = configure_gdt_entry(base, limit, access, gran); +} +gdt_entry_t configure_gdt_entry(unsigned long base, unsigned long limit, + unsigned char access, unsigned char gran) +{ + gdt_entry_t desc; /* Setup the descriptor base address */ - gdt[num].base_low = (base & 0xFFFF); - gdt[num].base_middle = (base >> 16) & 0xFF; - gdt[num].base_high = (base >> 24) & 0xFF; + desc.base_low = (base & 0xFFFF); + desc.base_middle = (base >> 16) & 0xFF; + desc.base_high = (base >> 24) & 0xFF; /* Setup the descriptor limits */ - gdt[num].limit_low = (limit & 0xFFFF); - gdt[num].granularity = ((limit >> 16) & 0x0F); + desc.limit_low = (limit & 0xFFFF); + desc.granularity = ((limit >> 16) & 0x0F); /* Finally, set up the granularity and access flags */ - gdt[num].granularity |= (gran & 0xF0); - gdt[num].access = access; + desc.granularity |= (gran & 0xF0); + desc.access = access; + + return desc; } /* diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index fd8156c4..fe3b2bf4 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -48,16 +48,25 @@ static idt_ptr_t idtp; void idt_set_gate(unsigned char num, unsigned long base, unsigned short 
sel, unsigned char flags) { + idt[num] = configure_idt_entry(base, sel, flags); +} + +idt_entry_t configure_idt_entry(unsigned long base, unsigned short sel, + unsigned char flags) +{ + idt_entry_t desc; /* The interrupt routine's base address */ - idt[num].base_lo = (base & 0xFFFF); - idt[num].base_hi = (base >> 16) & 0xFFFF; + desc.base_lo = (base & 0xFFFF); + desc.base_hi = (base >> 16) & 0xFFFF; /* The segment or 'selector' that this IDT entry will use * is set here, along with any access flags */ - idt[num].sel = sel; - idt[num].always0 = 0; - idt[num].flags = flags; + desc.sel = sel; + desc.always0 = 0; + desc.flags = flags; + + return desc; } extern void isrsyscall(void); From 3fea08710d70e1a401ec734ab5397e16242c3518 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Tue, 19 Apr 2011 20:01:18 +0200 Subject: [PATCH 08/13] use logical operations instead of / and % to increase the performance --- arch/x86/mm/page.c | 16 ++++++++-------- include/metalsvm/config.h.example | 1 + 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c index cd3f210b..4e5c4f7c 100644 --- a/arch/x86/mm/page.c +++ b/arch/x86/mm/page.c @@ -640,8 +640,8 @@ int arch_paging_init(void) * Set the page table and page directory entries for the kernel. We map the kernel's physical address * to the same virtual address. 
*/ - npages = ((size_t) &kernel_end - (size_t) &kernel_start) / PAGE_SIZE; - if ((size_t)&kernel_end % PAGE_SIZE) + npages = ((size_t) &kernel_end - (size_t) &kernel_start) >> PAGE_SHIFT; + if ((size_t)&kernel_end & (PAGE_SIZE-1)) npages++; map_region((size_t)&kernel_start, (size_t)&kernel_start, npages, MAP_KERNEL_SPACE); @@ -686,8 +686,8 @@ int arch_paging_init(void) for(i=0; imods_count; i++, mmodule++) { // map physical address to the same virtual address - npages = (mmodule->mod_end - mmodule->mod_start) / PAGE_SIZE; - if (mmodule->mod_end % PAGE_SIZE) + npages = (mmodule->mod_end - mmodule->mod_start) >> PAGE_SHIFT; + if (mmodule->mod_end & (PAGE_SIZE-1)) npages++; map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_KERNEL_SPACE); } @@ -699,17 +699,17 @@ int arch_paging_init(void) map_region(SCC_BOOTINFO, SCC_BOOTINFO, 1, MAP_KERNEL_SPACE); // map the initial ramdisk - npages = bootinfo->size / PAGE_SIZE; - if (bootinfo->size % PAGE_SIZE) + npages = bootinfo->size >> PAGE_SHIFT; + if (bootinfo->size & (PAGE_SIZE-1)) npages++; map_region(bootinfo->addr, bootinfo->addr, npages, MAP_KERNEL_SPACE); // map SCC's configuration registers - viraddr = map_region(CRB_X0_Y0, CRB_X0_Y0, (CRB_OWN-CRB_X0_Y0+16*1024*1024)/PAGE_SIZE, MAP_KERNEL_SPACE|MAP_NO_CACHE); + viraddr = map_region(CRB_X0_Y0, CRB_X0_Y0, (CRB_OWN-CRB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE); kprintf("Map configuration registers at 0x%x\n", viraddr); // map SCC's message passing buffers - viraddr = map_region(MPB_X0_Y0, MPB_X0_Y0, (MPB_OWN-MPB_X0_Y0+16*1024*1024)/PAGE_SIZE, MAP_KERNEL_SPACE|MAP_MPE); + viraddr = map_region(MPB_X0_Y0, MPB_X0_Y0, (MPB_OWN-MPB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_MPE); kprintf("Map message passing buffers at 0x%x\n", viraddr); #endif diff --git a/include/metalsvm/config.h.example b/include/metalsvm/config.h.example index b5c0536e..bb405699 100644 --- a/include/metalsvm/config.h.example +++ 
b/include/metalsvm/config.h.example @@ -32,6 +32,7 @@ extern "C" { #define KERNEL_STACK_SIZE 8192 #define KMSG_SIZE (128*1024) #define PAGE_SIZE 4096 +#define PAGE_SHIFT 12 #define CACHE_LINE 64 #define MAILBOX_SIZE 8 #define TIMER_FREQ 100 /* in HZ */ From 6e255fe27e21707970abc3e2ab6ba3e6082ac9af Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Tue, 19 Apr 2011 20:18:38 +0200 Subject: [PATCH 09/13] add jacobi solver as example program --- newlib/examples/Makefile | 9 +- newlib/examples/jacobi.c | 200 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 newlib/examples/jacobi.c diff --git a/newlib/examples/Makefile b/newlib/examples/Makefile index 38a21cc6..e1a373f9 100644 --- a/newlib/examples/Makefile +++ b/newlib/examples/Makefile @@ -3,6 +3,7 @@ NEWLIB = ../x86/i586-metalsvm-elf32 MAKE = make STRIP_DEBUG = --strip-debug KEEP_DEBUG = --only-keep-debug +LDFLAGS = # other implicit rules %.o : %.c @@ -10,7 +11,13 @@ KEEP_DEBUG = --only-keep-debug default: all -all: hello tests +all: hello tests jacobi + +jacobi: jacobi.o + $(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $< -lm + $(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym + $(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@ + chmod a-x $@.sym tests: tests.o $(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $< diff --git a/newlib/examples/jacobi.c b/newlib/examples/jacobi.c new file mode 100644 index 00000000..a899d87e --- /dev/null +++ b/newlib/examples/jacobi.c @@ -0,0 +1,200 @@ +/* + * Copyright 2011 Stefan Lankes, Alexander Pilz, Maximilian Marx, Michael Ober, + * Chair for Operating Systems, RWTH Aachen University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#undef errno +extern int errno; + +#define MATRIX_SIZE 256 +#define MAXVALUE 1337 +#define PAGE_SIZE 4096 +#define CACHE_SIZE (256*1024) +#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) + +static int generate_empty_matrix(double*** A , unsigned int N) { + unsigned int iCnt; + int i,j; + + *A = (double**) malloc((N+1)*sizeof(double*)); + + if (*A == NULL) + return -2; /* Error */ + + (*A)[0] = (double*) malloc((N+1)*N*sizeof(double)); + + if (**A == NULL) + return -2; /* Error */ + + for(iCnt=1; iCnt Sum |A[i][j]| with (i != j) + */ + + (*A)[i][i] = sum + 2.0; + (*A)[i][N] += sum + 2.0; + } + + return 0; +} + +int main(int argc, char **argv) +{ + double* temp; + unsigned int i, j, iter_start, iter_end; + unsigned int iterations = 0; + double error, norm, norm_res, max = 0.0; + double** A=0; + double* X; + double* X_old, xi; + double start,stop; + + if (generate_empty_matrix(&A,MATRIX_SIZE) < 0) + { + printf("generate_empty_matrix() failed...\n"); + fflush(stdout); + exit(-1); + + } + + printf("generate_empty_matrix() done...\n"); + fflush(stdout); + + X=(double*) malloc(MATRIX_SIZE*sizeof(double)); + X_old=(double*) malloc(MATRIX_SIZE*sizeof(double)); + if(X == NULL || X_old == NULL) + { + printf("X or X_old is NULL...\n"); + exit(-1); + } + + for(i=0; i 0.01f) + printf("Result is on position %d wrong (%f != 1.0)\n", i, X[i]); + } + printf("maximal error is %f\n", max); + + printf("\nmatrix size: %d x %d\n", MATRIX_SIZE, MATRIX_SIZE); + printf("number of iterations: %d\n", 
iterations); + //printf("calculation time: %f s\n", stop-start); + + free((void*) X_old); + free((void*) X); + + return 0; +} From ea19b157814ca1afe005ea878ef88b3f2f4cb6cc Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Wed, 20 Apr 2011 11:34:52 +0200 Subject: [PATCH 10/13] - add room to the FPU context --- arch/x86/include/asm/tasks_types.h | 67 ++++++++++++++++++++++++++++++ include/metalsvm/tasks_types.h | 7 +++- 2 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 arch/x86/include/asm/tasks_types.h diff --git a/arch/x86/include/asm/tasks_types.h b/arch/x86/include/asm/tasks_types.h new file mode 100644 index 00000000..f0095bf3 --- /dev/null +++ b/arch/x86/include/asm/tasks_types.h @@ -0,0 +1,67 @@ +/* + * Copyright 2011 Stefan Lankes, Chair for Operating Systems, + * RWTH Aachen University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is part of MetalSVM. 
+ * + */ + +#ifndef __ARCH_TASKS_TYPES__ +#define __ARCH_TASKS_TYPES__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + long cwd; + long swd; + long twd; + long fip; + long fcs; + long foo; + long fos; + long st_space[20]; + long status; +} i387_fsave_t; + +typedef struct i387_fxsave_struct { + unsigned short cwd; + unsigned short swd; + unsigned short twd; + unsigned short fop; + long fip; + long fcs; + long foo; + long fos; + long mxcsr; + long reserved; + long st_space[32]; + long xmm_space[32]; + long padding[56]; +} i387_fxsave_t __attribute__ ((aligned (16))); + +union fpu_union { + i387_fsave_t fsave; + i387_fxsave_t fxsave; +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h index 73ca13d6..29fa5337 100644 --- a/include/metalsvm/tasks_types.h +++ b/include/metalsvm/tasks_types.h @@ -33,6 +33,7 @@ #include #include #include +#include #include #ifdef __cplusplus @@ -57,9 +58,9 @@ typedef struct task { uint32_t status; /// Usage in number of pages atomic_int32_t user_usage; - /// Avoids concurrent access to the page directory + /// Avoids concurrent access to the page directory spinlock_t pgd_lock; - /// pointer to the page directory + /// pointer to the page directory struct page_dir* pgd; /// Lock for the VMA_list spinlock_t vma_lock; @@ -69,6 +70,8 @@ typedef struct task { mailbox_wait_msg_t inbox; /// Mail outbox array mailbox_wait_msg_t* outbox[MAX_TASKS]; + /// FPU state + union fpu_union fpu_state; } __attribute__((packed)) task_t; #ifdef __cplusplus From 7e0179f5f7cd319cfbc3a1604e053589d1124441 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Wed, 20 Apr 2011 15:16:22 +0200 Subject: [PATCH 11/13] first version to support the FPU - seems to work - currently, we didn't support SIMD instructions --- arch/x86/include/asm/tasks_types.h | 6 +++++- arch/x86/kernel/entry.asm | 13 ++++++++----- arch/x86/kernel/isrs.c | 20 +++++++++++++++++++- 
include/metalsvm/tasks_types.h | 4 +++- kernel/tasks.c | 9 +++++++-- 5 files changed, 42 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/tasks_types.h b/arch/x86/include/asm/tasks_types.h index f0095bf3..138fbd1c 100644 --- a/arch/x86/include/asm/tasks_types.h +++ b/arch/x86/include/asm/tasks_types.h @@ -55,11 +55,15 @@ typedef struct i387_fxsave_struct { long padding[56]; } i387_fxsave_t __attribute__ ((aligned (16))); -union fpu_union { +union fpu_state { i387_fsave_t fsave; i387_fxsave_t fxsave; }; +static inline void save_fpu_state(union fpu_state* state) { + asm volatile ("fsave %0; fwait" : "=m"((*state).fsave)); +} + #ifdef __cplusplus } #endif diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry.asm index 1a325dba..ca3afaa1 100644 --- a/arch/x86/kernel/entry.asm +++ b/arch/x86/kernel/entry.asm @@ -64,16 +64,19 @@ ALIGN 4 stublet: ; initialize stack pointer. mov esp, default_stack_pointer -; enable cache and turn on FPU exceptions mov eax, cr0 - ; enable cache - and eax, 0x9fffffff - ; ...and turn on FPU exceptions - or eax, 0x20 +; enable cache, disable paging and fpu emulation + and eax, 0x3ffffffb +; ...monitor coprocessor and turn on FPU exceptions + or eax, 0x22 mov cr0, eax ; clears the current pgd entry xor eax, eax mov cr3, eax +; disable SSE support (TODO) + mov eax, cr4 + and eax, 0xfffbf9ff + mov cr4, eax ; interpret multiboot information extern multiboot_init push ebx diff --git a/arch/x86/kernel/isrs.c b/arch/x86/kernel/isrs.c index 63075d00..6df9526e 100644 --- a/arch/x86/kernel/isrs.c +++ b/arch/x86/kernel/isrs.c @@ -74,6 +74,7 @@ extern void isr30(void); extern void isr31(void); static void fault_handler(struct state *s); +static void fpu_handler(struct state *s); /* * This is a very repetitive function... 
it's not hard, it's @@ -158,6 +159,23 @@ void isrs_install(void) // install the default handler for(i=0; i<32; i++) irq_install_handler(i, fault_handler); + + // set handler for fpu exceptions + irq_uninstall_handler(7); + irq_install_handler(7, fpu_handler); +} + +static void fpu_handler(struct state *s) +{ + task_t* task = per_core(current_task); + + kputs("got FPU exception\n"); + asm volatile ("clts"); // clear the TS flag of cr0 + if (!task->fpu_used) { + task->fpu_used = 1; + asm volatile ("finit"); + } else + asm volatile ("frstor %0" :: "m"(task->fpu.fsave)); // restore fpu state } /** @brief Exception messages @@ -189,7 +207,7 @@ static void fault_handler(struct state *s) { if (s->int_no < 32) { kputs(exception_messages[s->int_no]); - kputs(" Exception.\n"); + kprintf(" Exception. (%d)\n", s->int_no); /* Now, we signalize that we have handled the interrupt */ if (apic_is_enabled()) diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h index 29fa5337..c46ec4b4 100644 --- a/include/metalsvm/tasks_types.h +++ b/include/metalsvm/tasks_types.h @@ -66,12 +66,14 @@ typedef struct task { spinlock_t vma_lock; /// List of VMAs vma_t* vma_list; + /// Is set, when the FPU is used + uint32_t fpu_used; /// Mail inbox mailbox_wait_msg_t inbox; /// Mail outbox array mailbox_wait_msg_t* outbox[MAX_TASKS]; /// FPU state - union fpu_union fpu_state; + union fpu_state fpu; } __attribute__((packed)) task_t; #ifdef __cplusplus diff --git a/kernel/tasks.c b/kernel/tasks.c index 6be430ce..3ffb49b9 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -48,7 +48,7 @@ DEFINE_PER_CORE(task_t*, current_task, NULL); * A task's id will be its position in this array. */ static task_t task_table[MAX_TASKS] = {[0 ... 
MAX_TASKS-1] = {0, TASK_INVALID, ATOMIC_INIT(0), \ - SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL}}; + SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0}}; static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT; /** @brief helper function for the assembly code to determine the current task @@ -67,6 +67,7 @@ int multitasking_init(void) { memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS); per_core(current_task) = task_table+0; per_core(current_task)->pgd = get_boot_pgd(); + task_table[0].fpu_used = 0; return 0; } @@ -189,6 +190,7 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg) ret = create_default_frame(task_table+i, ep, arg); + task_table[i].fpu_used = 0; task_table[i].status = TASK_READY; break; } @@ -250,6 +252,7 @@ int sys_fork(void) mailbox_wait_msg_init(&task_table[i].inbox); memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS); task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox; + task_table[i].fpu_used = 0x00; ret = arch_fork(task_table+i); @@ -707,7 +710,9 @@ void scheduler(void) if (per_core(current_task)->status == TASK_RUNNING) per_core(current_task)->status = TASK_READY; task_table[new_id].status = TASK_RUNNING; - + + if (per_core(current_task)->fpu_used) + save_fpu_state(&(per_core(current_task)->fpu)); per_core(current_task) = task_table+new_id; goto get_task_out; } From 8a515c9925b11d64ac466ee968f8343ff8c99be8 Mon Sep 17 00:00:00 2001 From: Stefan Lankes Date: Wed, 20 Apr 2011 20:41:51 +0200 Subject: [PATCH 12/13] remove bug in the calulation of the break condition --- newlib/examples/jacobi.c | 44 ++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/newlib/examples/jacobi.c b/newlib/examples/jacobi.c index a899d87e..6c0e7a20 100644 --- a/newlib/examples/jacobi.c +++ b/newlib/examples/jacobi.c @@ -1,6 +1,6 @@ /* - * Copyright 2011 Stefan Lankes, Alexander Pilz, Maximilian Marx, Michael Ober, - * Chair for 
Operating Systems, RWTH Aachen University + * Copyright 2010-2011 Stefan Lankes + * Chair for Operating Systems, RWTH Aachen University * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,10 +26,10 @@ #undef errno extern int errno; -#define MATRIX_SIZE 256 -#define MAXVALUE 1337 -#define PAGE_SIZE 4096 -#define CACHE_SIZE (256*1024) +#define MATRIX_SIZE 128 +#define MAXVALUE 1337 +#define PAGE_SIZE 4096 +#define CACHE_SIZE (256*1024) #define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) static int generate_empty_matrix(double*** A , unsigned int N) { @@ -89,28 +89,26 @@ static int generate_empty_matrix(double*** A , unsigned int N) { int main(int argc, char **argv) { - double* temp; - unsigned int i, j, iter_start, iter_end; - unsigned int iterations = 0; - double error, norm, norm_res, max = 0.0; - double** A=0; - double* X; - double* X_old, xi; - double start,stop; + double* temp; + unsigned int i, j, iter_start, iter_end; + unsigned int iterations = 0; + double error, norm, max = 0.0; + double** A=0; + double* X; + double* X_old, xi; + double start,stop; if (generate_empty_matrix(&A,MATRIX_SIZE) < 0) { printf("generate_empty_matrix() failed...\n"); - fflush(stdout); exit(-1); } printf("generate_empty_matrix() done...\n"); - fflush(stdout); - X=(double*) malloc(MATRIX_SIZE*sizeof(double)); - X_old=(double*) malloc(MATRIX_SIZE*sizeof(double)); + X = (double*) malloc(MATRIX_SIZE*sizeof(double)); + X_old = (double*) malloc(MATRIX_SIZE*sizeof(double)); if(X == NULL || X_old == NULL) { printf("X or X_old is NULL...\n"); @@ -124,7 +122,6 @@ int main(int argc, char **argv) } printf("start calculation...\n"); - fflush(stdout); iter_start = 0; iter_end = MATRIX_SIZE; @@ -142,7 +139,7 @@ int main(int argc, char **argv) for (i=iter_start; i Date: Wed, 20 Apr 2011 21:23:22 +0200 Subject: [PATCH 13/13] add basic support of sbrk and fix bug in saving of the fpu context --- arch/x86/kernel/isrs.c | 11 
++++++--- arch/x86/mm/page.c | 23 ++++++++++++++++- include/metalsvm/tasks_types.h | 12 +++++++-- kernel/syscall.c | 33 +++++++++++++++++++++++++ kernel/tasks.c | 38 ++++++++++++++++++++--------- kernel/tests.c | 1 + newlib/src/libgloss/metalsvm/sbrk.c | 29 ++++++++++------------ 7 files changed, 113 insertions(+), 34 deletions(-) diff --git a/arch/x86/kernel/isrs.c b/arch/x86/kernel/isrs.c index 6df9526e..cfb45389 100644 --- a/arch/x86/kernel/isrs.c +++ b/arch/x86/kernel/isrs.c @@ -169,13 +169,16 @@ static void fpu_handler(struct state *s) { task_t* task = per_core(current_task); - kputs("got FPU exception\n"); asm volatile ("clts"); // clear the TS flag of cr0 - if (!task->fpu_used) { - task->fpu_used = 1; + if (!(task->flags & TASK_FPU_INIT)) { + // use the FPU at the first time => Initialize FPU asm volatile ("finit"); - } else + task->flags = task->flags|TASK_FPU_INIT|TASK_FPU_USED; + } else { + // restore the FPU context asm volatile ("frstor %0" :: "m"(task->fpu.fsave)); // restore fpu state + task->flags |= TASK_FPU_USED; + } } /** @brief Exception messages diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page.c index 4e5c4f7c..75f34015 100644 --- a/arch/x86/mm/page.c +++ b/arch/x86/mm/page.c @@ -588,7 +588,28 @@ int print_paging_tree(size_t viraddr) static void pagefault_handler(struct state *s) { - kprintf("PAGE FAULT: Task %u got page fault at %p (irq %d)\n", per_core(current_task)->id, read_cr2(), s->int_no); + task_t* task = per_core(current_task); + size_t viraddr = read_cr2(); + size_t phyaddr; + + if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) { + viraddr = viraddr & 0xFFFFF000; + + phyaddr = get_page(); + if (BUILTIN_EXPECT(!phyaddr, 0)) + goto default_handler; + + if (map_region(viraddr, phyaddr, 1, MAP_USER_SPACE|MAP_HEAP) == viraddr) { + memset((void*) viraddr, 0x00, PAGE_SIZE); + return; + } + + kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr); + put_page(phyaddr); + } + +default_handler: + 
kprintf("PAGE FAULT: Task %u got page fault at %p (irq %d)\n", task->id, viraddr, s->int_no); kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%x\n", s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp); diff --git a/include/metalsvm/tasks_types.h b/include/metalsvm/tasks_types.h index c46ec4b4..58f65cf2 100644 --- a/include/metalsvm/tasks_types.h +++ b/include/metalsvm/tasks_types.h @@ -47,6 +47,10 @@ extern "C" { #define TASK_FINISHED 4 #define TASK_IDLE 5 +#define TASK_DEFAULT_FLAGS 0 +#define TASK_FPU_INIT (1 << 0) +#define TASK_FPU_USED (1 << 1) + typedef int (STDCALL *entry_point_t)(void*); struct page_dir; @@ -66,8 +70,12 @@ typedef struct task { spinlock_t vma_lock; /// List of VMAs vma_t* vma_list; - /// Is set, when the FPU is used - uint32_t fpu_used; + /// Additional status flags. For instance, to signalize the using of the FPU + uint32_t flags; + /// Start address of the heap + uint32_t start_heap; + /// End address of the heap + uint32_t end_heap; /// Mail inbox mailbox_wait_msg_t inbox; /// Mail outbox array diff --git a/kernel/syscall.c b/kernel/syscall.c index 76ce8e10..95a39fc6 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -22,6 +22,7 @@ #include #include #include +#include static int sys_write(int fildes, const char *buf, size_t len) { @@ -37,6 +38,32 @@ static int sys_write(int fildes, const char *buf, size_t len) return len; } +static int sys_sbrk(int incr) +{ + task_t* task = per_core(current_task); + vma_t* tmp = NULL; + int ret; + + spinlock_lock(&task->vma_lock); + + tmp = task->vma_list; + while(tmp && !((task->end_heap >= tmp->start) && (task->end_heap <= tmp->end))) + tmp = tmp->next; + + ret = (int) task->end_heap; + task->end_heap += incr; + if (task->end_heap < task->start_heap) + task->end_heap = task->start_heap; + + // resize virtual memory area + if (tmp && (tmp->end <= task->end_heap)) + tmp->end = task->end_heap; + + 
spinlock_unlock(&task->vma_lock); + + return ret; +} + int syscall_handler(uint32_t sys_nr, ...) { int ret = -EINVAL; @@ -64,6 +91,12 @@ int syscall_handler(uint32_t sys_nr, ...) case __NR_close: ret = 0; break; + case __NR_sbrk: { + int incr = va_arg(vl, int); + + ret = sys_sbrk(incr); + break; + } case __NR_getpid: ret = per_core(current_task)->id; break; diff --git a/kernel/tasks.c b/kernel/tasks.c index 3ffb49b9..ae699687 100644 --- a/kernel/tasks.c +++ b/kernel/tasks.c @@ -48,7 +48,7 @@ DEFINE_PER_CORE(task_t*, current_task, NULL); * A task's id will be its position in this array. */ static task_t task_table[MAX_TASKS] = {[0 ... MAX_TASKS-1] = {0, TASK_INVALID, ATOMIC_INIT(0), \ - SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0}}; + SPINLOCK_INIT, NULL, SPINLOCK_INIT, NULL, 0, 0, 0}}; static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT; /** @brief helper function for the assembly code to determine the current task @@ -67,7 +67,7 @@ int multitasking_init(void) { memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS); per_core(current_task) = task_table+0; per_core(current_task)->pgd = get_boot_pgd(); - task_table[0].fpu_used = 0; + task_table[0].flags = TASK_DEFAULT_FLAGS; return 0; } @@ -190,7 +190,9 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg) ret = create_default_frame(task_table+i, ep, arg); - task_table[i].fpu_used = 0; + task_table[i].flags = TASK_DEFAULT_FLAGS; + task_table[i].start_heap = 0; + task_table[i].end_heap = 0; task_table[i].status = TASK_READY; break; } @@ -252,7 +254,10 @@ int sys_fork(void) mailbox_wait_msg_init(&task_table[i].inbox); memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS); task_table[i].outbox[per_core(current_task)->id] = &per_core(current_task)->inbox; - task_table[i].fpu_used = 0x00; + task_table[i].flags = per_core(current_task)->flags; + memcpy(&(task_table[i].fpu), &(per_core(current_task)->fpu), sizeof(union fpu_state)); + task_table[i].start_heap = 0; + 
task_table[i].end_heap = 0; ret = arch_fork(task_table+i); @@ -352,8 +357,8 @@ static int load_task(load_args_t* largs) if (!prog_header.virt_addr) continue; - npages = (prog_header.mem_size / PAGE_SIZE); - if (prog_header.mem_size % PAGE_SIZE) + npages = (prog_header.mem_size >> PAGE_SHIFT); + if (prog_header.mem_size & (PAGE_SIZE-1)) npages++; addr = get_pages(npages); @@ -369,6 +374,10 @@ static int load_task(load_args_t* largs) // clear pages memset((void*) prog_header.virt_addr, 0, npages*PAGE_SIZE); + // set starting point of the heap + if (per_core(current_task)->start_heap < prog_header.virt_addr+prog_header.mem_size) + per_core(current_task)->start_heap = per_core(current_task)->end_heap = prog_header.virt_addr+prog_header.mem_size; + // load program read_fs(node, (uint8_t*)prog_header.virt_addr, prog_header.file_size, prog_header.offset); @@ -387,8 +396,8 @@ static int load_task(load_args_t* largs) case ELF_PT_GNU_STACK: // Indicates stack executability // create user-level stack - npages = DEFAULT_STACK_SIZE / PAGE_SIZE; - if (DEFAULT_STACK_SIZE % PAGE_SIZE) + npages = DEFAULT_STACK_SIZE >> PAGE_SHIFT; + if (DEFAULT_STACK_SIZE & (PAGE_SIZE-1)) npages++; addr = get_pages(npages); @@ -475,6 +484,9 @@ static int load_task(load_args_t* largs) kfree(largs, sizeof(load_args_t)); + // clear fpu state + per_core(current_task)->flags &= ~(TASK_FPU_USED|TASK_FPU_INIT); + jump_to_user_code(header.entry, stack+offset); return 0; @@ -703,6 +715,12 @@ void scheduler(void) if (per_core(current_task)->status == TASK_FINISHED) per_core(current_task)->status = TASK_INVALID; + /* if the task is using the FPU, we need to save the FPU context */ + if (per_core(current_task)->flags & TASK_FPU_USED) { + save_fpu_state(&(per_core(current_task)->fpu)); + per_core(current_task)->flags &= ~TASK_FPU_USED; + } + for(i=1, new_id=(per_core(current_task)->id + 1) % MAX_TASKS; istatus == TASK_RUNNING) per_core(current_task)->status = TASK_READY; task_table[new_id].status = TASK_RUNNING; 
- - if (per_core(current_task)->fpu_used) - save_fpu_state(&(per_core(current_task)->fpu)); per_core(current_task) = task_table+new_id; + goto get_task_out; } } diff --git a/kernel/tests.c b/kernel/tests.c index fe26ffc6..08f3d45f 100644 --- a/kernel/tests.c +++ b/kernel/tests.c @@ -114,6 +114,7 @@ int test_init(void) //create_kernel_task(NULL, consumer, NULL); //create_user_task(NULL, "/bin/hello", argv); create_user_task(NULL, "/bin/tests", argv); + //create_user_task(NULL, "/bin/jacobi", argv); return 0; } diff --git a/newlib/src/libgloss/metalsvm/sbrk.c b/newlib/src/libgloss/metalsvm/sbrk.c index 356c5ee7..78002469 100644 --- a/newlib/src/libgloss/metalsvm/sbrk.c +++ b/newlib/src/libgloss/metalsvm/sbrk.c @@ -22,23 +22,20 @@ #include #undef errno extern int errno; +#include "warning.h" +#include "syscall.h" -#ifndef NULL -#define NULL ((void*) 0) -#endif +void* +_DEFUN (sbrk, (incr), + int incr) +{ + int ret; -void * -sbrk (incr) - int incr; -{ - extern char _end; // set by linker - static char *heap_end = NULL; - char *prev_heap_end; + ret = SYSCALL1(__NR_sbrk, incr); + if (ret < 0x1000) { + errno = -ret; + ret = -1; + } - if (!heap_end) - heap_end = &_end; - prev_heap_end = heap_end; - heap_end += incr; - - return (void *) prev_heap_end; + return (void*) ret; }