/* 
 * Written by the Chair for Operating Systems, RWTH Aachen University
 * 
 * NO Copyright (C) 2010-2011, Stefan Lankes
 * consider these trivial functions to be public domain.
 * 
 * These functions are distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

/**
 * @author Stefan Lankes
 * @file arch/x86/include/asm/string.h
 * @brief Functions related to memcpy and strings.
 *
 * This file deals with memcpy, memset, string functions and everything related to
 * contiguous byte fields.
 */

#ifndef __ARCH_STRING_H__
#define __ARCH_STRING_H__

#include <metalsvm/stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/** @brief Copy a physical page to another physical destination
 *
 * Only the declaration is visible in this header; the implementation lives
 * elsewhere.
 *
 * NOTE(review): the number of bytes copied is not visible from this
 * declaration -- presumably exactly one page; confirm against the
 * implementation.
 *
 * @param dest Destination address
 * @param src Source address
 */
void copy_page_physical(void* dest, const void * src);

#ifdef HAVE_ARCH_MEMCPY

#ifdef CONFIG_ROCKCREEK
/** @brief Fast procedure to get a byte range from RAM into on-die memory.
 *
 * On the current SCC architecture, a write access to a cache line that is
 * not present does not perform a cache line fill. In that case the core
 * writes directly to the memory.
 *
 * The following function therefore loads from ("prefetches") its destination
 * before writing to it and thereby avoids the bad behavior of a "write miss".
 *
 * The range is copied in count / 32 blocks of 32 bytes each; the remaining
 * count % 32 bytes are copied byte-wise with "rep movsb".
 *
 * Unlike the generic memcpy and both memset variants in this header, this
 * function does not check its pointer arguments for NULL.
 *
 * NOTE(review): the load from (%edi) in front of the tail copy is executed
 * even if no tail bytes remain, so it can read up to 4 bytes past the end of
 * the destination range -- confirm that this is acceptable for all callers.
 *
 * @param dest Destination address
 * @param src Source address
 * @param count Range size in bytes
 * @return dest
 */ 
inline static void *memcpy(void *dest, const void *src, size_t count)
{
	/* Dummy outputs: they only tell the compiler which registers are modified. */
	int32_t h, i, j, k, l, m;

	asm volatile   ("cld;\n\t"
			/* 1: loop head, eax holds the number of remaining 32-byte blocks */
			"1: cmpl $0, %%eax ; je 3f\n\t"
			/* touch the current source and destination block (loaded values are discarded) */
			"movl (%%esi), %%ecx\n\t"
			"movl (%%edi), %%edx\n\t"
			/* unless this is the last full block, touch the following block as well */
			"cmpl $1, %%eax ; je 2f\n\t"
			"movl 32(%%esi), %%ecx\n\t"
			"movl 32(%%edi), %%edx\n\t"
			/* 2: copy 32 bytes, two double words at a time via ecx/edx */
			"2: movl 0(%%esi), %%ecx\n\t"
			"movl 4(%%esi), %%edx\n\t"
			"movl %%ecx, 0(%%edi)\n\t"
			"movl %%edx, 4(%%edi)\n\t"
			"movl 8(%%esi), %%ecx\n\t"
			"movl 12(%%esi), %%edx\n\t"
			"movl %%ecx, 8(%%edi)\n\t"
			"movl %%edx, 12(%%edi)\n\t"
			"movl 16(%%esi), %%ecx\n\t"
			"movl 20(%%esi), %%edx\n\t"
			"movl %%ecx, 16(%%edi)\n\t"
			"movl %%edx, 20(%%edi)\n\t"
			"movl 24(%%esi), %%ecx\n\t"
			"movl 28(%%esi), %%edx\n\t"
			"movl %%ecx, 24(%%edi)\n\t"
			"movl %%edx, 28(%%edi)\n\t"
			/* advance both pointers to the next block and loop */
			"addl $32, %%esi\n\t"
			"addl $32, %%edi\n\t"
			"dec %%eax ; jmp 1b\n\t"
			/* 3: tail, ebx still holds count; copy the remaining count % 32 bytes */
			"3: movl %%ebx, %%ecx\n\t"
			"movl (%%edi), %%edx\n\t"
			"andl $31, %%ecx\n\t"
			"rep ; movsb\n\t":"=&a" (h), "=&D"(i), "=&S"(j), "=&b"(k), "=&c"(l), "=&d"(m)
			/* inputs: eax = count / 32, edi = dest, esi = src, ebx = count */
			: "0"(count / 32), "1"(dest), "2"(src), "3"(count) : "memory","cc");

        return dest;
}
#else
/** @brief Copy a byte range from source to dest
 *
 * The first count / 4 double words are copied with "rep movsl", the
 * remaining count % 4 bytes with "rep movsb". The direction flag is cleared
 * beforehand, so the copy runs from low to high addresses.
 *
 * If dest or src is a NULL pointer, nothing is copied.
 *
 * @param dest Destination address
 * @param src Source address
 * @param count Range of the byte field in bytes
 * @return dest
 */
inline static void *memcpy(void* dest, const void *src, size_t count)
{
	/* Dummy outputs: they only tell the compiler that ecx, edi and esi are modified. */
	int32_t i, j, k;

	if (BUILTIN_EXPECT(!dest || !src, 0))
		return dest;

	/*
	 * Operand numbering: %0-%2 are the outputs (ecx, edi, esi), %3 is
	 * count/4 (tied to ecx), %4 is count, %5 is dest and %6 is src.
	 */
	asm volatile (
		/* copy count / 4 double words */
		"cld; rep movsl\n\t"
		/* reload the full count and keep only the remainder (count % 4) */
		"movl %4, %%ecx\n\t" 
		"andl $3, %%ecx\n\t"
		/* copy the remaining bytes */
		"rep movsb\n\t" 
		: "=&c"(i), "=&D"(j), "=&S"(k) 
		: "0"(count/4), "g"(count), "1"(dest), "2"(src) : "memory","cc");

	return dest;
}
#endif

#endif

#ifdef HAVE_ARCH_MEMSET

#ifdef CONFIG_ROCKCREEK
/** @brief Repeated write of a value to a whole range of bytes
 *
 * SCC optimized version of memset (see memcpy)
 *
 * The range is filled in blocks of 32 bytes. Before each block is written,
 * one load from the start of that block in the destination is issued; the
 * loaded value is discarded. The remaining count % 32 bytes are written by
 * a final "rep stosb" without a preceding load.
 *
 * "stosb" stores the low byte of eax, so only the low 8 bits of val are
 * written.
 *
 * If dest is a NULL pointer, nothing is written.
 *
 * @param dest Destination address
 * @param val Value to flood the range with
 * @param count Size of target range in bytes
 * @return dest
 */
inline static void *memset(void* dest, int val, size_t count)
{
	/* Dummy outputs: they only tell the compiler that ebx and edi are modified. */
	int32_t i, j;

	if (BUILTIN_EXPECT(!dest, 0))
		return dest;

	asm volatile ("cld\n\t"
		/* 1: loop head, ebx holds the number of bytes still to be written */
		"1: cmpl $32, %%ebx ; jb 2f\n\t"
		/* touch the destination block before writing to it */
		"movl (%%edi), %%edx\n\t"
		/* fill one block of 32 bytes */
		"movl $32, %%ecx\n\t"
		"rep stosb\n\t"
		"subl $32, %%ebx\n\t"
		"jmp 1b\n\t"
		/* 2: fewer than 32 bytes left, write the remainder */
		"2: movl %%ebx, %%ecx ; rep stosb"
		: "=&b"(i), "=&D"(j)
		/* inputs: eax = val, edi = dest, ebx = count; edx and ecx are scratch */
		: "a"(val), "1"(dest), "0"(count) :  "%edx", "%ecx", "memory","cc");

	return dest;
}
#else
/** @brief Repeated write of a value to a whole range of bytes
 *
 * The range is filled byte-wise with a single "rep stosb". "stosb" stores
 * the low byte of eax, so only the low 8 bits of val are written.
 *
 * If dest is a NULL pointer, nothing is written.
 *
 * @param dest Destination address
 * @param val Value to flood the range with
 * @param count Size of target range in bytes
 * @return dest
 */
inline static void *memset(void* dest, int val, size_t count)
{
	/* Dummy outputs: they only tell the compiler that ecx and edi are modified. */
	int32_t i, j;

	if (BUILTIN_EXPECT(!dest, 0))
		return dest;

	/* inputs: eax = val, edi = dest, ecx = count */
	asm volatile ("cld; rep stosb" 
		: "=&c"(i), "=&D"(j)
		: "a"(val), "1"(dest), "0"(count) : "memory","cc");

	return dest;
}
#endif

#endif

#ifdef HAVE_ARCH_STRLEN

/** @brief Standard string length
 *
 * This function computes the length of the given null terminated string
 * just like the strlen functions you are used to.
 *
 * @param str Pointer to the null terminated string
 * @return 
 * - The length of the string
 * - 0 if str is a NULL pointer
 */
inline static size_t strlen(const char* str)
{
	size_t len = 0;
	/* Dummy outputs: they only tell the compiler that edi and eax are modified. */
	uint32_t i, j;

	if (BUILTIN_EXPECT(!str, 0))
		return len;

	/*
	 * ecx starts at 0 (len) and is inverted to all ones, i.e. the maximum
	 * repeat count. "repne scasb" then compares each byte at edi with al (0)
	 * and decrements ecx once per byte, including the terminating null byte.
	 * Inverting ecx again yields the number of scanned bytes; the final
	 * "dec" removes the terminator from that count.
	 */
	asm volatile("not %%ecx; cld; repne scasb; not %%ecx; dec %%ecx"
		: "=&c"(len), "=&D"(i), "=&a"(j)
		/* inputs: eax = 0 (byte to search for), edi = str, ecx = len (0) */
		: "2"(0), "1"(str), "0"(len)
		: "memory","cc");

	return len;
}
#endif

#ifdef HAVE_ARCH_STRNCPY

/** @brief Copy string with maximum of n byte length
 *
 * Only the declaration is visible in this header; the implementation lives
 * elsewhere.
 *
 * NOTE(review): whether dest is always null terminated or padded when src is
 * shorter or longer than n is not visible from this declaration -- confirm
 * against the implementation.
 *
 * @param dest Destination string pointer
 * @param src Source string pointer
 * @param n maximum number of bytes to copy
 * @return presumably dest, as with the standard strncpy -- confirm against
 *         the implementation
 */
char* strncpy(char* dest, const char* src, size_t n);
#endif

#ifdef HAVE_ARCH_STRCPY

/** @brief Copy string
 *
 * Note that there is another safer variant of this function: strncpy.\n
 * That one could save you from accidents with buffer overruns.
 *
 * Only the declaration is visible in this header; the implementation lives
 * elsewhere.
 *
 * @param dest Destination string pointer
 * @param src Source string pointer
 * @return presumably dest, as with the standard strcpy -- confirm against
 *         the implementation
 */
char* strcpy(char* dest, const char* src);
#endif

#ifdef __cplusplus
}
#endif

#endif