Use optimized memcpy functions to increase performance

This commit is contained in:
Stefan Lankes 2011-09-06 08:58:03 -07:00
parent a79f47d365
commit 24ea129456

View file

@ -108,15 +108,67 @@
#define MAC_HI(_x) ((((_x) >> 32)) & 0xFFFF)
#define MAC_LO(_x) (((_x) ) & 0xFFFFFFFF)
/*
 * MIN(a, b) - yield the smaller of a and b.
 *
 * The complete expansion is wrapped in parentheses so the macro keeps its
 * meaning when embedded in a larger expression (for example 2 * MIN(x, y) or
 * MIN(x, y) == z); without them the surrounding operators bind to the
 * comparison or to the last operand of the conditional instead.
 *
 * The selected argument is evaluated twice, so do not pass expressions with
 * side effects.
 */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
static struct netif* mynetif;
#if 0
inline static void *memcpy_noprefetching(void* dest, const void *src, size_t count)
/*
 * memcpy_from_nc - copy count bytes from src to dest and return dest.
 *
 * The active (#else) implementation is hand-written 32-bit x86 inline
 * assembly: it copies count / 32 blocks of 32 bytes with paired 4-byte
 * loads/stores and then the remaining count % 32 bytes with "rep movsb".
 * Before each block is written, the destination is read once (and one
 * block ahead when more than one block is left); the loaded value is discarded.
 * Presumably this pulls the destination into the cache before it is written,
 * and "nc" presumably stands for a non-cacheable source - TODO confirm.
 *
 * dest:  destination buffer
 * src:   source buffer
 * count: number of bytes to copy
 */
inline static void* memcpy_from_nc(void *dest, const void *src, size_t count)
{
/* NOTE(review): i, j and k are declared here and again in the #else branch
 * below - confirm which declaration is intended. */
int32_t i, j, k;
#if 0
/* Disabled reference implementation: plain byte-by-byte copy. */
size_t i;
/* Returns dest untouched when either pointer is NULL (BUILTIN_EXPECT is
 * presumably a branch-prediction hint - defined outside this view). */
if (BUILTIN_EXPECT(!dest || !src, 0))
return dest;
for(i=0; i<count; i++)
((uint8_t*) dest)[i] = ((uint8_t*) src)[i];
return dest;
#else
/* Dummy outputs: they only tell the compiler that eax, edi, esi, ebx, ecx and
 * edx are modified by the asm statement. */
int32_t h, i, j, k, l, m;
/*
 * Register use (see the constraint lists at the end of the statement):
 *   eax = count / 32  (number of 32-byte blocks still to copy)
 *   edi = dest, esi = src
 *   ebx = count       (used to compute the tail length)
 *   ecx, edx          scratch registers for the copy
 *
 * Label 1: loop head; leaves to label 3 once eax reaches 0. Reads (%edi),
 *          and 32(%edi) as well unless this is the last block.
 * Label 2: copies one 32-byte block as four 8-byte load/store pairs, advances
 *          both pointers by 32, decrements eax and jumps back to label 1.
 * Label 3: ecx = count & 31, then "rep movsb" copies the tail bytes.
 *
 * NOTE(review): the "movl (%%edi), %%edx" after label 3 executes even when
 * no tail bytes remain, so it reads 4 bytes at the current end of the
 * copied destination region - confirm that address is always readable.
 */
asm volatile ("cld;\n\t"
"1: cmpl $0, %%eax ; je 3f\n\t"
"movl (%%edi), %%edx\n\t"
"cmpl $1, %%eax ; je 2f\n\t"
"movl 32(%%edi), %%edx\n\t"
"2: movl 0(%%esi), %%ecx\n\t"
"movl 4(%%esi), %%edx\n\t"
"movl %%ecx, 0(%%edi)\n\t"
"movl %%edx, 4(%%edi)\n\t"
"movl 8(%%esi), %%ecx\n\t"
"movl 12(%%esi), %%edx\n\t"
"movl %%ecx, 8(%%edi)\n\t"
"movl %%edx, 12(%%edi)\n\t"
"movl 16(%%esi), %%ecx\n\t"
"movl 20(%%esi), %%edx\n\t"
"movl %%ecx, 16(%%edi)\n\t"
"movl %%edx, 20(%%edi)\n\t"
"movl 24(%%esi), %%ecx\n\t"
"movl 28(%%esi), %%edx\n\t"
"movl %%ecx, 24(%%edi)\n\t"
"movl %%edx, 28(%%edi)\n\t"
"addl $32, %%esi\n\t"
"addl $32, %%edi\n\t"
"dec %%eax ; jmp 1b\n\t"
"3: movl %%ebx, %%ecx\n\t"
"movl (%%edi), %%edx\n\t"
"andl $31, %%ecx\n\t"
"rep ; movsb\n\t" : "=&a"(h), "=&D"(i), "=&S"(j), "=&b"(k), "=&c"(l), "=&d"(m)
: "0"(count / 32), "1"(dest), "2"(src), "3"(count) : "memory","cc");
return dest;
#endif
}
inline static void* memcpy_to_nc(void* dest, const void *src, size_t count)
{
#if 0
size_t i;
for(i=0; i<count; i++)
((uint8_t*) dest)[i] = ((uint8_t*) src)[i];
return dest;
#else
int32_t i, j, k;
asm volatile (
"cld; rep movsl\n\t"
@ -127,8 +179,8 @@ inline static void *memcpy_noprefetching(void* dest, const void *src, size_t cou
: "0"(count/4), "g"(count), "1"(dest), "2"(src) : "memory","cc");
return dest;
}
#endif
}
static int read_emac(int num_emac, int offset, int core)
{
@ -207,7 +259,7 @@ again:
* This list MUST consist of a single packet ONLY
*/
for (q=p, i=0; q!=0; q=q->next) {
memcpy(((uint8_t*)addr) + 2 + i, q->payload, q->len);
memcpy_to_nc(((uint8_t*)addr) + 2 + i, q->payload, q->len);
i += q->len;
}
@ -224,12 +276,12 @@ again:
if (bytes_left < bytes_to_copy)
bytes_to_copy = bytes_left;
LWIP_DEBUGF(NETIF_DEBUG, ("special case: copy last %d bytes\n", bytes_to_copy));
//LWIP_DEBUGF(NETIF_DEBUG, ("special case: copy last %d bytes\n", bytes_to_copy));
q = p; i = 0;
while ((q != 0) && (i < bytes_to_copy)) {
sz = q->len > bytes_to_copy-i ? bytes_to_copy-i : q->len;
memcpy(((uint8_t*) addr) + 2 + i, q->payload, sz);
memcpy_to_nc(((uint8_t*) addr) + 2 + i, q->payload, sz);
bytes_left -= sz;
i += sz;
if (i < bytes_to_copy)
@ -239,16 +291,16 @@ again:
if (bytes_left != 0) {
rckemacif->tx_write_offset = 1;
addr = rckemacif->tx_buffer + 32;
LWIP_DEBUGF(NETIF_DEBUG, ("special case: copy remaining %d bytes\n", bytes_left));
//LWIP_DEBUGF(NETIF_DEBUG, ("special case: copy remaining %d bytes\n", bytes_left));
i = 0;
if (sz < q->len) {
memcpy((uint8_t*) addr, q->payload + sz, q->len - sz);
memcpy_to_nc((uint8_t*) addr, q->payload + sz, q->len - sz);
bytes_left -= (q->len - sz);
i = q->len - sz;
}
for(q=q->next; (q != 0); q = q->next) {
memcpy(((uint8_t*) addr) + i, q->payload, q->len);
memcpy_to_nc(((uint8_t*) addr) + i, q->payload, q->len);
i += q->len;
}
@ -256,7 +308,7 @@ again:
if (rest != 0)
rest = 32 - rest;
LWIP_DEBUGF(NETIF_DEBUG, ("Rest is %d\n", rest));
//LWIP_DEBUGF(NETIF_DEBUG, ("Rest is %d\n", rest));
rckemacif->tx_write_offset += CLINE_PACKETS(bytes_left + rest) - 1;
}
}
@ -312,7 +364,7 @@ static void rckemacif_rx_handler(struct netif* netif, unsigned int write_offset)
{
rckemacif_t* rckemacif = netif->state;
unsigned short read_offset = rckemacif->rx_read_offset;
unsigned int counter;
//unsigned int counter;
volatile void *addr = NULL;
uint16_t i, length = 0;
uint32_t packets = 0;
@ -373,7 +425,7 @@ again:
if (read_offset < write_offset) {
for (q=p, i/*counter=0*/; q!=NULL; q=q->next) {
memcpy((uint8_t*) q->payload, (uint8_t*)addr + 2, q->len);
memcpy_from_nc((uint8_t*) q->payload, (uint8_t*)addr + 2, q->len);
//for(i=0; i<q->len; i++, counter++) {
// ((uint8_t*) q->payload)[i] = ((uint8_t*)addr)[2 + counter];
//}
@ -384,33 +436,52 @@ again:
int rest;
int bytesLeft = length;
int bytesToCopy = length;
int counter = 0;
/* rest to the end of buffer - 2 bytes length information */
rest = (rckemacif->rx_buffer_max - read_offset + 1) * 32 - 2;
if (length > rest)
bytesToCopy = rest;
LWIP_DEBUGF(NETIF_DEBUG, ("bytes to copy: %d, bytesLeft: %d\n", bytesToCopy, bytesLeft));
//LWIP_DEBUGF(NETIF_DEBUG, ("bytes to copy: %d, bytesLeft: %d\n", bytesToCopy, bytesLeft));
for (q=p, counter=0; q!=NULL; q=q->next) {
for(i=0; i<q->len; i++, counter++) {
q = p;
i = /*counter =*/ 0;
while ((q != NULL) && (counter < bytesToCopy)) {
i = MIN(q->len, bytesToCopy - counter);
memcpy_from_nc(q->payload, (uint8_t*) addr + 2 + counter, i);
counter += i;
if (counter >= bytesToCopy)
goto out;
else
q = q->next;
/*for(i=0; i<q->len; i++, counter++) {
if (counter < bytesToCopy)
((uint8_t*) q->payload)[i] = ((uint8_t*)addr)[2 + counter];
else
goto out;
}
}*/
}
out:
bytesLeft -= bytesToCopy;
if (bytesLeft != 0) {
addr = rckemacif->rx_buffer + 0x20;
LWIP_DEBUGF(NETIF_DEBUG, ("copying from %p, left: %d (%x)\n", addr, bytesLeft, ((uint8_t*)addr)[0]));
for(counter=0; (i<q->len) && (counter < bytesLeft); i++, counter++)
((uint8_t*) q->payload)[i] = ((uint8_t*)addr)[counter];
counter = 0;
//LWIP_DEBUGF(NETIF_DEBUG, ("copying from %p, left: %d (%x)\n", addr, bytesLeft, ((uint8_t*)addr)[0]));
if (i < q->len) {
counter = MIN(q->len - i, bytesLeft);
memcpy_from_nc((uint8_t*)q->payload + i, (uint8_t*) addr, counter);
}
//for(counter=0; (i<q->len) && (counter < bytesLeft); i++, counter++)
// ((uint8_t*) q->payload)[i] = ((uint8_t*)addr)[counter];
for(q=q->next; (q!=NULL) && (counter < bytesLeft); q=q->next) {
for(i=0; (i<q->len) && (counter < bytesLeft); i++, counter++) {
((uint8_t*) q->payload)[i] = ((uint8_t*)addr)[counter];
}
i = MIN(q->len, bytesLeft - counter);
memcpy_from_nc((uint8_t*)q->payload, (uint8_t*)addr + counter, i);
counter += i;
//for(i=0; (i<q->len) && (counter < bytesLeft); i++, counter++) {
// ((uint8_t*) q->payload)[i] = ((uint8_t*)addr)[counter];
//}
}
read_offset = CLINE_PACKETS(bytesLeft);
} else {