Use optimized memcpy functions to increase performance
This commit is contained in:
parent
a79f47d365
commit
24ea129456
1 changed files with 96 additions and 25 deletions
|
@ -108,15 +108,67 @@
|
|||
#define MAC_HI(_x) ((((_x) >> 32)) & 0xFFFF)
|
||||
#define MAC_LO(_x) (((_x) ) & 0xFFFFFFFF)
|
||||
|
||||
/*
 * MIN(a, b) - yield the smaller of two values.
 *
 * The whole expansion is parenthesized so the macro composes correctly when
 * it is used inside a larger expression (e.g. `2 * MIN(x, y)` or
 * `MIN(x, y) + 1`). Each argument may still be evaluated twice, so do not
 * pass expressions with side effects.
 */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
static struct netif* mynetif;
|
||||
|
||||
#if 0
|
||||
inline static void *memcpy_noprefetching(void* dest, const void *src, size_t count)
|
||||
/*
 * memcpy_from_nc - copy `count` bytes from `src` to `dest` and return `dest`.
 *
 * Two implementations are present:
 *  - the `#if 0` branch (compiled out): a plain byte loop that returns early
 *    when `dest` or `src` is NULL;
 *  - the `#else` branch (active): x86 inline assembly using 32-bit registers.
 *    It copies count / 32 blocks of 32 bytes with 4-byte moves and then the
 *    remaining count & 31 bytes with `rep movsb`. This branch performs no
 *    NULL check.
 *
 * NOTE(review): the name suggests `src` is non-cacheable memory; confirm
 * against the callers.
 */
inline static void* memcpy_from_nc(void *dest, const void *src, size_t count)
|
||||
{
|
||||
/* NOTE(review): this declaration duplicates names declared again in the
 * #else branch below; it looks like a pre-change line carried over by the
 * diff rendering -- confirm against the real file. */
int32_t i, j, k;
|
||||
#if 0
|
||||
size_t i;
|
||||
|
||||
if (BUILTIN_EXPECT(!dest || !src, 0))
|
||||
return dest;
|
||||
for(i=0; i<count; i++)
|
||||
((uint8_t*) dest)[i] = ((uint8_t*) src)[i];
|
||||
|
||||
return dest;
|
||||
#else
|
||||
/* Dummy outputs, one per register bound in the asm statement below. */
int32_t h, i, j, k, l, m;
|
||||
|
||||
/* Register use: eax = count / 32 (blocks left), edi = dest, esi = src,
 * ebx = count, ecx/edx = scratch. */
asm volatile ("cld;\n\t"
|
||||
/* 1: loop head -- jump to the tail (3) once no 32-byte blocks are left. */
"1: cmpl $0, %%eax ; je 3f\n\t"
|
||||
/* Load a word from the destination; the value in edx is overwritten below.
 * Presumably this pulls the destination line into the cache -- confirm. */
"movl (%%edi), %%edx\n\t"
|
||||
/* Skip the load at dest + 32 when this is the last full block. */
"cmpl $1, %%eax ; je 2f\n\t"
|
||||
"movl 32(%%edi), %%edx\n\t"
|
||||
/* 2: copy one 32-byte block as eight 4-byte moves through ecx/edx. */
"2: movl 0(%%esi), %%ecx\n\t"
|
||||
"movl 4(%%esi), %%edx\n\t"
|
||||
"movl %%ecx, 0(%%edi)\n\t"
|
||||
"movl %%edx, 4(%%edi)\n\t"
|
||||
"movl 8(%%esi), %%ecx\n\t"
|
||||
"movl 12(%%esi), %%edx\n\t"
|
||||
"movl %%ecx, 8(%%edi)\n\t"
|
||||
"movl %%edx, 12(%%edi)\n\t"
|
||||
"movl 16(%%esi), %%ecx\n\t"
|
||||
"movl 20(%%esi), %%edx\n\t"
|
||||
"movl %%ecx, 16(%%edi)\n\t"
|
||||
"movl %%edx, 20(%%edi)\n\t"
|
||||
"movl 24(%%esi), %%ecx\n\t"
|
||||
"movl 28(%%esi), %%edx\n\t"
|
||||
"movl %%ecx, 24(%%edi)\n\t"
|
||||
"movl %%edx, 28(%%edi)\n\t"
|
||||
/* Advance both pointers by one block and loop. */
"addl $32, %%esi\n\t"
|
||||
"addl $32, %%edi\n\t"
|
||||
"dec %%eax ; jmp 1b\n\t"
|
||||
/* 3: tail -- ecx = count & 31; those bytes are copied with rep movsb. */
"3: movl %%ebx, %%ecx\n\t"
|
||||
/* NOTE(review): this load from dest runs even when count & 31 is 0 --
 * check whether it can read past the end of the destination buffer. */
"movl (%%edi), %%edx\n\t"
|
||||
"andl $31, %%ecx\n\t"
|
||||
"rep ; movsb\n\t" : "=&a"(h), "=&D"(i), "=&S"(j), "=&b"(k), "=&c"(l), "=&d"(m)
|
||||
: "0"(count / 32), "1"(dest), "2"(src), "3"(count) : "memory","cc");
|
||||
|
||||
return dest;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline static void* memcpy_to_nc(void* dest, const void *src, size_t count)
|
||||
{
|
||||
#if 0
|
||||
size_t i;
|
||||
|
||||
for(i=0; i<count; i++)
|
||||
((uint8_t*) dest)[i] = ((uint8_t*) src)[i];
|
||||
|
||||
return dest;
|
||||
#else
|
||||
int32_t i, j, k;
|
||||
|
||||
asm volatile (
|
||||
"cld; rep movsl\n\t"
|
||||
|
@ -127,8 +179,8 @@ inline static void *memcpy_noprefetching(void* dest, const void *src, size_t cou
|
|||
: "0"(count/4), "g"(count), "1"(dest), "2"(src) : "memory","cc");
|
||||
|
||||
return dest;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static int read_emac(int num_emac, int offset, int core)
|
||||
{
|
||||
|
@ -207,7 +259,7 @@ again:
|
|||
* This list MUST consist of a single packet ONLY
|
||||
*/
|
||||
for (q=p, i=0; q!=0; q=q->next) {
|
||||
memcpy(((uint8_t*)addr) + 2 + i, q->payload, q->len);
|
||||
memcpy_to_nc(((uint8_t*)addr) + 2 + i, q->payload, q->len);
|
||||
i += q->len;
|
||||
}
|
||||
|
||||
|
@ -224,12 +276,12 @@ again:
|
|||
if (bytes_left < bytes_to_copy)
|
||||
bytes_to_copy = bytes_left;
|
||||
|
||||
LWIP_DEBUGF(NETIF_DEBUG, ("special case: copy last %d bytes\n", bytes_to_copy));
|
||||
//LWIP_DEBUGF(NETIF_DEBUG, ("special case: copy last %d bytes\n", bytes_to_copy));
|
||||
|
||||
q = p; i = 0;
|
||||
while ((q != 0) && (i < bytes_to_copy)) {
|
||||
sz = q->len > bytes_to_copy-i ? bytes_to_copy-i : q->len;
|
||||
memcpy(((uint8_t*) addr) + 2 + i, q->payload, sz);
|
||||
memcpy_to_nc(((uint8_t*) addr) + 2 + i, q->payload, sz);
|
||||
bytes_left -= sz;
|
||||
i += sz;
|
||||
if (i < bytes_to_copy)
|
||||
|
@ -239,16 +291,16 @@ again:
|
|||
if (bytes_left != 0) {
|
||||
rckemacif->tx_write_offset = 1;
|
||||
addr = rckemacif->tx_buffer + 32;
|
||||
LWIP_DEBUGF(NETIF_DEBUG, ("special case: copy remaining %d bytes\n", bytes_left));
|
||||
//LWIP_DEBUGF(NETIF_DEBUG, ("special case: copy remaining %d bytes\n", bytes_left));
|
||||
|
||||
i = 0;
|
||||
if (sz < q->len) {
|
||||
memcpy((uint8_t*) addr, q->payload + sz, q->len - sz);
|
||||
memcpy_to_nc((uint8_t*) addr, q->payload + sz, q->len - sz);
|
||||
bytes_left -= (q->len - sz);
|
||||
i = q->len - sz;
|
||||
}
|
||||
for(q=q->next; (q != 0); q = q->next) {
|
||||
memcpy(((uint8_t*) addr) + i, q->payload, q->len);
|
||||
memcpy_to_nc(((uint8_t*) addr) + i, q->payload, q->len);
|
||||
i += q->len;
|
||||
}
|
||||
|
||||
|
@ -256,7 +308,7 @@ again:
|
|||
if (rest != 0)
|
||||
rest = 32 - rest;
|
||||
|
||||
LWIP_DEBUGF(NETIF_DEBUG, ("Rest is %d\n", rest));
|
||||
//LWIP_DEBUGF(NETIF_DEBUG, ("Rest is %d\n", rest));
|
||||
rckemacif->tx_write_offset += CLINE_PACKETS(bytes_left + rest) - 1;
|
||||
}
|
||||
}
|
||||
|
@ -312,7 +364,7 @@ static void rckemacif_rx_handler(struct netif* netif, unsigned int write_offset)
|
|||
{
|
||||
rckemacif_t* rckemacif = netif->state;
|
||||
unsigned short read_offset = rckemacif->rx_read_offset;
|
||||
unsigned int counter;
|
||||
//unsigned int counter;
|
||||
volatile void *addr = NULL;
|
||||
uint16_t i, length = 0;
|
||||
uint32_t packets = 0;
|
||||
|
@ -373,7 +425,7 @@ again:
|
|||
|
||||
if (read_offset < write_offset) {
|
||||
for (q=p, i/*counter=0*/; q!=NULL; q=q->next) {
|
||||
memcpy((uint8_t*) q->payload, (uint8_t*)addr + 2, q->len);
|
||||
memcpy_from_nc((uint8_t*) q->payload, (uint8_t*)addr + 2, q->len);
|
||||
//for(i=0; i<q->len; i++, counter++) {
|
||||
// ((uint8_t*) q->payload)[i] = ((uint8_t*)addr)[2 + counter];
|
||||
//}
|
||||
|
@ -384,33 +436,52 @@ again:
|
|||
int rest;
|
||||
int bytesLeft = length;
|
||||
int bytesToCopy = length;
|
||||
int counter = 0;
|
||||
|
||||
/* rest to the end of buffer - 2 bytes length information */
|
||||
rest = (rckemacif->rx_buffer_max - read_offset + 1) * 32 - 2;
|
||||
if (length > rest)
|
||||
bytesToCopy = rest;
|
||||
LWIP_DEBUGF(NETIF_DEBUG, ("bytes to copy: %d, bytesLeft: %d\n", bytesToCopy, bytesLeft));
|
||||
//LWIP_DEBUGF(NETIF_DEBUG, ("bytes to copy: %d, bytesLeft: %d\n", bytesToCopy, bytesLeft));
|
||||
|
||||
for (q=p, counter=0; q!=NULL; q=q->next) {
|
||||
for(i=0; i<q->len; i++, counter++) {
|
||||
q = p;
|
||||
i = /*counter =*/ 0;
|
||||
while ((q != NULL) && (counter < bytesToCopy)) {
|
||||
i = MIN(q->len, bytesToCopy - counter);
|
||||
memcpy_from_nc(q->payload, (uint8_t*) addr + 2 + counter, i);
|
||||
counter += i;
|
||||
if (counter >= bytesToCopy)
|
||||
goto out;
|
||||
else
|
||||
q = q->next;
|
||||
/*for(i=0; i<q->len; i++, counter++) {
|
||||
if (counter < bytesToCopy)
|
||||
((uint8_t*) q->payload)[i] = ((uint8_t*)addr)[2 + counter];
|
||||
else
|
||||
goto out;
|
||||
}
|
||||
}*/
|
||||
}
|
||||
out:
|
||||
bytesLeft -= bytesToCopy;
|
||||
|
||||
if (bytesLeft != 0) {
|
||||
addr = rckemacif->rx_buffer + 0x20;
|
||||
LWIP_DEBUGF(NETIF_DEBUG, ("copying from %p, left: %d (%x)\n", addr, bytesLeft, ((uint8_t*)addr)[0]));
|
||||
for(counter=0; (i<q->len) && (counter < bytesLeft); i++, counter++)
|
||||
((uint8_t*) q->payload)[i] = ((uint8_t*)addr)[counter];
|
||||
counter = 0;
|
||||
//LWIP_DEBUGF(NETIF_DEBUG, ("copying from %p, left: %d (%x)\n", addr, bytesLeft, ((uint8_t*)addr)[0]));
|
||||
|
||||
if (i < q->len) {
|
||||
counter = MIN(q->len - i, bytesLeft);
|
||||
memcpy_from_nc((uint8_t*)q->payload + i, (uint8_t*) addr, counter);
|
||||
}
|
||||
//for(counter=0; (i<q->len) && (counter < bytesLeft); i++, counter++)
|
||||
// ((uint8_t*) q->payload)[i] = ((uint8_t*)addr)[counter];
|
||||
for(q=q->next; (q!=NULL) && (counter < bytesLeft); q=q->next) {
|
||||
for(i=0; (i<q->len) && (counter < bytesLeft); i++, counter++) {
|
||||
((uint8_t*) q->payload)[i] = ((uint8_t*)addr)[counter];
|
||||
}
|
||||
i = MIN(q->len, bytesLeft - counter);
|
||||
memcpy_from_nc((uint8_t*)q->payload, (uint8_t*)addr + counter, i);
|
||||
counter += i;
|
||||
//for(i=0; (i<q->len) && (counter < bytesLeft); i++, counter++) {
|
||||
// ((uint8_t*) q->payload)[i] = ((uint8_t*)addr)[counter];
|
||||
//}
|
||||
}
|
||||
read_offset = CLINE_PACKETS(bytesLeft);
|
||||
} else {
|
||||
|
|
Loading…
Add table
Reference in a new issue