Compare commits


No commits in common. "x64_userspace" and "master" have entirely different histories.

47 changed files with 1717 additions and 2840 deletions


@@ -1,12 +1,8 @@
NAME = metalsvm
# For 64bit support, you have to define BIT as 64
# Note: do not forget to 'make veryclean' after changing BIT!!!
BIT=64
ARCH = x86
SMP=1
TOPDIR = $(shell pwd)
ARCH = x86
# For 64bit support, you have to define BIT as 64
BIT=32
NAME = metalsvm
LWIPDIRS = lwip/src/arch lwip/src/api lwip/src/core lwip/src/core/ipv4 lwip/src/netif
DRIVERDIRS = drivers/net drivers/char
KERNDIRS = libkern kernel mm fs apps arch/$(ARCH)/kernel arch/$(ARCH)/mm arch/$(ARCH)/scc $(LWIPDIRS) $(DRIVERDIRS)
@@ -34,56 +30,35 @@ RANLIB_FOR_TARGET = $(CROSSCOMPREFIX)ranlib
STRIP_FOR_TARGET = $(CROSSCOMPREFIX)strip
READELF_FOR_TARGET = $(CROSSCOMPREFIX)readelf
# Tools
MAKE = make
RM = rm -rf
NASM = nasm
# For 64bit code, you have to use qemu-system-x86_64
QEMU = qemu-system-i386
GDB = gdb
ifeq ($(BIT), 32)
QEMU = qemu-system-i386
else ifeq ($(BIT), 64)
QEMU = qemu-system-x86_64
endif
INCLUDE = -I$(TOPDIR)/include \
-I$(TOPDIR)/arch/$(ARCH)/include \
-I$(TOPDIR)/lwip/src/include \
-I$(TOPDIR)/lwip/src/include/ipv4 \
-I$(TOPDIR)/drivers
# For 64bit support, you have to define -felf64 instead of -felf32
NASMFLAGS = -felf32 -g -i$(TOPDIR)/include/metalsvm/
INCLUDE = -I$(TOPDIR)/include -I$(TOPDIR)/arch/$(ARCH)/include -I$(TOPDIR)/lwip/src/include -I$(TOPDIR)/lwip/src/include/ipv4 -I$(TOPDIR)/drivers
# For 64bit support, you have to define "-m64 -mno-red-zone" instead of "-m32 -march=i586"
# Compiler options for final code
CFLAGS = -g -O2 -m$(BIT) -Wall -fomit-frame-pointer -ffreestanding -fstrength-reduce -finline-functions $(INCLUDE) $(STACKPROT)
CFLAGS = -g -m32 -march=i586 -Wall -O2 -fstrength-reduce -fomit-frame-pointer -finline-functions -ffreestanding $(INCLUDE) $(STACKPROT)
# Compiler options for debugging
#CFLAGS = -g -O -m$(BIT) -Wall -fomit-frame-pointer -ffreestanding $(INCLUDE) $(STACKPROT)
NASMFLAGS = -felf$(BIT) -g -i$(TOPDIR)/include/metalsvm/
#CFLAGS = -g -O -m32 -march=i586 -Wall -fomit-frame-pointer -ffreestanding $(INCLUDE) $(STACKPROT)
ARFLAGS = rsv
LDFLAGS = -T link$(BIT).ld -z max-page-size=4096 --defsym __BUILD_DATE=$(shell date +'%Y%m%d') --defsym __BUILD_TIME=$(shell date +'%H%M%S')
STRIP_DEBUG = --strip-debug
KEEP_DEBUG = --only-keep-debug
# Do not change to elf64!
# The Multiboot spec can only boot elf32 binaries
OUTPUT_FORMAT = -O elf32-i386
CFLAGS_FOR_NEWLIB = -m$(BIT) -O2 $(STACKPROT)
LDFLAGS_FOR_NEWLIB = -m$(BIT)
CFLAGS_FOR_TOOLS = -m$(BIT) -O2 -Wall
# For 64bit support, you have to define -m64 instead of "-m32 -march=i586"
CFLAGS_FOR_NEWLIB = -m32 -march=i586 -O2 $(STACKPROT)
# For 64bit support, you have to define -m64 instead of "-m32 -march=i586"
LDFLAGS_FOR_NEWLIB = -m32 -march=i586
# For 64bit support, you have to define -m64 instead of "-m32"
CFLAGS_FOR_TOOLS = -m32 -O2 -Wall
LDFLAGS_FOR_TOOLS =
NASMFLAGS_FOR_NEWLIB = -felf$(BIT)
ifeq ($(BIT), 32)
CFLAGS += -march=i586
CFLAGS_FOR_NEWLIB += -march=i586
LDFLAGS_FOR_NEWLIB += -march=i586
else ifeq ($(BIT), 64)
CFLAGS += -mno-red-zone
endif
# For 64bit support, you have to define -felf64 instead of -felf32
NASMFLAGS_FOR_NEWLIB = -felf32
# Prettify output
V = 0
@@ -93,15 +68,11 @@ ifeq ($V,0)
endif
default: all
all: newlib tools $(NAME).elf
newlib:
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) \
LDFLAGS="$(LDFLAGS_FOR_NEWLIB)" \
CFLAGS="$(CFLAGS_FOR_NEWLIB)" \
NASMFLAGS="$(NASMFLAGS_FOR_NEWLIB)" \
CC_FOR_TARGET=$(CC_FOR_TARGET) \
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) LDFLAGS="$(LDFLAGS_FOR_NEWLIB)" CFLAGS="$(CFLAGS_FOR_NEWLIB)" NASMFLAGS="$(NASMFLAGS_FOR_NEWLIB)" CC_FOR_TARGET=$(CC_FOR_TARGET) \
CXX_FOR_TARGET=$(CXX_FOR_TARGET) \
GCC_FOR_TARGET=$(GCC_FOR_TARGET) \
AR_FOR_TARGET=$(AR_FOR_TARGET) \
@@ -125,23 +96,14 @@ $(NAME).elf:
$Q$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $(OUTPUT_FORMAT) $(NAME).elf
qemu: newlib tools $(NAME).elf
$(QEMU) -monitor stdio -serial tcp::12346,server,nowait -smp $(SMP) -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -kernel metalsvm.elf -initrd tools/initrd.img
$(QEMU) -monitor stdio -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img
qemudbg: newlib tools $(NAME).elf
$(QEMU) -s -S -nographic -monitor stdio -serial tcp::12346,server -smp $(SMP) -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -kernel metalsvm.elf -initrd tools/initrd.img
$(QEMU) -s -S -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img
gdb: $(NAME).elf
$(GDB) -q -x script.gdb
debug: newlib tools $(NAME).elf
killall $(QEMU) || true
killall $(GDB) || true
sleep 1
gnome-terminal --working-directory=$(TOPDIR) \
--tab --title=Shell --command="bash -c 'sleep 1 && telnet localhost 12345'" \
--tab --title=QEmu --command="make qemudbg" \
--tab --title=GDB --command="make gdb" \
--tab --title=Debug --command="bash -c 'sleep 1 && telnet localhost 12346'"
make qemudbg > /dev/null &
$(GDB) -x script.gdb
clean:
$Q$(RM) $(NAME).elf $(NAME).sym *~
@@ -150,7 +112,7 @@ clean:
veryclean: clean
$Q$(MAKE) -C newlib veryclean
@echo Very cleaned.
@echo Very cleaned
#depend:
# for i in $(SUBDIRS); do $(MAKE) -k -C $$i depend; done
@@ -162,15 +124,16 @@ veryclean: clean
$Q$(CPP_FOR_TARGET) -MF $*.dep -MT $*.o -MM -D__KERNEL__ $(CFLAGS) $<
include/metalsvm/config.inc: include/metalsvm/config.h
@echo "; This file is generated automatically from the config.h file." > $@
@echo "; Before editing this, you should consider editing config.h." >> $@
@sed -nre 's/^[\t ]*#define[\t ]+([a-z_0-9]+)([\t ]+.*)*/%define \1/ip' $< >> $@
@sed -nre 's/^[\t ]*#define[\t ]+([a-z_0-9]+)[\t ]+([a-z_0-9.]+)([\t ]+.*)*/%define \1 \2/ip' $< >> $@
@echo "; This file is generated automatically from the config.h file." > include/metalsvm/config.inc
@echo "; Before editing this, you should consider editing config.h." >> include/metalsvm/config.inc
@awk '/^#define MAX_CORES/{ print "%define MAX_CORES", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc
@awk '/^#define KERNEL_STACK_SIZE/{ print "%define KERNEL_STACK_SIZE", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc
@awk '/^#define CONFIG_VGA/{ print "%define CONFIG_VGA", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc
%.o : %.asm include/metalsvm/config.inc
@echo [ASM] $@
$Q$(NASM) $(NASMFLAGS) -o $@ $<
.PHONY: default all clean qemu qemudbg gdb debug newlib tools
.PHONY: default all clean emu gdb newlib tools
include $(addsuffix /Makefile,$(SUBDIRS))
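Both variants of the config.inc rule above (the sed-based one and the awk-based one) do the same job: they mirror selected #define constants from config.h into NASM %define syntax, so the assembly startup code and the C kernel share a single configuration. A minimal sketch of the intended mapping, with illustrative values (not taken from the real config.h):

/* include/metalsvm/config.h (illustrative excerpt) */
#define MAX_CORES 2
#define KERNEL_STACK_SIZE 8192

/* Either rule would then emit into include/metalsvm/config.inc:
 *
 *   %define MAX_CORES 2
 *   %define KERNEL_STACK_SIZE 8192
 */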


@@ -1,4 +1,4 @@
C_source := tests.c echo.c netio.c jacobi.c laplace.c gfx_client.c gfx_generic.c memory.c
C_source := tests.c echo.c netio.c jacobi.c laplace.c gfx_client.c gfx_generic.c
MODULE := apps
include $(TOPDIR)/Makefile.inc


@@ -1,299 +0,0 @@
/*
* Copyright 2011 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <metalsvm/stdlib.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdarg.h>
#include <metalsvm/mmu.h>
#include <metalsvm/time.h>
#include <metalsvm/tasks.h>
#include <metalsvm/vma.h>
#include <metalsvm/malloc.h>
#include <asm/page.h>
#include <asm/processor.h>
#define PAGE_COUNT 10
#define SIZE (PAGE_COUNT*PAGE_SIZE)
#define VIRT_FROM_ADDR 0x100000000000
#define VIRT_TO_ADDR 0x200000000000
/** @brief Simple helper to format our test results */
static void test(size_t expr, char *fmt, ...)
{
void _putchar(int c, void *arg) { kputchar(c); } // for kvprintf
static int c = 1;
va_list ap;
va_start(ap, fmt);
kprintf("%s #%u:\t", (expr) ? "PASSED" : "FAILED", c++);
kvprintf(fmt, _putchar, NULL, 10, ap);
kputs("\n");
va_end(ap);
if (!expr)
abort();
}
/** @brief Linear feedback shift register PRNG */
static uint16_t rand()
{
static uint16_t lfsr = 0xACE1u;
static uint16_t bit;
bit = ((lfsr >> 0) ^ (lfsr >> 2) ^ (lfsr >> 3) ^ (lfsr >> 5) ) & 1;
return lfsr = (lfsr >> 1) | (bit << 15);
}
/** @brief BSD sum algorithm ('sum' Unix command), as used by QEmu */
uint16_t checksum(size_t start, size_t end) {
size_t addr;
uint16_t sum;
for(addr = start, sum = 0; addr < end; addr++) {
uint8_t val = *((uint8_t *) addr);
sum = (sum >> 1) | (sum << 15);
sum += val;
}
return sum;
}
static int paging_stage2(void *arg) {
size_t old, new;
kprintf("PAGING: entering stage 2...\n");
old = *((size_t *) arg);
kprintf("old sum: %lu\n", old);
new = checksum(VIRT_FROM_ADDR, VIRT_FROM_ADDR + PAGE_COUNT*PAGE_SIZE);
test(old == new, "checksum(%p, %p) = %lu", VIRT_FROM_ADDR, VIRT_FROM_ADDR + PAGE_COUNT*PAGE_SIZE, new);
size_t cr3 = read_cr3();
kprintf("cr3 new = %x\n", cr3);
return 0;
}
/** @brief Test of the paging subsystem
*
* We will map a single physical memory region to two virtual regions.
* When writing to the first one, we should be able to read the same contents
* from the second one.
*/
static void paging(void)
{
size_t c, sum;
size_t *p1, *p2;
size_t virt_from, virt_to, virt_alloc;
size_t phys;
// show original page maps
page_dump(0, 0);
page_stats(0, 0, 1); // reset accessed and dirty bits
// allocate physical page frames
phys = get_pages(PAGE_COUNT);
test(phys, "get_pages(%lu) = 0x%lx", PAGE_COUNT, phys);
// create first mapping
virt_from = map_region(VIRT_FROM_ADDR, phys, PAGE_COUNT, MAP_USER_SPACE);
test(virt_from, "map_region(0x%lx, 0x%lx, %lu, 0x%x) = 0x%lx", VIRT_FROM_ADDR, phys, PAGE_COUNT, 0, virt_from);
// check address translation
phys = virt_to_phys(virt_from);
test(phys, "virt_to_phys(0x%lx) = 0x%lx", virt_from, phys);
// write test data
p1 = (size_t *) virt_from;
for (c = 0; c < PAGE_COUNT*PAGE_SIZE/sizeof(size_t); c++) {
p1[c] = c;
}
// create second mapping pointing to the same page frames
virt_to = map_region(VIRT_TO_ADDR, phys, PAGE_COUNT, MAP_USER_SPACE);
test(virt_to, "map_region(0x%lx, 0x%lx, %lu, 0x%x) = 0x%lx", VIRT_TO_ADDR, phys, PAGE_COUNT, 0, virt_to);
// show paging info again
page_dump(0, 0);
page_stats(0, 0, 0);
// check address translation
phys = virt_to_phys(virt_to);
test(phys, "virt_to_phys(0x%lx) = 0x%lx", virt_to, phys);
// check if both mapped areas are equal
p2 = (size_t *) virt_to;
for (c = 0; c < PAGE_COUNT*PAGE_SIZE/sizeof(size_t); c++) {
if (p1[c] != p2[c])
test(0, "data mismatch: *(%p) != *(%p)", &p1[c], &p2[c]);
}
test(1, "data is equal");
// try to remap without MAP_REMAP
virt_to = map_region(VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, MAP_USER_SPACE);
test(!virt_to, "map_region(0x%lx, 0x%lx, %lu, 0x%x) = 0x%lx (without MAP_REMAP flag)", VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, 0, virt_to);
// try to remap with MAP_REMAP
virt_to = map_region(VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, MAP_REMAP|MAP_USER_SPACE);
test(virt_to, "map_region(0x%lx, 0x%lx, %lu, 0x%x) = 0x%lx (with MAP_REMAP flag)", VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, MAP_REMAP, virt_to);
// check if data is not equal anymore (we remapped with 1 page offset)
p2 = (size_t *) virt_to;
for (c = 0; c < PAGE_COUNT*PAGE_SIZE/sizeof(size_t); c++) {
if (p1[c] == p2[c])
test(0, "data match at *(%p) != *(%p)", &p1[c], &p2[c]);
}
test(1, "data is unequal");
// test vma_alloc
virt_alloc = map_region(0, phys, PAGE_COUNT, 0);
test(virt_alloc, "map_region(0x%lx, 0x%lx, %lu, 0x%x) = 0x%lx", 0, phys, PAGE_COUNT, 0, virt_alloc);
// data should match against new vm addr
p2 = (size_t *) virt_alloc;
for (c = 0; c < PAGE_COUNT*PAGE_SIZE/sizeof(size_t); c++) {
if (p1[c] != p2[c])
test(0, "data mismatch at *(%p) != *(%p)", &p1[c], &p2[c]);
}
test(1, "data is equal");
// calc checksum
sum = checksum(virt_alloc, virt_alloc + PAGE_COUNT*PAGE_SIZE);
test(sum, "checksum(%p, %p) = %lu", virt_alloc, virt_alloc + PAGE_COUNT*PAGE_SIZE, sum);
size_t cr3 = read_cr3();
kprintf("cr3 old = %x\n", cr3);
//create_kernel_task(0, paging_stage2, &sum, NORMAL_PRIO);
//sleep(3);
}
/** @brief Test of the VMA allocator */
static void vma(void)
{
int ret;
// vma_alloc
size_t a1 = vma_alloc(SIZE, VMA_HEAP);
test(a1, "vma_alloc(0x%x, 0x%x) = 0x%lx", SIZE, VMA_HEAP, a1);
vma_dump();
size_t a2 = vma_alloc(SIZE, VMA_HEAP|VMA_USER);
test(a2 != 0, "vma_alloc(0x%x, 0x%x) = 0x%lx", SIZE, VMA_HEAP|VMA_USER, a2);
vma_dump();
// vma_add
ret = vma_add(VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, VMA_HEAP|VMA_USER);
test(ret >= 0, "vma_add(0x%lx, 0x%lx, 0x%x) = %u", VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, VMA_HEAP|VMA_USER, ret);
vma_dump();
ret = vma_add(VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, VMA_HEAP|VMA_USER);
test(ret >= 0, "vma_add(0x%lx, 0x%lx, 0x%x) = %u", VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, VMA_HEAP|VMA_USER, ret);
vma_dump();
ret = vma_add(VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, VMA_HEAP|VMA_USER);
test(ret >= 0, "vma_add(0x%lx, 0x%lx, 0x%x) = %u", VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, VMA_HEAP|VMA_USER, ret);
vma_dump();
// vma_free
ret = vma_free(VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR);
test(ret >= 0, "vma_free(0x%lx, 0x%lx) = %u", VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, ret);
vma_dump();
ret = vma_free(VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE);
test(ret >= 0, "vma_free(0x%lx, 0x%lx) = %u", VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, ret);
vma_dump();
ret = vma_free(VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE);
test(ret >= 0, "vma_free(0x%lx, 0x%lx) = %u", VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, ret);
vma_dump();
}
/** @brief Test of the kernel malloc allocator */
static void malloc(void)
{
int i;
int* p[20];
int* a;
// kmalloc() test
buddy_dump();
a = kmalloc(SIZE);
test(a != NULL, "kmalloc(%lu) = %p", SIZE, a);
buddy_dump();
// simple write/read test
for (i=0; i<SIZE/sizeof(int); i++)
a[i] = i;
for (i=0; i<SIZE/sizeof(int); i++) {
if (a[i] != i)
test(0, "data mismatch: *(%p) != %lu", &a[i], i);
}
test(1, "data is equal");
// kfree() test
kfree(a);
test(1, "kfree(%p)", a);
buddy_dump();
// some random malloc/free patterns to stress the buddy system
for (i=0; i<20; i++) {
uint16_t sz = rand();
p[i] = kmalloc(sz);
test(p[i] != NULL, "kmalloc(%u) = %p", sz, p[i]);
}
buddy_dump();
for (i=0; i<20; i++) {
kfree(p[i]);
test(1, "kfree(%p)", p[i]);
}
buddy_dump();
}
/** @brief This is a simple procedure to test the memory management subsystem */
int memory(void* arg)
{
tid_t id;
kprintf("======== PAGING: test started...\n");
paging();
kprintf("======== VMA: test started...\n");
vma();
kprintf("======== MALLOC: test started...\n");
malloc();
kprintf("======== USER: test userspace...\n");
char* argv[] = {"/bin/memtest", "17", "kb", "5", NULL};
create_user_task(&id, argv[0], argv);
kprintf(" calling %s %s %s %s with id = %i:\n", argv[0], argv[1], argv[2], argv[3], id);
kprintf("======== All tests finished successfull...\n");
return 0;
}


@@ -43,7 +43,6 @@
int laplace(void* arg);
int jacobi(void* arg);
int memory(void* arg);
void echo_init(void);
void netio_init(void);
@@ -745,7 +744,8 @@ int test_init(void)
create_user_task(NULL, "/bin/jacobi", jacobi_argv);
//create_user_task_on_core(NULL, "/bin/jacobi", jacobi_argv, 1);
#endif
#if defined(START_MMNIF_TEST) && defined(CONFIG_LWIP) && LWIP_SOCKET
#ifdef START_MMNIF_TEST
#if defined(CONFIG_LWIP) && LWIP_SOCKET
if (RCCE_IAM == 0) {
kprintf("Start /bin/server...\n");
create_user_task(NULL, "/bin/server", server_argv);
@@ -755,8 +755,6 @@ int test_init(void)
create_user_task(NULL, "/bin/client", client_argv);
}
#endif
#ifdef START_MEMORY
create_kernel_task(NULL, memory, NULL, NORMAL_PRIO);
#endif
return 0;


@@ -46,7 +46,6 @@
//#define START_HELLO
//#define START_TESTS
//#define START_JACOBI
//#define START_MEMORY
//#define START_CHIEFTEST


@@ -34,7 +34,7 @@
// ____ _ _
// / ___| _ _ _ __ ___ | |__ ___ | |___
// \___ \| | | | '_ ` _ \| '_ \ / _ \| / __|
// ___) | |_| | | | | | | |_) | (_) | \__
// ___) | |_| | | | | | | |_) | (_) | \__ \
// |____/ \__, |_| |_| |_|_.__/ \___/|_|___/
// |___/
//
@@ -253,7 +253,7 @@
// _____ _ _
// | ___| _ _ __ ___| |_(_) ___ _ __ ___
// | |_ | | | | '_ \ / __| __| |/ _ \| '_ \/ __|
// | _|| |_| | | | | (__| |_| | (_) | | | \__
// | _|| |_| | | | | (__| |_| | (_) | | | \__ \
// |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
//
// #########################################################################################


@@ -102,7 +102,7 @@ inline static void outportl(unsigned short _port, unsigned int _data)
inline static void uart_putchar(unsigned char _data)
{
outportb(UART_PORT, _data);
outportb(0x2F8, _data);
}
/**


@@ -35,11 +35,9 @@
#ifdef CONFIG_MULTIBOOT
/// Does the bootloader provide mem_* fields?
#define MULTIBOOT_INFO_MEM 0x00000001
/// Does the bootloader provide a list of modules?
/* are there modules to do something with? */
#define MULTIBOOT_INFO_MODS 0x00000008
/// Does the bootloader provide a full memory map?
/* is there a full memory map? */
#define MULTIBOOT_INFO_MEM_MAP 0x00000040
typedef uint16_t multiboot_uint16_t;
@@ -116,6 +114,7 @@ struct multiboot_info
multiboot_uint16_t vbe_interface_off;
multiboot_uint16_t vbe_interface_len;
};
typedef struct multiboot_info multiboot_info_t;
struct multiboot_mmap_entry
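The MULTIBOOT_INFO_* constants above are bit flags that the bootloader sets in the flags word of the multiboot info structure; a field of multiboot_info is only meaningful if its corresponding bit is set. A minimal sketch of that test, assuming the standard flags member (it is not visible in this hunk):

/* Hedged sketch: only trust the memory map fields when the
 * MULTIBOOT_INFO_MEM_MAP bit (0x00000040, defined above) is set. */
static int multiboot_has_mmap(const multiboot_info_t* mbi)
{
    return (mbi->flags & MULTIBOOT_INFO_MEM_MAP) != 0;
}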


@@ -21,7 +21,6 @@
* @file arch/x86/include/asm/page.h
* @brief Definitions and functions related to paging
* @author Stefan Lankes
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*
* This file defines the interface for paging as well as structures related to paging.
*/
@@ -32,129 +31,86 @@
#include <metalsvm/stddef.h>
#include <metalsvm/stdlib.h>
/// Page offset bits
#define PAGE_BITS 12
#ifdef CONFIG_X86_32
/// Number of page map indirections
#define PAGE_MAP_LEVELS 2
/// Page map bits
#define PAGE_MAP_BITS 10
/// Total operand width in bits
#define BITS 32
/// Linear/virtual address width
#define VIRT_BITS BITS
/// Physical address width (we don't support PAE)
#define PHYS_BITS BITS
#elif defined(CONFIG_X86_64)
/// Number of page map indirections
#define PAGE_MAP_LEVELS 4
/// Page map bits
#define PAGE_MAP_BITS 9
/// Total operand width in bits
#define BITS 64
/// Linear/virtual address width
#define VIRT_BITS 48
/// Physical address width (maximum value)
#define PHYS_BITS 52
#endif
/// The size of a single page in bytes
#define PAGE_SIZE ( 1L << PAGE_BITS)
/// The number of entries in a page map table
#define PAGE_MAP_ENTRIES ( 1L << PAGE_MAP_BITS)
/// Mask the page address
#define PAGE_MASK (-1L << PAGE_BITS)
/// Mask the entry in a page table
#define PAGE_ENTRY_MASK (-1L << (PAGE_BITS-PAGE_MAP_BITS))
/// Mask for all flag bits in a page map entry (including ignored bits)
#define PAGE_FLAGS_MASK (~(-1L << PAGE_BITS) | (-1L << VIRT_BITS))
/// Align to next page
#define PAGE_FLOOR(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
/// Align to page
#define PAGE_CEIL(addr) ( (addr) & PAGE_MASK)
/// Sign extension to get a valid canonical address (hack: by using arithmetic shifts)
#define VIRT_SEXT(addr) ((ssize_t) addr << (BITS-VIRT_BITS) >> (BITS-VIRT_BITS))
// base addresses of page map tables
#ifdef CONFIG_X86_32
#define PAGE_MAP_PGD 0xFFFFF000
#define PAGE_MAP_PGT 0xFFC00000
#elif defined(CONFIG_X86_64)
#define PAGE_MAP_PML4 0xFFFFFFFFFFFFF000
#define PAGE_MAP_PDPT 0xFFFFFFFFFFE00000
#define PAGE_MAP_PGD 0xFFFFFFFFC0000000
#define PAGE_MAP_PGT 0xFFFFFF8000000000
#endif
#define _PAGE_BIT_PRESENT 0 /* is present */
#define _PAGE_BIT_RW 1 /* writeable */
#define _PAGE_BIT_USER 2 /* userspace addressable */
#define _PAGE_BIT_PWT 3 /* page write through */
#define _PAGE_BIT_PCD 4 /* page cache disabled */
#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
#define _PAGE_BIT_PAT 7 /* on 4KB pages */
#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
#define _PAGE_BIT_SVM_STRONG 9 /* mark a virtual address range as used by the SVM system */
#define _PAGE_BIT_SVM_LAZYRELEASE 10 /* mark a virtual address range as used by the SVM system */
#define _PAGE_BIT_SVM_INIT 11 /* mark if the MBP proxy is used */
/// Page is present
#define PG_PRESENT (1 << 0)
#define PG_PRESENT (1 << _PAGE_BIT_PRESENT)
/// Page is read- and writable
#define PG_RW (1 << 1)
#define PG_RW (1 << _PAGE_BIT_RW)
/// Page is addressable from userspace
#define PG_USER (1 << 2)
#define PG_USER (1 << _PAGE_BIT_USER)
/// Page write through is activated
#define PG_PWT (1 << 3)
#define PG_PWT (1 << _PAGE_BIT_PWT)
/// Page cache is disabled
#define PG_PCD (1 << 4)
#define PG_PCD (1 << _PAGE_BIT_PCD)
/// Page was recently accessed (set by CPU)
#define PG_ACCESSED (1 << 5)
#define PG_ACCESSED (1 << _PAGE_BIT_ACCESSED)
/// Page is dirty due to recent write-access (set by CPU)
#define PG_DIRTY (1 << 6)
/// Huge page: 4MB (or 2MB, 1GB)
#define PG_PSE (1 << 7)
#define PG_DIRTY (1 << _PAGE_BIT_DIRTY)
/// Big page: 4MB (or 2MB)
#define PG_PSE (1 << _PAGE_BIT_PSE)
/// Page is part of the MPB (SCC specific entry)
#define PG_MPE PG_PSE
/// Page attribute table
#define PG_PAT PG_PSE
#define PG_MPE PG_PSE
/// Global TLB entry (Pentium Pro and later)
#define PG_GLOBAL (1 << 8)
#define PG_GLOBAL (1 << _PAGE_BIT_GLOBAL)
/// Pattern flag
#define PG_PAT (1 << _PAGE_BIT_PAT)
/// This virtual address range is used by SVM system as marked
#define PG_SVM (1 << 9)
#define PG_SVM_STRONG PG_SVM
#define PG_SVM PG_SVM_STRONG
#define PG_SVM_STRONG (1 << _PAGE_BIT_SVM_STRONG)
/// This virtual address range is used by SVM system as marked
#define PG_SVM_LAZYRELEASE (1 << 10)
#define PG_SVM_LAZYRELEASE (1 << _PAGE_BIT_SVM_LAZYRELEASE)
/// Currently, no page frame is behind this page (only the MBP proxy)
#define PG_SVM_INIT (1 << 11)
/// Disable execution for this page
#define PG_XD (1L << 63)
#define PG_SVM_INIT (1 << _PAGE_BIT_SVM_INIT)
/// This is a whole set of flags (PRESENT,RW,ACCESSED,DIRTY) for kernelspace tables
#define PG_TABLE (PG_PRESENT|PG_RW|PG_XD)
#define KERN_TABLE (PG_PRESENT|PG_RW|PG_ACCESSED|PG_DIRTY)
/// This is a whole set of flags (PRESENT,RW,ACCESSED,DIRTY,USER) for userspace tables
#define USER_TABLE (PG_PRESENT|PG_RW|PG_ACCESSED|PG_DIRTY|PG_USER)
/// This is a whole set of flags (PRESENT,RW,GLOBAL) for kernelspace pages
#define PG_PAGE (PG_PRESENT|PG_RW|PG_GLOBAL|PG_XD)
#define KERN_PAGE (PG_PRESENT|PG_RW|PG_GLOBAL)
/// This is a whole set of flags (PRESENT,RW,USER) for userspace pages
#define USER_PAGE (PG_PRESENT|PG_RW|PG_USER)
/** @brief A single entry in a page map */
typedef size_t page_entry_t;
/** @brief General page map structure
#if __SIZEOF_POINTER__ == 4
#define PGT_ENTRIES 1024
#elif __SIZEOF_POINTER__ == 8
#define PGT_ENTRIES 512
#endif
/** @brief Page table structure
*
* This page map structure is a general type for all indirection levels,
* as all page map levels contain the same number of entries.
* All page maps must be page aligned!
* This structure keeps page table entries.\n
* On a 32bit system, a page table normally consists of 1024 entries.
*/
typedef struct page_map {
page_entry_t entries[PAGE_MAP_ENTRIES];
} __attribute__ ((aligned (PAGE_SIZE))) page_map_t;
typedef struct page_table
{
/// Page table entries are unsigned 32bit integers.
size_t entries[PGT_ENTRIES];
} page_table_t __attribute__ ((aligned (4096)));
/** @brief A callback type for the page map iterator
/** @brief Page directory structure
*
* @param entry A pointer to the current page map entry
* @return
* - 0 if we want to skip underlying page tables
* - >0 if want to recurse into underlying page tables
* This structure keeps page directory entries.\n
* On a 32bit system, a page directory normally consists of 1024 entries.
*/
typedef int (*page_cb_t)(page_entry_t* entry, int level);
/** @brief Get the corresponding page map entry to a given virtual address */
static inline page_entry_t* virt_to_entry(size_t addr, int level) {
return (page_entry_t*) ((((ssize_t) addr | (-1L << VIRT_BITS)) >> ((level+1) * PAGE_MAP_BITS)) & ~0x7);
}
/** @brief Get the corresponding virtual address to a page map entry */
static inline size_t entry_to_virt(page_entry_t* entry, int level) {
return VIRT_SEXT((size_t) entry << ((level+1) * PAGE_MAP_BITS));
}
typedef struct page_dir
{
/// Page dir entries are unsigned 32bit integers.
size_t entries[PGT_ENTRIES];
} page_dir_t __attribute__ ((aligned (4096)));
/** @brief Converts a virtual address to a physical
*
@@ -236,7 +192,7 @@ int arch_paging_init(void);
*
* @return Returns the address of the boot task's page dir array.
*/
page_map_t* get_boot_page_map(void);
page_dir_t* get_boot_pgd(void);
/** @brief Setup a new page directory for a new user-level task
*
@@ -247,18 +203,18 @@ page_map_t* get_boot_page_map(void);
* - counter of allocated page tables
* - -ENOMEM (-12) on failure
*/
int create_page_map(struct task* task, int copy);
int create_pgd(task_t* task, int copy);
/** @brief Delete all page map structures of the current task
/** @brief Delete page directory and its page tables
*
* Puts PML4, PDPT, PGD, PGT tables back to buffer and
* sets the task's page map pointer to NULL
* Puts page tables and page directory back to buffer and
* sets the task's page directory pointer to NULL
*
* @return
* - 0 on success
* - -EINVAL (-22) on failure (in case PGD is still the boot-pgd).
*/
int drop_page_map(void);
int drop_pgd(void);
/** @brief Change the page permission in the page tables of the current task
*
@@ -275,13 +231,4 @@ int drop_page_map(void);
*/
int change_page_permissions(size_t start, size_t end, uint32_t flags);
/** @brief Dump mapped memory */
void page_dump(size_t start, size_t end);
/** @brief Print stats about page flags
*
* @param reset Reset accessed and dirty bits in page tables
*/
void page_stats(size_t start, size_t end, int reset);
#endif
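The constants on the removed side above fully describe the page-map geometry: a virtual address carries PAGE_BITS of page offset plus PAGE_MAP_BITS of table index per indirection level, which is the shift structure underlying the virt_to_entry()/entry_to_virt() helpers. A minimal sketch of that decomposition, assuming only PAGE_BITS, PAGE_MAP_BITS and PAGE_MAP_ENTRIES from this hunk:

/* Hedged sketch: table index of a virtual address at a given level
 * (level 0 = page table, level PAGE_MAP_LEVELS-1 = top-level map). */
static inline size_t page_map_index(size_t viraddr, int level)
{
    return (viraddr >> (PAGE_BITS + level * PAGE_MAP_BITS)) & (PAGE_MAP_ENTRIES - 1);
}

With the x86_64 values (PAGE_BITS = 12, PAGE_MAP_BITS = 9), level 3 yields the PML4 index and level 0 the PGT index.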


@@ -53,97 +53,6 @@ extern "C" {
#define CPU_FEATURE_AVX (1 << 28)
#define CPU_FEATURE_HYPERVISOR (1 << 31)
// x86 control registers
/// Protected Mode Enable
#define CR0_PE (1 << 0)
/// Monitor coprocessor
#define CR0_MP (1 << 1)
/// Enable FPU emulation
#define CR0_EM (1 << 2)
/// Task switched
#define CR0_TS (1 << 3)
/// Extension type of coprocessor
#define CR0_ET (1 << 4)
/// Enable FPU error reporting
#define CR0_NE (1 << 5)
/// Enable write protected pages
#define CR0_WP (1 << 16)
/// Enable alignment checks
#define CR0_AM (1 << 18)
/// Globally enables/disable write-back caching
#define CR0_NW (1 << 29)
/// Globally disable memory caching
#define CR0_CD (1 << 30)
/// Enable paging
#define CR0_PG (1 << 31)
/// Virtual 8086 Mode Extensions
#define CR4_VME (1 << 0)
/// Protected-mode Virtual Interrupts
#define CR4_PVI (1 << 1)
/// Disable Time Stamp Counter register (rdtsc instruction)
#define CR4_TSD (1 << 2)
/// Enable debug extensions
#define CR4_DE (1 << 3)
/// Enable hugepage support
#define CR4_PSE (1 << 4)
/// Enable physical address extension
#define CR4_PAE (1 << 5)
/// Enable machine check exceptions
#define CR4_MCE (1 << 6)
/// Enable global pages
#define CR4_PGE (1 << 7)
/// Enable Performance-Monitoring Counter
#define CR4_PCE (1 << 8)
/// Enable Operating system support for FXSAVE and FXRSTOR instructions
#define CR4_OSFXSR (1 << 9)
/// Enable Operating System Support for Unmasked SIMD Floating-Point Exceptions
#define CR4_OSXMMEXCPT (1 << 10)
/// Enable Virtual Machine Extensions, see Intel VT-x
#define CR4_VMXE (1 << 13)
/// Enable Safer Mode Extensions, see Trusted Execution Technology (TXT)
#define CR4_SMXE (1 << 14)
/// Enables the instructions RDFSBASE, RDGSBASE, WRFSBASE, and WRGSBASE
#define CR4_FSGSBASE (1 << 16)
/// Enables process-context identifiers
#define CR4_PCIDE (1 << 17)
/// Enable XSAVE and Processor Extended States
#define CR4_OSXSAVE (1 << 18)
/// Enable Supervisor Mode Execution Protection
#define CR4_SMEP (1 << 20)
/// Enable Supervisor Mode Access Protection
#define CR4_SMAP (1 << 21)
// x86-64 specific MSRs
/// extended feature register
#define MSR_EFER 0xc0000080
/// legacy mode SYSCALL target
#define MSR_STAR 0xc0000081
/// long mode SYSCALL target
#define MSR_LSTAR 0xc0000082
/// compat mode SYSCALL target
#define MSR_CSTAR 0xc0000083
/// EFLAGS mask for syscall
#define MSR_SYSCALL_MASK 0xc0000084
/// 64bit FS base
#define MSR_FS_BASE 0xc0000100
/// 64bit GS base
#define MSR_GS_BASE 0xc0000101
/// SwapGS GS shadow
#define MSR_KERNEL_GS_BASE 0xc0000102
// MSR EFER bits
#define EFER_SCE (1 << 0)
#define EFER_LME (1 << 8)
#define EFER_LMA (1 << 10)
#define EFER_NXE (1 << 11)
#define EFER_SVME (1 << 12)
#define EFER_LMSLE (1 << 13)
#define EFER_FFXSR (1 << 14)
#define EFER_TCE (1 << 15)
typedef struct {
uint32_t feature1, feature2;
} cpu_info_t;
@@ -364,7 +273,7 @@ int ipi_tlb_flush(void);
/** @brief Flush a specific page entry in TLB
* @param addr The (virtual) address of the page to flush
*/
static inline void tlb_flush_one_page(size_t addr)
static inline void tlb_flush_one_page(uint32_t addr)
{
asm volatile("invlpg (%0)" : : "r"(addr) : "memory");
#if MAX_CORES > 1
@@ -373,7 +282,7 @@ static inline void tlb_flush_one_page(size_t addr)
* => User-level applications run only on one
* and we didn't flush the TLB of the other cores
*/
if (addr < KERNEL_SPACE)
if (addr <= KERNEL_SPACE)
ipi_tlb_flush();
#endif
}
@@ -384,7 +293,7 @@ static inline void tlb_flush_one_page(size_t addr)
*/
static inline void tlb_flush(void)
{
size_t val = read_cr3();
uint32_t val = read_cr3();
if (val)
write_cr3(val);
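The CR0/CR4 bit definitions removed in this hunk are meant to be combined with control-register accessors like the read_cr3()/write_cr3() pair used in tlb_flush(). A minimal sketch of the idiom, assuming an analogous read_cr0()/write_cr0() pair exists (an assumption; they are not shown here):

/* Hedged sketch: enforce write protection for supervisor-mode
 * accesses by setting CR0.WP (CR0_WP = 1 << 16, defined above). */
static inline void cr0_enable_write_protect(void)
{
    size_t cr0 = read_cr0();    /* assumed helper, analogous to read_cr3() */
    write_cr0(cr0 | CR0_WP);
}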


@@ -26,6 +26,13 @@
extern "C" {
#endif
/** @brief Copy a physical page to another physical destination
*
* @param dest Destination address
* @param src Source address
*/
void copy_page_physical(void* dest, const void * src);
#ifdef HAVE_ARCH_MEMCPY
#ifdef CONFIG_ROCKCREEK


@@ -97,19 +97,17 @@ static inline int register_task(void)
*
* @return 0 in any case
*/
static inline int jump_to_user_code(size_t ep, size_t stack)
static inline int jump_to_user_code(uint32_t ep, uint32_t stack)
{
asm volatile ("push $0x23; push %0; push $0x1B; push %1" :: "r"(stack), "r"(ep)); // fake stack, see Intel Reference Manual, Vol 1, 6.3.6
#ifdef CONFIG_X86_32
asm volatile ("mov %0, %%ds; mov %0, %%fs; mov %0, %%gs; mov %0, %%es" :: "r"(0x23)); // update segment registers
asm volatile ("lret" ::: "cc"); // far return to user level code
#elif defined (CONFIG_X86_64)
asm volatile ("lretq" ::: "cc"); // far return to user level code
#endif
asm volatile ("mov %0, %%ds; mov %0, %%fs; mov %0, %%gs; mov %0, %%es" :: "r"(0x23));
asm volatile ("push $0x23; push %0; push $0x1B; push %1" :: "r"(stack), "r"(ep));
asm volatile ("lret" ::: "cc");
return 0;
#else
return -22;
#endif
}
#ifdef __cplusplus


@@ -387,14 +387,12 @@ void smp_start(uint32_t id)
kprintf("Application processor %d is entering its idle task\n", apic_cpu_id());
#ifdef CONFIG_X86_32
// initialization for x86_64 is done in smp_entry()
// initialize default cpu features
cpu_init();
#endif
// use the same gdt like the boot processors
gdt_flush();
// install IDT
idt_install();


@@ -29,6 +29,7 @@
SECTION .mboot
global start
start:
mov byte [msg], 'H'
jmp stublet
; This part MUST be 4byte aligned, so we solve that issue using 'ALIGN 4'
@@ -37,10 +38,10 @@ mboot:
; Multiboot macros to make a few lines more readable later
MULTIBOOT_PAGE_ALIGN equ 1<<0
MULTIBOOT_MEMORY_INFO equ 1<<1
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
MULTIBOOT_HEADER_MAGIC equ 0x1BADB002
MULTIBOOT_HEADER_FLAGS equ MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO ; | MULTIBOOT_AOUT_KLUDGE
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
EXTERN code, bss, end
; This is the GRUB Multiboot header. A boot signature
@@ -48,6 +49,8 @@ mboot:
dd MULTIBOOT_HEADER_FLAGS
dd MULTIBOOT_CHECKSUM
msg db "?ello from MetalSVM kernel!!", 0
SECTION .text
ALIGN 4
stublet:
@@ -67,7 +70,7 @@ stublet:
; jump to the boot processors's C code
extern main
call main
jmp $ ; infinite loop
jmp $
global cpu_init
cpu_init:
@@ -109,7 +112,7 @@ global read_ip
read_ip:
mov eax, [esp+4]
pop DWORD [eax] ; Get the return address
add esp, 4 ; Dirty Hack! read_ip cleanup the stack
add esp, 4 ; Dirty Hack! read_ip cleanup the stacl
jmp [eax] ; Return. Can't use RET because return
; address popped off the stack.
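The Multiboot header assembled above relies on the invariant that magic + flags + checksum wraps to zero modulo 2^32; that is what GRUB verifies when scanning for the header. A small self-contained check in C, with the constants copied from the assembly (illustrative only):

#include <stdint.h>

#define MB_HEADER_MAGIC 0x1BADB002u
#define MB_PAGE_ALIGN   (1u << 0)
#define MB_MEMORY_INFO  (1u << 1)
#define MB_HEADER_FLAGS (MB_PAGE_ALIGN | MB_MEMORY_INFO)
#define MB_CHECKSUM     (0u - (MB_HEADER_MAGIC + MB_HEADER_FLAGS))

/* Returns 1 iff the three header words sum to zero, as GRUB expects. */
static int multiboot_header_valid(void)
{
    return (uint32_t)(MB_HEADER_MAGIC + MB_HEADER_FLAGS + MB_CHECKSUM) == 0;
}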


@@ -30,7 +30,7 @@ extern kernel_end
extern apic_mp
; We use a special name to map this section at the begin of our kernel
; => Multiboot needs its magic number at the beginning of the kernel
; => Multiboot needs its magic number at the begin of the kernel
SECTION .mboot
global start
start:
@@ -42,19 +42,19 @@ mboot:
; Multiboot macros to make a few lines more readable later
MULTIBOOT_PAGE_ALIGN equ 1<<0
MULTIBOOT_MEMORY_INFO equ 1<<1
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
MULTIBOOT_HEADER_MAGIC equ 0x1BADB002
MULTIBOOT_HEADER_FLAGS equ MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO ; | MULTIBOOT_AOUT_KLUDGE
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
EXTERN code, bss, end
; This is the GRUB Multiboot header. A boot signature
dd MULTIBOOT_HEADER_MAGIC
dd MULTIBOOT_HEADER_FLAGS
dd MULTIBOOT_CHECKSUM
ALIGN 4
; we need already a valid GDT to switch in the 64bit mode
; we need already a valid GDT to switch in the 64bit modus
GDT64: ; Global Descriptor Table (64-bit).
.Null: equ $ - GDT64 ; The null descriptor.
dw 0 ; Limit (low).
@@ -81,90 +81,112 @@ GDT64: ; Global Descriptor Table (64-bit).
dw $ - GDT64 - 1 ; Limit.
dq GDT64 ; Base.
times 256 DD 0 ; Stack for booting
times 256 DD 0
startup_stack:
SECTION .data
; Create default page tables for the 64bit kernel
global boot_pml4
; create default page tables for the 64bit kernel
global boot_pgd ; aka PML4
ALIGN 4096 ; of course, the page tables have to be page aligned
PAGE_MAP_ENTRIES equ (1<<9)
PAGE_SIZE equ (1<<12)
boot_pml4 times PAGE_MAP_ENTRIES DQ 0
boot_pdpt times PAGE_MAP_ENTRIES DQ 0
boot_pgd times PAGE_MAP_ENTRIES DQ 0
boot_pgt times (KERNEL_SPACE/PAGE_SIZE) DQ 0
NOPTS equ 512
boot_pgd times 512 DQ 0
boot_pdpt times 512 DQ 0
boot_pd times 512 DQ 0
boot_pt times (NOPTS*512) DQ 0
SECTION .text
ALIGN 8
%if MAX_CORES > 1
global smp_entry
smp_entry:
; Initialize cpu features
call cpu_init
; Initialize cr3 register
mov edi, boot_pml4
; enable caching, disable paging and fpu emulation
and eax, 0x1ffffffb
; ...and turn on FPU exceptions
or eax, 0x22
mov cr0, eax
; clears the current pgd entry
xor eax, eax
mov cr3, eax
; at this stage, we disable the SSE support
mov eax, cr4
and eax, 0xfffbf9ff
mov cr4, eax
; initialize page table
mov edi, boot_pgd
mov cr3, edi
; Enable PAE
; we need to enable PAE modus
mov eax, cr4
or eax, 1 << 5
mov cr4, eax
; Enable longmode (compatibility mode)
; switch to the compatibility mode (which is part of long mode)
mov ecx, 0xC0000080
rdmsr
or eax, 1 << 8
wrmsr
; Enable paging
; enable paging
mov eax, cr0
or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31st bit, and the PE-bit, which is the 0th bit.
mov cr0, eax ; According to the multiboot spec the PE-bit has to be set by bootloader already!
or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31st bit, and the PM-bit, which is the 0th bit.
mov cr0, eax
; Jump to 64-bit longmode
mov edi, [esp+4] ; Set argument for smp_start
mov edi, [esp+4] ; set argument for smp_start
lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
jmp GDT64.Code:smp_start64 ; Set the code segment and enter 64-bit long mode.
jmp $ ; endless loop
%endif
; Search MP Floating Pointer Structure
search_mps:
search_apic:
push ebp
mov ebp, esp
push ecx
xor eax, eax
mov ecx, [ebp+8]
.l1:
L1:
cmp [ecx], DWORD 0x5f504d5f ; MP_FLT_SIGNATURE
jne .l2
jne L2
mov al, BYTE [ecx+9]
cmp eax, 4
ja .l2
ja L2
mov al, BYTE [ecx+11]
cmp eax, 0
jne .l2
jne L2
mov eax, ecx
jmp .l3
jmp L3
.l2:
L2:
add ecx, 4
cmp ecx, [ebp+12]
jb .l1
jb L1
xor eax, eax
.l3:
L3:
pop ecx
pop ebp
ret
check_longmode:
; Check for cpuid instruction
ALIGN 4
stublet:
mov esp, startup_stack-4
push ebx ; save pointer to the multiboot structure
mov eax, cr0
; enable caching, disable paging and fpu emulation
and eax, 0x1ffffffb
; ...and turn on FPU exceptions
or eax, 0x22
mov cr0, eax
; clears the current pgd entry
xor eax, eax
mov cr3, eax
; at this stage, we disable the SSE support
mov eax, cr4
and eax, 0xfffbf9ff
mov cr4, eax
; do we have the instruction cpuid?
pushfd
pop eax
mov ecx, eax
@@ -176,22 +198,59 @@ check_longmode:
push ecx
popfd
xor eax, ecx
jz .unsupported
; Check for extended cpu features (cpuid > 0x80000000)
jz Linvalid
; cpuid > 0x80000000?
mov eax, 0x80000000
cpuid
cmp eax, 0x80000001
jb .unsupported ; It is less, there is no long mode.
; Check if longmode is supported
jb Linvalid ; It is less, there is no long mode.
; do we have a long mode?
mov eax, 0x80000001
cpuid
test edx, 1 << 29 ; Test if the LM-bit, which is bit 29, is set in the D-register.
jz .unsupported ; They aren't, there is no long mode.
ret
.unsupported:
jmp $
jz Linvalid ; They aren't, there is no long mode.
check_lapic:
; initialize page table
mov edi, boot_pgd
mov cr3, edi
; So let's make PML4T[0] point to the PDPT and so on:
mov DWORD [edi], boot_pdpt ; Set the double word at the destination index to pdpt.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
mov edi, boot_pdpt
mov DWORD [edi], boot_pd ; Set the double word at the destination index to pd.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
mov edi, boot_pd
mov ebx, boot_pt
mov ecx, NOPTS
L0:
mov DWORD [edi], ebx ; Set the double word at the destination index to pt.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
add edi, 8
add ebx, 0x1000
loop L0
%ifdef CONFIG_VGA
; map the VGA address into the virtual address space
mov edi, 0xB8000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, 0xB8000
or ebx, 0x00000013
mov DWORD [edi], ebx
%endif
; map multiboot structure into the virtual address space
mov edi, [esp]
and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, [esp]
and ebx, 0xFFFFF000
or ebx, 0x00000003
mov DWORD [edi], ebx
; check if lapic is available
push eax
push ebx
push ecx
@@ -200,186 +259,123 @@ check_lapic:
cpuid
and edx, 0x200
cmp edx, 0
je .unsupported
; Map lapic at 0xFEE00000 below the kernel
je no_lapic
; map lapic at 0xFEE00000 below the kernel
mov edi, kernel_start - 0x1000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pgt
mov ebx, 0xFEE00000 ; LAPIC base address
add edi, boot_pt
mov ebx, 0xFEE00000
or ebx, 0x00000013
mov DWORD [edi], ebx
.unsupported:
no_lapic:
pop edx
pop ecx
pop ebx
pop eax
ret
cpu_init:
mov eax, cr0
; Enable caching, disable paging and fpu emulation
and eax, 0x1ffffffb
; ...and turn on FPU exceptions
or eax, 0x22
mov cr0, eax
; Clear the current pgd entry
xor eax, eax
mov cr3, eax
; at this stage, we disable the SSE support
mov eax, cr4
and eax, 0xfffbf9ff
mov cr4, eax
ret
; Identity map a single page at address eax
identity_page:
push edi
push ebx
mov edi, eax
and edi, 0xFFFFF000 ; page align in lower half
shr edi, 9 ; (edi >> 12) * 8 (index for boot_pgt)
add edi, boot_pgt
mov ebx, eax
and ebx, 0xFFFFF000 ; page align lower half
or ebx, 0x113 ; set present, global, writable and cache disable bits
mov DWORD [edi], ebx
mov DWORD [edi+4], 0x80000000 ; set execution disable bit in higher half
pop ebx
pop edi
ret
ALIGN 4
stublet:
mov esp, startup_stack-4
; Save pointer to the Multiboot structure
push ebx
; Initialize cpu features
call cpu_init
; Check if longmode is supported
call check_longmode
; Check if lapic is available
call check_lapic
; Find MP Floating Pointer structure
; search APIC
push DWORD 0x100000
push DWORD 0xF0000
call search_mps
call search_apic
add esp, 8
cmp eax, 0
jne map_mps
jne La
push DWORD 0xA0000
push DWORD 0x9F000
call search_mps
call search_apic
add esp, 8
cmp eax, 0
je map_kernel
je Lb
map_mps:
; Map MP Floating Pointer structure
La:
; map MP Floating Pointer Structure
mov DWORD [apic_mp], eax
call identity_page
mov edi, eax
and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, eax
and ebx, 0xFFFFF000
or ebx, 0x00000013
mov DWORD [edi], ebx
; Map MP Configuration table
mov eax, [eax+4] ; Offset for physical address of MP table
call identity_page
; map mp_config
mov edi, [eax+4]
and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, [eax+4]
and ebx, 0xFFFFF000
or ebx, 0x00000013
mov DWORD [edi], ebx
%ifdef CONFIG_VGA
; Map VGA textmode plane
mov eax, 0xB8000
call identity_page
%endif
; Map Multiboot structure
mov eax, [esp] ; Pointer is still on the stack
call identity_page
map_kernel:
Lb:
mov edi, kernel_start
shr edi, 9 ; (edi >> 12) * 8 (index for boot_pgt)
add edi, boot_pgt
shr edi, 9 ; (kernel_start >> 12) * 8
add edi, boot_pt
mov ebx, kernel_start
or ebx, 0x103 ; set present, global and writable flags
or ebx, 0x00000003
mov ecx, kernel_end ; determine kernel size in number of pages
sub ecx, kernel_start
shr ecx, 12
inc ecx
.l1:
mov DWORD [edi], ebx
Lc:
mov DWORD [edi], ebx ; Set the double word at the destination index to the B-register.
add edi, 8
add ebx, 0x1000
loop .l1
loop Lc
init_paging:
mov edi, boot_pml4
mov cr3, edi
mov DWORD [edi], boot_pdpt
or DWORD [edi], 0x03 ; Set present and writable flags
mov edi, boot_pdpt
mov DWORD [edi], boot_pgd
or DWORD [edi], 0x03 ; Set present and writable flags
mov edi, boot_pgd
mov ebx, boot_pgt
mov ecx, PAGE_MAP_ENTRIES ; Map all boot_pgt to the kernel space
.l1:
mov DWORD [edi], ebx
or DWORD [edi], 0x03 ; Set present and writable flags
add edi, 8
add ebx, 0x1000
loop .l1
; Enable PAE
; we need to enable PAE modus
mov eax, cr4
or eax, 1 << 5
mov cr4, eax
; Enable longmode (compatibility mode)
; switch to the compatibility mode (which is part of long mode)
mov ecx, 0xC0000080
rdmsr
or eax, 1 << 8 ; IA32_EFER.LME = 1
or eax, 1 << 8
wrmsr
; Enable paging
; enable paging
mov eax, cr0
or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31st bit, and the PE-bit, which is the 0th bit.
or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31st bit, and the PM-bit, which is the 0th bit.
mov cr0, eax
; Jump to 64-bit longmode
pop ebx ; Restore pointer to multiboot structure
lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode.
pop ebx ; restore pointer to multiboot structure
lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode.
Linvalid:
jmp $
[BITS 64]
start64:
; Initialize segment registers
; initialize segment registers
mov ax, GDT64.Data
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
mov ss, ax
; Set default stack pointer
; set default stack pointer
mov rsp, boot_stack
add rsp, KERNEL_STACK_SIZE-16
; Interpret multiboot information
; interpret multiboot information
extern multiboot_init
mov rdi, rbx
call multiboot_init
; Jump to the boot processors's C code
; jump to the boot processors's C code
extern main
call main
jmp $
%if MAX_CORES > 1
smp_start64:
; Initialize segment registers
; initialize segment registers
mov ax, GDT64.Data
mov ds, ax
mov es, ax
@@ -387,12 +383,29 @@ smp_start64:
mov gs, ax
mov ss, ax
; Jump to the boot processors's C code
; jump to the boot processors's C code
extern smp_start
call smp_start
jmp $
%endif
global cpu_init
cpu_init:
; mov eax, cr0
; enable caching, disable paging and fpu emulation
; and eax, 0x1ffffffb
; ...and turn on FPU exceptions
; or eax, 0x22
; mov cr0, eax
; clears the current pgd entry
; xor eax, eax
; mov cr3, eax
; at this stage, we disable the SSE support
; mov eax, cr4
; and eax, 0xfffbf9ff
; mov cr4, eax
; ret
; This will set up our new segment registers and is declared in
; C as 'extern void gdt_flush();'
global gdt_flush
@@ -729,41 +742,41 @@ extern syscall_handler
; used to realize system calls
isrsyscall:
cli ; disable interrupts during prologue
; save caller saved registers
push r15
push r14
push r13
push r12
push r11
push r10
push r9
push r8
push rdi
push rsi
push rbp
push rsp
push rbx
push rdx
push rcx
push rax
; set kernel data segments
mov ax, 0x10
mov ds, ax
; x86-64 ABI calling convention
mov r8, rbx
mov r9, rax
mov rax, 0 ; we've not used vector registers for this va_arg call
sti ; enable interrupts during syscall
mov rdi, rsp
call syscall_handler
cli ; disable interrupts during epilogue
; restore caller saved registers
pop rax
pop rcx
pop rdx
pop rbx
add rsp, 8
pop rbp
pop rsi
pop rdi
pop r8
pop r9
pop r10
pop r11
pop r12
pop r13
pop r14
iretq
global irq0
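Both boot paths in this file switch the CPU out of 32-bit protected mode with the same canonical sequence: load the top-level page map into CR3, set CR4.PAE, set EFER.LME through MSR 0xC0000080, then set CR0.PG (with CR0.PE already set). A hedged C rendering of those steps, compiled for a 32-bit context; the inline-asm wrappers are illustrative, not MetalSVM API:

#include <stdint.h>

#define MSR_EFER 0xC0000080u                /* as in processor.h above */

static inline uint64_t rdmsr(uint32_t msr)
{
    uint32_t lo, hi;
    asm volatile("rdmsr" : "=a"(lo), "=d"(hi) : "c"(msr));
    return ((uint64_t) hi << 32) | lo;
}

static inline void wrmsr(uint32_t msr, uint64_t val)
{
    asm volatile("wrmsr" :: "c"(msr), "a"((uint32_t) val), "d"((uint32_t)(val >> 32)));
}

/* Hedged sketch of the long-mode switch performed above in assembly. */
static void enter_long_mode(uint32_t pml4_phys)
{
    uint32_t cr0, cr4;
    asm volatile("mov %0, %%cr3" :: "r"(pml4_phys));             /* page tables */
    asm volatile("mov %%cr4, %0" : "=r"(cr4));
    asm volatile("mov %0, %%cr4" :: "r"(cr4 | (1u << 5)));       /* CR4.PAE */
    wrmsr(MSR_EFER, rdmsr(MSR_EFER) | (1u << 8));                /* EFER.LME */
    asm volatile("mov %%cr0, %0" : "=r"(cr0));
    asm volatile("mov %0, %%cr0" :: "r"(cr0 | (1u << 31) | 1u)); /* CR0.PG|PE */
}

After this, a far jump through a 64-bit code descriptor (GDT64.Code above) completes the transition.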


@@ -50,7 +50,7 @@ size_t* get_current_stack(void)
#endif
// use new page table
write_cr3(virt_to_phys((size_t)curr_task->page_map));
write_cr3(virt_to_phys((size_t)curr_task->pgd));
return curr_task->last_stack_pointer;
}


@@ -208,18 +208,13 @@ static const char *exception_messages[] = {
static void fault_handler(struct state *s)
{
if (s->int_no < 32) {
task_t* task = per_core(current_task);
kputs(exception_messages[s->int_no]);
#ifdef CONFIG_X86_32
kprintf("%s Exception (%d) at cs:eip = %#x:%#lx, core = %u, task = %u, error = %#x\n",
"Register state: eflags = %#lx, eax = %#lx, ebx = %#lx, ecx = %#lx, edx = %#lx, edi = %#lx, esi = %#lx, ebp = %#llx, esp = %#lx\n",
exception_messages[s->int_no], s->int_no, s->cs, s->eip, CORE_ID, task->id, s->error,
s->eflags, s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
kprintf(" Exception (%d) at 0x%x:0x%x on core %u, error code 0x%x, eflags 0x%x\n",
s->int_no, s->cs, s->eip, CORE_ID, s->error, s->eflags);
#elif defined(CONFIG_X86_64)
kprintf("%s Exception (%d) at cs:rip = %#x:%#lx, core = %u, task = %u, error = %#lx\n"
"Register state: rflags = %#lx, rax = %#lx, rbx = %#lx, rcx = %#lx, rdx = %#lx, rdi = %#lx, rsi = %#lx, rbp = %#llx, rsp = %#lx\n",
exception_messages[s->int_no], s->int_no, s->cs, s->rip, CORE_ID, task->id, s->error,
s->rflags, s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp);
kprintf(" Exception (%d) at 0x%llx:0x%llx on core %u, error code 0x%llx, rflags 0x%llx\n",
s->int_no, s->cs, s->rip, CORE_ID, s->error, s->rflags);
#endif
/* Now, we signalize that we have handled the interrupt */


@@ -37,7 +37,7 @@ void kb_init(size_t size, tid_t tid) {
}
void kb_finish(void) {
kfree(kb_buffer.buffer);
kfree(kb_buffer.buffer, (kb_buffer.maxsize * sizeof(char)));
kb_buffer.buffer = NULL;
kb_buffer.size = 0;
kb_buffer.maxsize = 0;


@@ -42,4 +42,38 @@ L3:
pop rax
ret
%if 0
; The following function is derived from JamesM's kernel development tutorials
; (http://www.jamesmolloy.co.uk/tutorial_html/)
global copy_page_physical
copy_page_physical:
push esi ; According to __cdecl, we must preserve the contents of ESI
push edi ; and EDI.
pushf ; push EFLAGS, so we can pop it and reenable interrupts
; later, if they were enabled anyway.
cli ; Disable interrupts, so we aren't interrupted.
; Load these in BEFORE we disable paging!
mov edi, [esp+12+4] ; Destination address
mov esi, [esp+12+8] ; Source address
mov edx, cr0 ; Get the control register...
and edx, 0x7fffffff ; and...
mov cr0, edx ; Disable paging.
cld
mov ecx, 0x400 ; 1024*4bytes = 4096 bytes = page size
rep movsd ; copy page
mov edx, cr0 ; Get the control register again
or edx, 0x80000000 ; and...
mov cr0, edx ; Enable paging.
popf ; Pop EFLAGS back.
pop edi ; Get the original value of EDI
pop esi ; and ESI back.
ret
%endif
SECTION .note.GNU-stack noalloc noexec nowrite progbits
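The copy_page_physical routine above (declared in string.h earlier in this diff, and guarded by %if 0 in this revision) works on physical addresses: it temporarily clears CR0.PG, copies 1024 dwords (one 4 KiB page), and re-enables paging. A minimal usage sketch under that contract, assuming the get_pages() allocator from the surrounding hunks; error handling is elided:

/* Hedged sketch: clone one physical page frame into a fresh one. */
static size_t clone_page_frame(size_t src_phys)
{
    size_t dest_phys = get_pages(1);     /* allocate one page frame */
    if (dest_phys)
        copy_page_physical((void*) dest_phys, (const void*) src_phys);
    return dest_phys;                    /* 0 on allocation failure */
}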


@@ -46,7 +46,7 @@
* 0x00100000 - 0x0DEADFFF: Kernel (size depends on the configuration) (221MB)
* 0x0DEAE000 - 0x3FFFEFFF: Kernel heap (801MB)
* 0x3FFFF000 - 0x3FFFFFFF: Page Tables are mapped in this region (4KB)
* (The first 256 entries belong to kernel space)
* (The last 256 entries belong to kernel space)
*/
/*
@@ -57,14 +57,13 @@ extern const void kernel_start;
extern const void kernel_end;
// boot task's page directory and page directory lock
static page_map_t boot_pgd = {{[0 ... MAP_ENTRIES-1] = 0}};
static page_map_t boot_pgt[KERNEL_SPACE/(MAP_ENTRIES*PAGE_SIZE)];
static page_map_t pgt_container = {{[0 ... MAP_ENTRIES-1] = 0}};
static page_dir_t boot_pgd = {{[0 ... PGT_ENTRIES-1] = 0}};
static page_table_t pgt_container = {{[0 ... PGT_ENTRIES-1] = 0}};
static page_table_t boot_pgt[KERNEL_SPACE/(1024*PAGE_SIZE)];
static spinlock_t kslock = SPINLOCK_INIT;
static int paging_enabled = 0;
page_map_t* get_boot_page_map(void)
page_dir_t* get_boot_pgd(void)
{
return &boot_pgd;
}
@@ -72,26 +71,26 @@ page_map_t* get_boot_page_map(void)
/*
* TODO: We create a full copy of the current task. Copy-On-Access will be the better solution.
*
* No PGD locking is needed because only create_page_map uses this function and already holds the
* No PGD locking is needed because only create_pgd uses this function and already holds the
* PGD lock.
*/
inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_map_t* pgt, int* counter)
inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_table_t* pgt, int* counter)
{
uint32_t i;
page_map_t* new_pgt;
page_table_t* new_pgt;
size_t phyaddr;
if (BUILTIN_EXPECT(!pgt, 0))
return 0;
new_pgt = kmalloc(sizeof(page_map_t));
new_pgt = kmalloc(sizeof(page_table_t));
if (!new_pgt)
return 0;
memset(new_pgt, 0x00, sizeof(page_map_t));
memset(new_pgt, 0x00, sizeof(page_table_t));
if (counter)
(*counter)++;
for(i=0; i<MAP_ENTRIES; i++) {
for(i=0; i<PGT_ENTRIES; i++) {
if (pgt->entries[i] & PAGE_MASK) {
if (!(pgt->entries[i] & PG_USER)) {
// Kernel page => copy only page entries
@@ -118,11 +117,11 @@ inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_map_
return phyaddr;
}
int create_page_map(task_t* task, int copy)
int create_pgd(task_t* task, int copy)
{
page_map_t* pgd;
page_map_t* pgt;
page_map_t* pgt_container;
page_dir_t* pgd;
page_table_t* pgt;
page_table_t* pgt_container;
uint32_t i;
uint32_t index1, index2;
size_t viraddr, phyaddr;
@@ -134,26 +133,25 @@ int create_page_map(task_t* task, int copy)
// we already know the virtual address of the "page table container"
// (see file header)
pgt_container = (page_map_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
// create new page directory for the new task
pgd = kmalloc(sizeof(page_map_t));
pgd = kmalloc(sizeof(page_dir_t));
if (!pgd)
return -ENOMEM;
memset(pgd, 0x00, sizeof(page_map_t));
memset(pgd, 0x00, sizeof(page_dir_t));
// create a new "page table container" for the new task
pgt = kmalloc(sizeof(page_map_t));
pgt = kmalloc(sizeof(page_table_t));
if (!pgt) {
kfree(pgd, sizeof(page_map_t));
kfree(pgd, sizeof(page_dir_t));
return -ENOMEM;
}
memset(pgt, 0x00, sizeof(page_map_t));
memset(pgt, 0x00, sizeof(page_table_t));
// copy kernel tables
spinlock_lock(&kslock);
for(i=0; i<MAP_ENTRIES; i++) {
for(i=0; i<PGT_ENTRIES; i++) {
pgd->entries[i] = boot_pgd.entries[i];
// only kernel entries will be copied
if (pgd->entries[i] && !(pgd->entries[i] & PG_USER))
@@ -171,33 +169,36 @@ int create_page_map(task_t* task, int copy)
pgd->entries[index1] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
pgt->entries[index2] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_PAGE;
task->page_map = pgd;
task->pgd = pgd;
if (copy) {
spinlock_irqsave_lock(&curr_task->page_lock);
spinlock_irqsave_lock(&curr_task->pgd_lock);
for (i=KERNEL_SPACE/(1024*PAGE_SIZE); i<1024; i++) {
if (!(curr_task->page_map->entries[i]))
if (!(curr_task->pgd->entries[i]))
continue;
if (!(curr_task->page_map->entries[i] & PG_USER))
if (!(curr_task->pgd->entries[i] & PG_USER))
continue;
phyaddr = copy_page_table(task, i, (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + i*PAGE_SIZE) & PAGE_MASK), &counter);
phyaddr = copy_page_table(task, i, (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + i*PAGE_SIZE) & PAGE_MASK), &counter);
if (phyaddr) {
pgd->entries[i] = (phyaddr & PAGE_MASK) | (curr_task->page_map->entries[i] & 0xFFF);
pgd->entries[i] = (phyaddr & PAGE_MASK) | (curr_task->pgd->entries[i] & 0xFFF);
pgt->entries[i] = (phyaddr & PAGE_MASK) | KERN_PAGE;
}
}
spinlock_irqsave_unlock(&curr_task->page_lock);
spinlock_irqsave_unlock(&curr_task->pgd_lock);
}
return counter;
}
int drop_page_map(void)
/*
* drops all page frames and the PGD of a user task
*/
int drop_pgd(void)
{
page_map_t* pgd = per_core(current_task)->page_map;
page_dir_t* pgd = per_core(current_task)->pgd;
size_t phy_pgd = virt_to_phys((size_t) pgd);
task_t* task = per_core(current_task);
uint32_t i;
@@ -205,9 +206,9 @@ int drop_page_map(void)
if (BUILTIN_EXPECT(pgd == &boot_pgd, 0))
return -EINVAL;
spinlock_irqsave_lock(&task->page_lock);
spinlock_irqsave_lock(&task->pgd_lock);
for(i=0; i<MAP_ENTRIES; i++) {
for(i=0; i<PGT_ENTRIES; i++) {
if (pgd->entries[i] & PG_USER) {
put_page(pgd->entries[i] & PAGE_MASK);
pgd->entries[i] = 0;
@@ -217,9 +218,9 @@ int drop_page_map(void)
// freeing the page directory
put_page(phy_pgd);
task->page_map = NULL;
task->pgd = NULL;
spinlock_irqsave_unlock(&task->page_lock);
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
@@ -228,24 +229,24 @@ size_t virt_to_phys(size_t viraddr)
{
task_t* task = per_core(current_task);
uint32_t index1, index2;
page_map_t* pgt;
page_table_t* pgt;
size_t ret = 0;
if (!paging_enabled)
return viraddr;
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
if (BUILTIN_EXPECT(!task || !task->pgd, 0))
return 0;
spinlock_irqsave_lock(&task->page_lock);
spinlock_irqsave_lock(&task->pgd_lock);
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
if (!(task->page_map->entries[index1] & PAGE_MASK))
if (!(task->pgd->entries[index1] & PAGE_MASK))
goto out;
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2]))
goto out;
@@ -254,7 +255,7 @@ size_t virt_to_phys(size_t viraddr)
out:
//kprintf("vir %p to phy %p\n", viraddr, ret);
spinlock_irqsave_unlock(&task->page_lock);
spinlock_irqsave_unlock(&task->pgd_lock);
return ret;
}
@@ -262,11 +263,11 @@ out:
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{
task_t* task = per_core(current_task);
page_map_t* pgt;
page_table_t* pgt;
size_t index, i;
size_t ret;
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
if (BUILTIN_EXPECT(!task || !task->pgd, 0))
return 0;
if (BUILTIN_EXPECT(!paging_enabled && (viraddr != phyaddr), 0))
@@ -275,7 +276,7 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
if (flags & MAP_KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->page_lock);
spinlock_irqsave_lock(&task->pgd_lock);
if (!viraddr) {
viraddr = vm_alloc(npages, flags);
@@ -291,10 +292,10 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE, phyaddr+=PAGE_SIZE) {
index = viraddr >> 22;
if (!(task->page_map->entries[index])) {
page_map_t* pgt_container;
if (!(task->pgd->entries[index])) {
page_table_t* pgt_container;
pgt = (page_map_t*) get_page();
pgt = (page_table_t*) get_pages(1);
if (BUILTIN_EXPECT(!pgt, 0)) {
kputs("map_address: out of memory\n");
ret = 0;
@@ -303,17 +304,17 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
// set the new page table into the directory
if (flags & MAP_USER_SPACE)
task->page_map->entries[index] = (uint32_t)pgt|USER_TABLE;
task->pgd->entries[index] = (uint32_t)pgt|USER_TABLE;
else
task->page_map->entries[index] = (uint32_t)pgt|KERN_TABLE;
task->pgd->entries[index] = (uint32_t)pgt|KERN_TABLE;
// if paging is already enabled, we need to use the virtual address
if (paging_enabled)
// we already know the virtual address of the "page table container"
// (see file header)
pgt_container = (page_map_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
else
pgt_container = (page_map_t*) (task->page_map->entries[(KERNEL_SPACE - PAGE_SIZE) >> 22] & PAGE_MASK);
pgt_container = (page_table_t*) (task->pgd->entries[(KERNEL_SPACE - PAGE_SIZE) >> 22] & PAGE_MASK);
if (BUILTIN_EXPECT(!pgt_container, 0)) {
kputs("map_address: internal error\n");
@@ -329,11 +330,11 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
memset((void*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK), 0x00, PAGE_SIZE);
else
memset(pgt, 0x00, PAGE_SIZE);
} else pgt = (page_map_t*) (task->page_map->entries[index] & PAGE_MASK);
} else pgt = (page_table_t*) (task->pgd->entries[index] & PAGE_MASK);
/* convert physical address to virtual */
if (paging_enabled)
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK);
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK);
index = (viraddr >> 12) & 0x3FF;
if (pgt->entries[index] && !(flags & MAP_REMAP)) {
@ -381,7 +382,7 @@ out:
if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->page_lock);
spinlock_irqsave_unlock(&task->pgd_lock);
return ret;
}
@ -391,18 +392,18 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
uint32_t index1, index2, newflags;
size_t viraddr = start & 0xFFFFF000;
size_t phyaddr;
page_map_t* pgt;
page_map_t* pgd;
page_table_t* pgt;
page_dir_t* pgd;
task_t* task = per_core(current_task);
if (BUILTIN_EXPECT(!paging_enabled, 0))
return -EINVAL;
pgd = per_core(current_task)->page_map;
pgd = per_core(current_task)->pgd;
if (BUILTIN_EXPECT(!pgd, 0))
return -EINVAL;
spinlock_irqsave_lock(&task->page_lock);
spinlock_irqsave_lock(&task->pgd_lock);
while (viraddr < end)
{
@ -410,7 +411,7 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
index2 = (viraddr >> 12) & 0x3FF;
while ((viraddr < end) && (index2 < 1024)) {
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (pgt && pgt->entries[index2]) {
phyaddr = pgt->entries[index2] & PAGE_MASK;
newflags = pgt->entries[index2] & 0xFFF; // get old flags
@ -447,7 +448,7 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
}
}
spinlock_irqsave_unlock(&task->page_lock);
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
@ -463,9 +464,9 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
uint32_t index1, index2, j;
size_t viraddr, i, ret = 0;
size_t start, end;
page_map_t* pgt;
page_table_t* pgt;
if (BUILTIN_EXPECT(!task || !task->page_map || !paging_enabled, 0))
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return 0;
if (flags & MAP_KERNEL_SPACE) {
@ -482,7 +483,7 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
if (flags & MAP_KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->page_lock);
spinlock_irqsave_lock(&task->pgd_lock);
viraddr = i = start;
j = 0;
@ -490,7 +491,7 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
index1 = i >> 22;
index2 = (i >> 12) & 0x3FF;
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2])) {
i+=PAGE_SIZE;
j++;
@ -508,7 +509,7 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->page_lock);
spinlock_irqsave_unlock(&task->pgd_lock);
return ret;
}
@ -518,22 +519,22 @@ int unmap_region(size_t viraddr, uint32_t npages)
task_t* task = per_core(current_task);
uint32_t i;
uint32_t index1, index2;
page_map_t* pgt;
page_table_t* pgt;
if (BUILTIN_EXPECT(!task || !task->page_map || !paging_enabled, 0))
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->page_lock);
spinlock_irqsave_lock(&task->pgd_lock);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt)
continue;
pgt->entries[index2] &= ~PG_PRESENT;
@ -547,7 +548,7 @@ int unmap_region(size_t viraddr, uint32_t npages)
if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->page_lock);
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
@ -557,22 +558,22 @@ int vm_free(size_t viraddr, uint32_t npages)
task_t* task = per_core(current_task);
uint32_t i;
uint32_t index1, index2;
page_map_t* pgt;
page_table_t* pgt;
if (BUILTIN_EXPECT(!task || !task->page_map || !paging_enabled, 0))
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->page_lock);
spinlock_irqsave_lock(&task->pgd_lock);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt)
continue;
pgt->entries[index2] = 0;
@ -583,7 +584,7 @@ int vm_free(size_t viraddr, uint32_t npages)
if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->page_lock);
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
@ -592,8 +593,8 @@ int print_paging_tree(size_t viraddr)
{
task_t* task = per_core(current_task);
uint32_t index1, index2;
page_map_t* pgd = NULL;
page_map_t* pgt = NULL;
page_dir_t* pgd = NULL;
page_table_t* pgt = NULL;
if (BUILTIN_EXPECT(!viraddr, 0))
return -EINVAL;
@ -601,20 +602,20 @@ int print_paging_tree(size_t viraddr)
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
spinlock_irqsave_lock(&task->page_lock);
spinlock_irqsave_lock(&task->pgd_lock);
kprintf("Paging dump of address 0x%x\n", viraddr);
pgd = task->page_map;
pgd = task->pgd;
kprintf("\tPage directory entry %u: ", index1);
if (pgd) {
kprintf("0x%0x\n", pgd->entries[index1]);
pgt = (page_map_t*) (pgd->entries[index1] & PAGE_MASK);
pgt = (page_table_t*) (pgd->entries[index1] & PAGE_MASK);
} else
kputs("invalid page directory\n");
// convert physical address to virtual
/* convert physical address to virtual */
if (paging_enabled && pgt)
pgt = (page_map_t*) (KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE);
pgt = (page_table_t*) (KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE);
kprintf("\tPage table entry %u: ", index2);
if (pgt)
@ -622,7 +623,7 @@ int print_paging_tree(size_t viraddr)
else
kputs("invalid page table\n");
spinlock_irqsave_unlock(&task->page_lock);
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
@ -630,12 +631,12 @@ int print_paging_tree(size_t viraddr)
static void pagefault_handler(struct state *s)
{
task_t* task = per_core(current_task);
page_dir_t* pgd = task->pgd;
page_table_t* pgt = NULL;
size_t viraddr = read_cr2();
size_t phyaddr;
#ifdef CONFIG_ROCKCREEK
uint32_t index1, index2;
page_map_t* pgd = task->page_map;
page_map_t* pgt = NULL;
#endif
if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
@ -649,7 +650,7 @@ static void pagefault_handler(struct state *s)
memset((void*) viraddr, 0x00, PAGE_SIZE);
return;
}
kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr);
put_page(phyaddr);
}
@ -660,7 +661,7 @@ static void pagefault_handler(struct state *s)
index2 = (viraddr >> 12) & 0x3FF;
if (!pgd || !(pgd->entries[index1] & PAGE_MASK))
goto default_handler;
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2]))
goto default_handler;
if (pgt->entries[index2] & PG_SVM_INIT) {
@ -686,14 +687,14 @@ default_handler:
int arch_paging_init(void)
{
uint32_t i, npages, index1, index2;
page_map_t* pgt;
page_table_t* pgt;
size_t viraddr;
// replace default pagefault handler
// uninstall default handler and install our own
irq_uninstall_handler(14);
irq_install_handler(14, pagefault_handler);
// create a page table to reference to the other page tables
// Create a page table to reference to the other page tables
pgt = &pgt_container;
// map this table at the end of the kernel space
@ -702,21 +703,21 @@ int arch_paging_init(void)
index2 = (viraddr >> 12) & 0x3FF;
// now, we create a self reference
per_core(current_task)->page_map->entries[index1] = (((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
pgt->entries[index2] = ((size_t) pgt & PAGE_MASK)|KERN_PAGE;
per_core(current_task)->pgd->entries[index1] = (((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
pgt->entries[index2] = ((size_t) pgt & 0xFFFFF000)|KERN_PAGE;
// create the other PGTs for the kernel space
for(i=0; i<KERNEL_SPACE/(1024*PAGE_SIZE)-1; i++) {
size_t phyaddr = boot_pgt+i;
memset((void*) phyaddr, 0x00, sizeof(page_map_t));
per_core(current_task)->page_map->entries[i] = (phyaddr & PAGE_MASK)|KERN_TABLE;
memset((void*) phyaddr, 0x00, sizeof(page_table_t));
per_core(current_task)->pgd->entries[i] = (phyaddr & PAGE_MASK)|KERN_TABLE;
pgt->entries[i] = (phyaddr & PAGE_MASK)|KERN_PAGE;
}
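This self reference is what makes the fixed addresses used throughout the lookups above work: the container occupies the last directory slot of the kernel space, so once paging is on, page table i is visible in the 4 MiB window just below KERNEL_SPACE. A sketch (hypothetical helper; master-side naming):

// Virtual address of the page table that serves directory slot index1,
// assuming the page-table container is mapped just below KERNEL_SPACE.
static inline page_table_t* pgt_vaddr(uint32_t index1)
{
	return (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
}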
/*
* Set the page table and page directory entries for the kernel.
* We map the kernel's physical address to the same virtual address.
* Set the page table and page directory entries for the kernel. We map the kernel's physical address
* to the same virtual address.
*/
npages = ((size_t) &kernel_end - (size_t) &kernel_start) >> PAGE_SHIFT;
if ((size_t)&kernel_end & (PAGE_SIZE-1))
@ -724,7 +725,7 @@ int arch_paging_init(void)
map_region((size_t)&kernel_start, (size_t)&kernel_start, npages, MAP_KERNEL_SPACE);
#if MAX_CORES > 1
// reserve page for smp boot code
// Reserve page for smp boot code
if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE)) {
kputs("could not reserve page for smp boot code\n");
return -ENOMEM;
@ -737,12 +738,16 @@ int arch_paging_init(void)
#endif
#ifdef CONFIG_MULTIBOOT
// map mb_info into the kernel space
/*
* of course, mb_info has to be mapped into the kernel space
*/
if (mb_info)
map_region((size_t) mb_info & PAGE_MASK, (size_t) mb_info & PAGE_MASK, 1, MAP_KERNEL_SPACE);
#if 0
// map reserved memory regions into the kernel space
/*
* Map reserved memory regions into the kernel space
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr;
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
@ -800,7 +805,7 @@ int arch_paging_init(void)
kprintf("Map FPGA regsiters at 0x%x\n", viraddr);
#endif
// enable paging
/* enable paging */
write_cr3((uint32_t) &boot_pgd);
i = read_cr0();
i = i | (1 << 31);
@ -817,7 +822,10 @@ int arch_paging_init(void)
bootinfo->addr = viraddr;
#endif
// we turned on paging => now, we are able to register our task
/*
* we turned on paging
* => now, we are able to register our task
*/
register_task();
// APIC registers into the kernel address space

File diff suppressed because it is too large

View file

@ -70,7 +70,7 @@ static ssize_t socket_write(fildes_t* file, uint8_t* buffer, size_t size)
return -ENOMEM;
memcpy(tmp, buffer, size);
ret = lwip_write(file->offset, tmp, size);
kfree(tmp);
kfree(tmp, size);
#endif
if (ret < 0)
ret = -errno;
@ -147,7 +147,7 @@ int socket_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node);
kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM;
}

View file

@ -83,8 +83,15 @@ static ssize_t stdio_read(fildes_t* file, uint8_t* buffer, size_t size)
static ssize_t stdio_write(fildes_t* file, uint8_t* buffer, size_t size)
{
int i;
for (i = 0; i<size; i++, buffer++)
for (i = 0; i<size; i++, buffer++) {
#ifdef CONFIG_VGA
vga_putchar(*buffer);
#elif defined(CONFIG_UART)
uart_putchar(*buffer);
#else
kputchar(*buffer);
#endif
}
file->offset += size;
return size;
@ -145,7 +152,7 @@ int null_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node);
kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM;
}
@ -204,7 +211,7 @@ int stdin_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node);
kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM;
}
@ -263,7 +270,7 @@ int stdout_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node);
kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM;
}
@ -322,7 +329,7 @@ int stderr_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node);
kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM;
}

View file

@ -210,7 +210,7 @@ static int initrd_open(fildes_t* file, const char* name)
if (file->node->type == FS_FILE) {
if ((file->flags & O_CREAT) && (file->flags & O_EXCL))
return -EEXIST;
/* in the case of O_TRUNC kfree all the nodes */
if (file->flags & O_TRUNC) {
uint32_t i;
@ -221,7 +221,8 @@ static int initrd_open(fildes_t* file, const char* name)
/* the first blist pointer has to remain valid. */
for(i=0; i<MAX_DATABLOCKS && !data; i++) {
if (blist->data[i]) {
kfree(blist->data[i]);
kfree(blist->data[i],
sizeof(data_block_t));
}
}
if (blist->next) {
@ -233,12 +234,12 @@ static int initrd_open(fildes_t* file, const char* name)
do {
for(i=0; i<MAX_DATABLOCKS && !data; i++) {
if (blist->data[i]) {
kfree(blist->data[i]);
kfree(blist->data[i], sizeof(data_block_t));
}
}
lastblist = blist;
blist = blist->next;
kfree(lastblist);
kfree(lastblist, sizeof(block_list_t));
} while(blist);
}
@ -252,7 +253,7 @@ static int initrd_open(fildes_t* file, const char* name)
/* opendir was called: */
if (name[0] == '\0')
return 0;
/* open file was called: */
if (!(file->flags & O_CREAT))
return -ENOENT;
@ -263,11 +264,11 @@ static int initrd_open(fildes_t* file, const char* name)
vfs_node_t* new_node = kmalloc(sizeof(vfs_node_t));
if (BUILTIN_EXPECT(!new_node, 0))
return -EINVAL;
blist = &file->node->block_list;
dir_block_t* dir_block;
dirent_t* dirent;
memset(new_node, 0x00, sizeof(vfs_node_t));
new_node->type = FS_FILE;
new_node->read = &initrd_read;
@ -285,7 +286,7 @@ static int initrd_open(fildes_t* file, const char* name)
if (!dirent->vfs_node) {
dirent->vfs_node = new_node;
strncpy(dirent->name, (char*) name, MAX_FNAME);
goto exit_create_file; // TODO: there might be a better solution
goto exit_create_file; // there might be a better solution
}
}
}
@ -424,9 +425,9 @@ static vfs_node_t* initrd_mkdir(vfs_node_t* node, const char* name)
blist = blist->next;
} while(blist);
kfree(dir_block);
kfree(dir_block, sizeof(dir_block_t));
out:
kfree(new_node);
kfree(new_node, sizeof(vfs_node_t));
return NULL;
}

View file

@ -34,14 +34,14 @@ extern "C" {
#define PAGE_SHIFT 12
#define CACHE_LINE 64
#define MAILBOX_SIZE 32
#define TIMER_FREQ 100 // in HZ
#define CLOCK_TICK_RATE 1193182 // 8254 chip's internal oscillator frequency
#define TIMER_FREQ 100 /* in HZ */
#define CLOCK_TICK_RATE 1193182 /* 8254 chip's internal oscillator frequency */
#define INT_SYSCALL 0x80
#define KERNEL_SPACE (1*1024*1024*1024)
#define VIDEO_MEM_ADDR 0xB8000 // the video memory address
#define VIDEO_MEM_ADDR 0xB8000 // the video memora address
#define SMP_SETUP_ADDR 0x07000
#define UART_PORT 0x3F8 // 0x2F8 for SCC
#define BYTE_ORDER LITTLE_ENDIAN
#define BYTE_ORDER LITTLE_ENDIAN
/*
* address space / (page_size * sizeof(uint8_t))
@ -52,7 +52,7 @@ extern "C" {
#define CONFIG_PCI
#define CONFIG_LWIP
#define CONFIG_VGA
#define CONFIG_UART
//#define CONFIG_UART
#define CONFIG_KEYBOARD
#define CONFIG_MULTIBOOT
//#define CONFIG_ROCKCREEK
@ -72,7 +72,7 @@ extern "C" {
//#define SHMADD
#define SHMDBG
//#define SHMADD_CACHEABLE
#define SCC_BOOTINFO 0x80000
#define SCC_BOOTINFO 0x80000
#define BUILTIN_EXPECT(exp, b) __builtin_expect((exp), (b))
//#define BUILTIN_EXPECT(exp, b) (exp)

View file

@ -1,72 +0,0 @@
/*
* Copyright 2010 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#ifndef __MALLOC_H__
#define __MALLOC_H__
#include <metalsvm/stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/// Binary exponent of maximal size for kmalloc()
#define BUDDY_MAX 32 // 4 GB
/// Binary exponent of minimal buddy size
#define BUDDY_MIN 3 // 8 Byte >= sizeof(buddy_t)
/// Binary exponent of the size which we allocate with buddy_fill()
#define BUDDY_ALLOC 15 // 32 KByte >= PAGE_SIZE
#define BUDDY_LISTS (BUDDY_MAX-BUDDY_MIN+1)
#define BUDDY_MAGIC 0xBABE
union buddy;
/** @brief Buddy
*
* Every free memory block is stored in a linked list according to its size.
* We can use this free memory to store store this buddy_t union which represents
* this block (the buddy_t union is alligned to the front).
* Therefore the address of the buddy_t union is equal with the address
* of the underlying free memory block.
*
* Every allocated memory block is prefixed with its binary size exponent and
* a known magic number. This prefix is hidden from the user because it's located
* before the actual memory address returned by kmalloc()
*/
typedef union buddy {
/// Pointer to the next buddy in the linked list.
union buddy* next;
struct {
/// The binary exponent of the block size
uint8_t exponent;
/// Must be equal to BUDDY_MAGIC for a valid memory block
uint16_t magic;
} prefix;
} buddy_t;
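A worked example (editor's illustration, assuming the prefix is counted as part of the block): kmalloc(100) needs 100 bytes plus the hidden prefix, and the smallest power of two covering that is 2^7 = 128 bytes; the buddy is stored with prefix.exponent = 7 and prefix.magic = BUDDY_MAGIC, and the caller gets the address just behind the prefix. On free, the allocator recovers the exponent, and hence the block size, from the prefix alone, which is why kfree() on this branch no longer needs a size argument.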
/** @brief Dump free buddies */
void buddy_dump(void);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -31,6 +31,7 @@
#include <metalsvm/stddef.h>
#include <asm/atomic.h>
//#include <asm/mmu.h>
#ifdef __cplusplus
extern "C" {
@ -49,54 +50,33 @@ extern atomic_int32_t total_available_pages;
*/
int mmu_init(void);
/** @brief Get continuous pages
/** @brief get continuous pages
*
* Use a first-fit algorithm to find a suitable, continuous physical memory region
* This function finds a continuous page region (first fit algorithm)
*
* @param no_pages Desired number of pages
*
* @param npages Desired number of pages
* @return
* - physical address on success
* - 0 on failure
*/
size_t get_pages(uint32_t npages);
size_t get_pages(uint32_t no_pages);
/** @brief Get a single page
/** @brief get a single page
*
* Convenience function: uses get_pages(1);
*/
static inline size_t get_page(void) { return get_pages(1); }
/** @brief Put back a sequence of continuous pages
/** @brief Put back a page after use
*
* @param phyaddr Physical address of the first page
* @param npages Number of pages
* @param phyaddr Physical address to put back
*
* @return number of pages which were marked as used before calling
*/
int put_pages(size_t phyaddr, size_t npages);
/** @brief Put a single page
*
* Convenience function: uses put_pages(1);
*/
static inline int put_page(size_t phyaddr) { return put_pages(phyaddr, 1); }
/** @brief Copy physical page frames
*
* @param psrc physical address of source page frames
* @param pdest physical address of destination page frames
* @param npages number of pages
* @return
* @return
* - 0 on success
* - -1 on failure
* - -EINVAL (-22) on failure
*/
int copy_pages(size_t pdest, size_t psrc, size_t npages);
/** @brief Copy a single page
*
* Convenience function: uses copy_pages(pdest, psrc, 1);
*/
static inline int copy_page(size_t pdest, size_t psrc) { return copy_pages(pdest, psrc, 1); }
int put_page(size_t phyaddr);
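A minimal usage sketch of this interface (master-side signatures as above, together with map_region()/unmap_region() from the paging code earlier in this diff; an illustration, not code from either branch):

// Allocate four contiguous page frames, map them into the kernel space
// (viraddr == 0 lets map_region() pick a free region via vm_alloc()),
// then tear the mapping down and put the frames back.
static int pages_example(void)
{
	uint32_t i;
	size_t phyaddr = get_pages(4);

	if (BUILTIN_EXPECT(!phyaddr, 0))
		return -ENOMEM;

	size_t viraddr = map_region(0, phyaddr, 4, MAP_KERNEL_SPACE);
	if (BUILTIN_EXPECT(!viraddr, 0)) {
		for(i=0; i<4; i++)
			put_page(phyaddr + i*PAGE_SIZE);
		return -ENOMEM;
	}

	/* ... use the pages ... */

	unmap_region(viraddr, 4);
	for(i=0; i<4; i++)	// this API frees one frame at a time
		put_page(phyaddr + i*PAGE_SIZE);

	return 0;
}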
#ifdef __cplusplus
}

View file

@ -29,7 +29,10 @@
#include <metalsvm/stddef.h>
#include <asm/page.h>
/** @brief Sets up the environment, page directories etc and enables paging. */
/**
* Sets up the environment, page directories etc and
* enables paging.
*/
static inline int paging_init(void) { return arch_paging_init(); }
#endif

View file

@ -28,10 +28,14 @@
extern "C" {
#endif
#define NULL ((void*) 0)
#define NULL ((void*) 0)
typedef unsigned int tid_t;
#define PAGE_SIZE (1 << PAGE_SHIFT)
#define PAGE_MASK ~(PAGE_SIZE - 1)
#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
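With PAGE_SHIFT = 12 this yields PAGE_SIZE = 0x1000 and, on 32 bit, PAGE_MASK = 0xFFFFF000. For example, PAGE_ALIGN(0x1234) = (0x1234 + 0xFFF) & 0xFFFFF000 = 0x2000: addresses are rounded up to the next page boundary, while already-aligned addresses pass through unchanged (PAGE_ALIGN(0x2000) = 0x2000).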
#if MAX_CORES == 1
#define per_core(name) name
#define DECLARE_PER_CORE(type, name) extern type name;
@ -62,10 +66,10 @@ typedef unsigned int tid_t;
irq_nested_enable(flags);\
return ret; \
}
#define CORE_ID smp_id()
#define CORE_ID smp_id()
#endif
// needed to find the task which is currently running on this core
/* needed to find the task which is currently running on this core */
struct task;
DECLARE_PER_CORE(struct task*, current_task);

View file

@ -29,66 +29,72 @@
#ifndef __STDLIB_H__
#define __STDLIB_H__
#include <metalsvm/stddef.h>
#include <metalsvm/config.h>
#include <metalsvm/tasks_types.h>
#include <asm/stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
#define MAP_NO_ACCESS (1 << 0)
#define MAP_READ_ONLY (1 << 1)
#define MAP_USER_SPACE (1 << 2)
#define MAP_CODE (1 << 3)
#define MAP_WT (1 << 4)
#define MAP_NO_CACHE (1 << 5)
#define MAP_MPE (1 << 6)
#define MAP_SVM_STRONG (1 << 7)
#define MAP_SVM_LAZYRELEASE (1 << 8)
#define MAP_SVM_INIT (1 << 9)
#define MAP_KERNEL_SPACE (0 << 2) // legacy compatibility
#define MAP_REMAP (1 << 12)
//#define MAP_NON_CONTINUOUS (1 << 13) // TODO
#define MAP_KERNEL_SPACE (1 << 0)
#define MAP_USER_SPACE (1 << 1)
#define MAP_PAGE_TABLE (1 << 2)
#define MAP_NO_CACHE (1 << 3)
#define MAP_WT (1 << 5)
#define MAP_CODE (1 << 6)
#define MAP_READONLY (1 << 7)
#ifdef CONFIG_ROCKCREEK
#define MAP_MPE (1 << 8)
#endif
#define MAP_SVM_STRONG (1 << 9)
#define MAP_SVM_LAZYRELEASE (1 << 10)
#define MAP_SVM_INIT (1 << 11)
#define MAP_NO_ACCESS (1 << 12)
#define MAP_REMAP (1 << 13)
void NORETURN abort(void);
/** @brief General page allocator function
/** @brief Kernel's memory allocator function.
*
* This function allocates and maps whole pages.
* To avoid fragmentation you should use kmalloc() and kfree()!
* This will just call mem_allocation with
* the flags MAP_KERNEL_SPACE and MAP_HEAP.
*
* @return Pointer to the new memory range
*/
void* kmalloc(size_t);
/** @brief Kernel's more general memory allocator function.
*
* This function lets you choose flags for the newly allocated memory.
*
* @param sz Desired size of the new memory
* @param flags Flags for map_region(), vma_add()
* @param flags Flags to specify
*
* @return Pointer to the new memory range
*/
void* palloc(size_t sz, uint32_t flags);
void* mem_allocation(size_t sz, uint32_t flags);
/** @brief Free general kernel memory
/** @brief Free memory
*
* palloc() doesn't track how much memory was allocated for which pointer,
* The kernel malloc doesn't track how
* much memory was allocated for which pointer,
* so you have to specify how much memory shall be freed.
*
* @param sz The size which should be freed
*/
void pfree(void* addr, size_t sz);
void kfree(void*, size_t);
/** @brief The memory allocator function
/** @brief Create a new stack for a new task
*
* This allocator uses a buddy system to manage free memory.
*
* @return Pointer to the new memory range
* @return start address of the new stack
*/
void* kmalloc(size_t sz);
void* create_stack(void);
/** @brief The memory free function
/** @brief Delete stack of a finished task
*
* Releases memory allocated by malloc()
*
* @param addr The address to the memory block allocated by malloc()
* @param addr Pointer to the stack
* @return 0 on success
*/
void kfree(void* addr);
int destroy_stack(task_t* addr);
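This allocator split shows up at every kfree() call site in the diff; a short side-by-side sketch (vfs_node_t is just an example type):

/* x64_userspace: the buddy allocator keeps the size in a hidden
 * prefix, so freeing takes only the pointer. */
vfs_node_t* node = kmalloc(sizeof(vfs_node_t));
kfree(node);

/* master: no per-allocation bookkeeping, so the caller must pass
 * the size it originally requested. */
vfs_node_t* node2 = kmalloc(sizeof(vfs_node_t));
kfree(node2, sizeof(vfs_node_t));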
/** @brief String to long
*
@ -107,7 +113,7 @@ unsigned long strtoul(const char* nptr, char** endptr, int base);
*/
static inline int atoi(const char *str)
{
return (int)strtol(str, (char **) NULL, 10);
return (int)strtol(str, (char **)NULL, 10);
}
#ifdef __cplusplus

View file

@ -147,7 +147,9 @@ tid_t wait(int32_t* result);
*/
void update_load(void);
/** @brief Print the current cpu load */
/** @brief Print the current cpu load
*
*/
void dump_load(void);
#if MAX_CORES > 1
@ -199,7 +201,9 @@ int block_current_task(void);
*/
int set_timer(uint64_t deadline);
/** @brief check if a timer is expired */
/** @brief check if a timer is expired
*
*/
void check_timers(void);
/** @brief Abort current task */

View file

@ -62,7 +62,7 @@ extern "C" {
#define TASK_L2 (1 << 3)
typedef int (*entry_point_t)(void*);
typedef struct page_map page_map_t;
struct page_dir;
/** @brief The task_t structure */
typedef struct task {
@ -86,12 +86,12 @@ typedef struct task {
struct task* prev;
/// last core id on which the task was running
uint32_t last_core;
/// usage in number of pages (including page map tables)
/// usage in number of pages
atomic_int32_t user_usage;
/// locks access to all page maps with PG_USER flag set
spinlock_irqsave_t page_lock;
/// pointer to page directory (32bit) or page map level 4 (64bit) table respectively
page_map_t* page_map;
/// avoids concurrent access to the page directory
spinlock_irqsave_t pgd_lock;
/// pointer to the page directory
struct page_dir* pgd;
/// lock for the VMA_list
spinlock_t vma_lock;
/// list of VMAs
@ -104,12 +104,10 @@ typedef struct task {
size_t start_heap;
/// end address of the heap
size_t end_heap;
/// the userspace heap
vma_t* heap;
/// LwIP error code
int lwip_err;
/// mail inbox
mailbox_wait_msg_t inbox;
mailbox_wait_msg_t inbox;
/// mail outbox array
mailbox_wait_msg_t* outbox[MAX_TASKS];
/// FPU state

View file

@ -27,101 +27,56 @@
#define __VMA_H__
#include <metalsvm/stddef.h>
#include <asm/page.h>
#ifdef __cplusplus
extern "C" {
#endif
/// Read access to this VMA is allowed
#define VMA_READ (1 << 0)
/// Write access to this VMA is allowed
#define VMA_WRITE (1 << 1)
/// Instructions fetches in this VMA are allowed
#define VMA_EXECUTE (1 << 2)
/// This VMA is cacheable
#define VMA_CACHEABLE (1 << 3)
/// This VMA is not accessible
#define VMA_NO_ACCESS (1 << 4)
/// This VMA should be part of the userspace
#define VMA_USER (1 << 5)
/// A collection of flags used for the kernel heap (kmalloc)
#define VMA_HEAP (VMA_READ|VMA_WRITE|VMA_CACHEABLE)
// boundaries for VAS allocation
#define VMA_KERN_MIN PAGE_SIZE // we skip the first page
#define VMA_KERN_MAX KERNEL_SPACE
#define VMA_USER_MAX PAGE_MAP_PGT
#define VMA_NOACCESS (1 << 4)
struct vma;
/** @brief VMA structure definition
*
* Each item in this linked list marks a used part of the virtual address space.
* It's used by vm_alloc() to find holes between them.
*/
/** @brief VMA structure definition */
typedef struct vma {
/// Start address of the memory area
size_t start;
/// End address of the memory area
size_t end;
/// Type flags field
uint32_t flags;
uint32_t type;
/// Pointer of next VMA element in the list
struct vma* next;
/// Pointer to previous VMA element in the list
struct vma* prev;
} vma_t;
/** @brief Add a new virtual memory area to the list of VMAs
/** @brief Add a new virtual memory region to the list of VMAs
*
* @param start Start address of the new area
* @param end End address of the new area
* @param flags Type flags the new area shall have
* @param task Pointer to the task_t structure of the task
* @param start Start address of the new region
* @param end End address of the new region
* @param type Type flags the new region shall have
*
* @return
* - 0 on success
* - -EINVAL (-22) or -ENOMEM (-12) on failure
*/
int vma_add(size_t start, size_t end, uint32_t flags);
int vma_add(struct task* task, size_t start, size_t end, uint32_t type);
/** @brief Search for a free memory area
/** @brief Dump information about this task's VMAs into the terminal.
*
* @param size Size of the requested VMA in bytes
* @param flags Type flags the new area shall have
* @return
* - 0 on failure
* - the start address of a free area
*/
size_t vma_alloc(size_t size, uint32_t flags);
/** @brief Free an allocated memory area
* This will print out Start, end and flags for each VMA in the task's list
*
* @param start Start address of the area to be freed
* @param end End address of the area to be freed
* @param task The task's task_t structure
* @return
* - 0 on success
* - -EINVAL (-22) on failure
*/
int vma_free(size_t start, size_t end);
/** @brief Free all virtual memory areas
*
* @return
* - 0 on success
*/
int drop_vma_list();
/** @brief Copy the VMA list of the current task to task
*
* @param task The task where the list should be copied to
* @return
* - 0 on success
*/
int copy_vma_list(struct task* task);
/** @brief Dump information about this task's VMAs into the terminal. */
void vma_dump();
int vma_dump(struct task* task);
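Call sites change accordingly; a hedged sketch of both signatures (flag values as defined above; judging by the -1 at the call sites, the master side treats the end address as inclusive):

/* x64_userspace: the VMA implicitly belongs to the current task. */
vma_add(viraddr, viraddr + npages*PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE);

/* master: the owning task is passed explicitly. */
vma_add(task, viraddr, viraddr + npages*PAGE_SIZE - 1, VMA_READ|VMA_WRITE|VMA_CACHEABLE);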
#ifdef __cplusplus
}

View file

@ -63,7 +63,7 @@ extern const void bss_end;
int lowlevel_init(void)
{
// initialize .bss section
memset((char*) &bss_start, 0x00, (char*) &bss_end - (char*) &bss_start);
memset((void*)&bss_start, 0x00, ((size_t) &bss_end - (size_t) &bss_start));
koutput_init();

View file

@ -29,7 +29,6 @@
#include <metalsvm/fs.h>
#include <asm/irq.h>
#include <asm/irqflags.h>
#include <asm/page.h>
#include <asm/kb.h>
#ifdef CONFIG_ROCKCREEK
#include <asm/icc.h>
@ -72,9 +71,8 @@ int main(void)
pushbg(COL_BLUE);
kprintf("This is MetalSVM %s Build %u, %u\n",
METALSVM_VERSION, &__BUILD_DATE, &__BUILD_TIME);
METALSVM_VERSION, &__BUILD_DATE, &__BUILD_TIME);
popbg();
system_init();
irq_init();
timer_init();
@ -87,7 +85,7 @@ int main(void)
icc_init();
svm_init();
#endif
initrd_init();
initrd_init();
irq_enable();
@ -103,10 +101,9 @@ int main(void)
disable_timer_irq();
#endif
sleep(2);
sleep(5);
create_kernel_task(&id, initd, NULL, NORMAL_PRIO);
kprintf("Create initd with id %u\n", id);
reschedule();
while(1) {

View file

@ -105,11 +105,11 @@ static int sys_open(const char* name, int flags, int mode)
/* file doesn't exist! */
if (check < 0) {
/* tidy up the fildescriptor */
kfree(curr_task->fildes_table[fd]);
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL;
return check;
}
return fd;
}
@ -196,7 +196,7 @@ static int sys_socket(int domain, int type, int protocol)
/* file doesn't exist! */
if (curr_task->fildes_table[fd]->node == NULL) {
/* tidy up the fildescriptor */
kfree(curr_task->fildes_table[fd]);
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL;
return -ENOENT;
}
@ -236,11 +236,11 @@ static int sys_accept(int s, struct sockaddr* addr, socklen_t* addrlen)
curr_task->fildes_table[fd]->offset = sock2;
curr_task->fildes_table[fd]->count = 1;
curr_task->fildes_table[fd]->node = findnode_fs("/dev/socket");
/* file doesn't exist! */
if (curr_task->fildes_table[fd]->node == NULL) {
/* tidy up the fildescriptor */
kfree(curr_task->fildes_table[fd]);
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL;
return -ENOENT;
}
@ -273,7 +273,7 @@ static int sys_close(int fd)
/* close command failed -> return check = errno */
if (BUILTIN_EXPECT(check < 0, 0))
return check;
kfree(curr_task->fildes_table[fd]);
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL;
} else {
curr_task->fildes_table[fd]->count--;
@ -356,7 +356,7 @@ static int sys_dup(int fd)
* free the memory which was allocated in get_fildes()
* because we will link it to another existing memory space
*/
kfree(curr_task->fildes_table[new_fd]);
kfree(curr_task->fildes_table[new_fd], sizeof(fildes_t));
/* and link it to another existing memory space */
curr_task->fildes_table[new_fd] = curr_task->fildes_table[fd];
@ -381,7 +381,7 @@ static int sys_dup2(int fd, int fd2)
/* If fd and fd2 are equal, then dup2() just returns fd2 */
if (fd == fd2)
return fd2;
/*
* if descriptor fd2 is already in use, it is first deallocated
* as if a close(2) call had been done first
@ -398,32 +398,30 @@ static int sys_dup2(int fd, int fd2)
static int sys_sbrk(int incr)
{
task_t* task = per_core(current_task);
vma_t* heap = task->heap;
vma_t* tmp = NULL;
int ret;
spinlock_lock(&task->vma_lock);
if (BUILTIN_EXPECT(!heap,0 )) {
kprintf("sys_sbrk: missing heap!\n");
abort();
}
tmp = task->vma_list;
while(tmp && !((task->end_heap >= tmp->start) && (task->end_heap <= tmp->end)))
tmp = tmp->next;
ret = heap->end;
heap->end += incr;
if (heap->end < heap->start)
heap->end = heap->start;
// allocation and mapping of new pages for the heap
// is caught by the pagefault handler
kprintf("sys_sbrk: task = %d, heap->start = %#lx, heap->end = %#lx, incr = %#4x\n", task->id, heap->start, heap->end, incr); // TODO: remove
ret = (int) task->end_heap;
task->end_heap += incr;
if (task->end_heap < task->start_heap)
task->end_heap = task->start_heap;
// resize virtual memory area
if (tmp && (tmp->end <= task->end_heap))
tmp->end = task->end_heap;
spinlock_unlock(&task->vma_lock);
return ret;
}
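Note that sys_sbrk() only moves the break; no page is mapped here. A sketch of the lazy flow (see pagefault_handler earlier in this diff; sbrk is the newlib-side wrapper around this syscall):

/*
 * old_brk = sbrk(PAGE_SIZE);  // kernel: end_heap += PAGE_SIZE, nothing mapped yet
 * ((char*) old_brk)[0] = 42;  // page fault: the handler sees that the address
 *                             // lies within [start_heap, end_heap], maps a fresh
 *                             // zero-filled frame there and simply returns, so
 *                             // the faulting store is restarted transparently
 */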
int syscall_handler(size_t sys_nr, ...)
int syscall_handler(uint32_t sys_nr, ...)
{
int ret = -EINVAL;
va_list vl;
@ -502,7 +500,7 @@ int syscall_handler(size_t sys_nr, ...)
break;
case __NR_wait: {
int32_t* status = va_arg(vl, int32_t*);
ret = wait(status);
break;
}
@ -551,7 +549,7 @@ int syscall_handler(size_t sys_nr, ...)
ret = -ENOTSOCK;
break;
}
//kprintf("lwip_connect: %p with lenght %i and Socket %i", name, namelen, per_core(current_task)->fildes_table[fd].offset); // TODO: remove
//kprintf("lwip_connect: %p with lenght %i and Socket %i", name, namelen, per_core(current_task)->fildes_table[fd].offset);
ret = lwip_connect(per_core(current_task)->fildes_table[fd]->offset, name, namelen);
@ -603,7 +601,7 @@ int syscall_handler(size_t sys_nr, ...)
}
#endif
default:
kprintf("syscall_handler: invalid system call %u\n", sys_nr);
kputs("invalid system call\n");
ret = -ENOSYS;
break;
};

View file

@ -78,7 +78,6 @@ DEFINE_PER_CORE(task_t*, current_task, task_table+0);
extern const void boot_stack;
/** @brief helper function for the assembly code to determine the current task
*
* @return Pointer to the task_t structure of the current task
*/
task_t* get_current_task(void) {
@ -97,37 +96,6 @@ uint32_t get_highest_priority(void)
return msb(runqueues[CORE_ID].prio_bitmap);
}
/** @brief Create a new stack for a new task
*
* @return start address of the new stack
*/
static void* create_stack(void)
{
/*
* TODO: our stack should be non-executable!
* We need this atm because nested functions in page64.c
* are using trampolines on the stack.
*/
return palloc(KERNEL_STACK_SIZE, MAP_CODE);
}
/** @brief Delete stack of a finished task
*
* @param addr Pointer to the stack
* @return
* - 0 on success
* - -EINVAL on failure
*/
static int destroy_stack(task_t* task)
{
if (BUILTIN_EXPECT(!task || !task->stack, 0))
return -EINVAL;
pfree(task->stack, KERNEL_STACK_SIZE);
return 0;
}
int multitasking_init(void) {
if (BUILTIN_EXPECT(task_table[0].status != TASK_IDLE, 0)) {
kputs("Task 0 is not an idle task\n");
@ -136,7 +104,7 @@ int multitasking_init(void) {
mailbox_wait_msg_init(&task_table[0].inbox);
memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[0].page_map = get_boot_page_map();
task_table[0].pgd = get_boot_pgd();
task_table[0].flags = TASK_DEFAULT_FLAGS;
task_table[0].prio = IDLE_PRIO;
task_table[0].stack = (void*) &boot_stack;
@ -160,7 +128,7 @@ size_t get_idle_task(uint32_t id)
atomic_int32_set(&task_table[id].user_usage, 0);
mailbox_wait_msg_init(&task_table[id].inbox);
memset(task_table[id].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[id].page_map = get_boot_page_map();
task_table[id].pgd = get_boot_pgd();
current_task[id].var = task_table+id;
runqueues[id].idle = task_table+id;
@ -225,8 +193,10 @@ static void wakeup_blocked_tasks(int result)
spinlock_irqsave_unlock(&table_lock);
}
/** @brief A procedure to be called by procedures which are called by exiting tasks. */
/** @brief A procedure to be called by
* procedures which are called by exiting tasks. */
static void NORETURN do_exit(int arg) {
vma_t* tmp;
task_t* curr_task = per_core(current_task);
uint32_t flags, core_id, fd, status;
@ -234,17 +204,17 @@ static void NORETURN do_exit(int arg) {
for (fd = 0; fd < NR_OPEN; fd++) {
if(curr_task->fildes_table[fd] != NULL) {
/*
* Delete a descriptor from the per-process object
* reference table. If this is not the last reference to the underlying
* object, the object will be ignored.
*/
* delete a descriptor from the per-process object
* reference table. If this is not the last reference to the underlying
* object, the object will be ignored.
*/
if (curr_task->fildes_table[fd]->count == 1) {
// try to close the file
/* try to close the file */
status = close_fs(curr_task->fildes_table[fd]);
// close command failed -> return check = errno
/* close command failed -> return check = errno */
if (BUILTIN_EXPECT(status < 0, 0))
kprintf("Task %u was not able to close file descriptor %i. close_fs returned %d", curr_task->id, fd, -status);
kfree(curr_task->fildes_table[fd]);
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL;
} else {
curr_task->fildes_table[fd]->count--;
@ -252,33 +222,37 @@ static void NORETURN do_exit(int arg) {
}
}
}
kfree(curr_task->fildes_table); // finally the table has to be cleared
//finally the table has to be cleared.
kfree(curr_task->fildes_table, sizeof(filp_t)*NR_OPEN);
}
kprintf("Terminate task: %u, return value %d\n", curr_task->id, arg);
wakeup_blocked_tasks(arg);
flags = irq_nested_disable();
drop_vma_list();
//vma_dump(curr_task);
spinlock_lock(&curr_task->vma_lock);
/*
* This marks all user pages as free. Nevertheless they still exist
* and used by the MMU until the task finishes. Therefore we need to disable
* context switching by disabling interrupts (see above)! We may also make use
* of the TLB and global kernel pages.
*/
drop_page_map();
// remove memory regions
while((tmp = curr_task->vma_list) != NULL) {
kfree((void*) tmp->start, tmp->end - tmp->start + 1);
curr_task->vma_list = tmp->next;
kfree((void*) tmp, sizeof(vma_t));
}
#if 1
spinlock_unlock(&curr_task->vma_lock);
drop_pgd(); // delete page directory and its page tables
#if 0
if (atomic_int32_read(&curr_task->user_usage))
kprintf("Memory leak! Task %d did not release %d pages\n",
curr_task->id, atomic_int32_read(&curr_task->user_usage));
curr_task->id, atomic_int32_read(&curr_task->user_usage));
#endif
curr_task->status = TASK_FINISHED;
// decrease the number of active tasks
flags = irq_nested_disable();
core_id = CORE_ID;
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].nr_tasks--;
@ -288,7 +262,9 @@ static void NORETURN do_exit(int arg) {
reschedule();
kprintf("Kernel panic: scheduler on core %d found no valid task\n", CORE_ID);
while(1) HALT;
while(1) {
HALT;
}
}
/** @brief A procedure to be called by kernel tasks */
@ -324,7 +300,6 @@ void NORETURN abort(void) {
static int create_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio, uint32_t core_id)
{
task_t* curr_task;
task_t* new_task = NULL;
int ret = -ENOMEM;
uint32_t i;
@ -344,74 +319,64 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio, uin
#endif
{
core_id = CORE_ID;
kprintf("create_task: invalid core id! Set id to %u!\n", core_id);
kprintf("Inavlid core id! Set id to %u!\n", core_id);
}
curr_task = per_core(current_task);
// search free entry in task table
for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) {
new_task = &task_table[i];
atomic_int32_set(&task_table[i].user_usage, 0);
ret = create_pgd(task_table+i, 0);
if (ret < 0) {
ret = -ENOMEM;
goto create_task_out;
}
task_table[i].id = i;
task_table[i].status = TASK_READY;
task_table[i].last_stack_pointer = NULL;
task_table[i].stack = create_stack();
task_table[i].flags = TASK_DEFAULT_FLAGS;
task_table[i].prio = prio;
task_table[i].last_core = 0;
spinlock_init(&task_table[i].vma_lock);
task_table[i].vma_list = NULL;
task_table[i].fildes_table = NULL;
mailbox_wait_msg_init(&task_table[i].inbox);
memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[i].outbox[curr_task->id] = &curr_task->inbox;
if (id)
*id = i;
ret = create_default_frame(task_table+i, ep, arg);
task_table[i].start_heap = 0;
task_table[i].end_heap = 0;
task_table[i].lwip_err = 0;
task_table[i].start_tick = get_clock_tick();
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[prio-1].first) {
task_table[i].next = task_table[i].prev = NULL;
runqueues[core_id].queue[prio-1].first = task_table+i;
runqueues[core_id].queue[prio-1].last = task_table+i;
} else {
task_table[i].prev = runqueues[core_id].queue[prio-1].last;
task_table[i].next = NULL;
runqueues[core_id].queue[prio-1].last->next = task_table+i;
runqueues[core_id].queue[prio-1].last = task_table+i;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
break;
}
}
if (BUILTIN_EXPECT(!new_task, 0)) {
ret = -ENOMEM;
goto out;
}
atomic_int32_set(&new_task->user_usage, 0);
ret = copy_page_map(new_task, 0);
if (ret < 0) {
ret = -ENOMEM;
goto out;
}
new_task->id = i;
new_task->status = TASK_READY;
new_task->last_stack_pointer = NULL;
new_task->stack = create_stack();
new_task->flags = TASK_DEFAULT_FLAGS;
new_task->prio = prio;
new_task->last_core = 0;
spinlock_init(&new_task->vma_lock);
new_task->vma_list = NULL;
new_task->fildes_table = NULL;
mailbox_wait_msg_init(&new_task->inbox);
memset(new_task->outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
new_task->outbox[curr_task->id] = &curr_task->inbox;
if (id)
*id = i;
ret = create_default_frame(new_task, ep, arg);
new_task->start_heap = 0;
new_task->end_heap = 0;
new_task->lwip_err = 0;
new_task->start_tick = get_clock_tick();
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[prio-1].first) {
new_task->next = new_task->prev = NULL;
runqueues[core_id].queue[prio-1].first = new_task;
runqueues[core_id].queue[prio-1].last = new_task;
}
else {
new_task->prev = runqueues[core_id].queue[prio-1].last;
new_task->next = NULL;
runqueues[core_id].queue[prio-1].last->next = new_task;
runqueues[core_id].queue[prio-1].last = new_task;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
out:
create_task_out:
spinlock_irqsave_unlock(&table_lock);
return ret;
@ -422,99 +387,109 @@ int sys_fork(void)
int ret = -ENOMEM;
unsigned int i, core_id, fd_i;
task_t* parent_task = per_core(current_task);
task_t* child_task = NULL;
vma_t** child;
vma_t* parent;
vma_t* tmp;
spinlock_lock(&parent_task->vma_lock);
spinlock_irqsave_lock(&table_lock);
core_id = CORE_ID;
// search free entry in task_table
for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) {
child_task = &task_table[i];
atomic_int32_set(&task_table[i].user_usage, 0);
ret = create_pgd(task_table+i, 1);
if (ret < 0) {
ret = -ENOMEM;
goto create_task_out;
}
task_table[i].id = i;
task_table[i].last_stack_pointer = NULL;
task_table[i].stack = create_stack();
spinlock_init(&task_table[i].vma_lock);
// copy VMA list
child = &task_table[i].vma_list;
parent = parent_task->vma_list;
tmp = NULL;
while(parent) {
*child = (vma_t*) kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!child, 0))
break;
(*child)->start = parent->start;
(*child)->end = parent->end;
(*child)->type = parent->type;
(*child)->prev = tmp;
(*child)->next = NULL;
parent = parent->next;
tmp = *child;
child = &((*child)->next);
}
/* init fildes_table */
task_table[i].fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
memcpy(task_table[i].fildes_table, parent_task->fildes_table, sizeof(filp_t)*NR_OPEN);
for (fd_i = 0; fd_i < NR_OPEN; fd_i++)
if ((task_table[i].fildes_table[fd_i]) != NULL)
task_table[i].fildes_table[fd_i]->count++;
mailbox_wait_msg_init(&task_table[i].inbox);
memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[i].outbox[parent_task->id] = &parent_task->inbox;
task_table[i].flags = parent_task->flags;
memcpy(&(task_table[i].fpu), &(parent_task->fpu), sizeof(union fpu_state));
task_table[i].start_tick = get_clock_tick();
task_table[i].start_heap = 0;
task_table[i].end_heap = 0;
task_table[i].lwip_err = 0;
task_table[i].prio = parent_task->prio;
task_table[i].last_core = parent_task->last_core;
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[parent_task->prio-1].first) {
task_table[i].next = task_table[i].prev = NULL;
runqueues[core_id].queue[parent_task->prio-1].first = task_table+i;
runqueues[core_id].queue[parent_task->prio-1].last = task_table+i;
} else {
task_table[i].prev = runqueues[core_id].queue[parent_task->prio-1].last;
task_table[i].next = NULL;
runqueues[core_id].queue[parent_task->prio-1].last->next = task_table+i;
runqueues[core_id].queue[parent_task->prio-1].last = task_table+i;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
ret = arch_fork(task_table+i);
if (parent_task != per_core(current_task)) {
// Oh, the current task is the new child task!
// Leave the function without releasing the locks
// because the locks are already released
// by the parent task!
return 0;
}
if (!ret) {
task_table[i].status = TASK_READY;
ret = i;
}
break;
}
}
if (BUILTIN_EXPECT(!child_task, 0)) {
ret = -ENOMEM;
goto out;
}
atomic_int32_set(&child_task->user_usage, 0);
ret = copy_page_map(child_task, 1);
if (ret < 0) {
ret = -ENOMEM;
goto out;
}
ret = copy_vma_list(child_task);
if (BUILTIN_EXPECT(!ret, 0)) {
ret = -ENOMEM;
goto out;
}
child_task->id = i;
child_task->last_stack_pointer = NULL;
child_task->stack = create_stack();
// init fildes_table
child_task->fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
memcpy(child_task->fildes_table, parent_task->fildes_table, sizeof(filp_t)*NR_OPEN);
for (fd_i=0; fd_i<NR_OPEN; fd_i++) {
if ((child_task->fildes_table[fd_i]) != NULL)
child_task->fildes_table[fd_i]->count++;
}
// init mailbox
mailbox_wait_msg_init(&child_task->inbox);
memset(child_task->outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
child_task->outbox[parent_task->id] = &parent_task->inbox;
child_task->flags = parent_task->flags;
memcpy(&child_task->fpu, &parent_task->fpu, sizeof(union fpu_state));
child_task->start_tick = get_clock_tick();
child_task->start_heap = 0;
child_task->end_heap = 0;
child_task->lwip_err = 0;
child_task->prio = parent_task->prio;
child_task->last_core = parent_task->last_core;
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[parent_task->prio-1].first) {
child_task->next = child_task->prev = NULL;
runqueues[core_id].queue[parent_task->prio-1].first = child_task;
runqueues[core_id].queue[parent_task->prio-1].last = child_task;
}
else {
child_task->prev = runqueues[core_id].queue[parent_task->prio-1].last;
child_task->next = NULL;
runqueues[core_id].queue[parent_task->prio-1].last->next = child_task;
runqueues[core_id].queue[parent_task->prio-1].last = child_task;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
ret = arch_fork(child_task);
if (parent_task != per_core(current_task))
/*
* Oh, the current task is the new child task!
* Leave the function without releasing the locks
* because the locks are already released by the parent task!
*/
return 0;
if (!ret) {
child_task->status = TASK_READY;
ret = i;
}
out:
create_task_out:
spinlock_irqsave_unlock(&table_lock);
spinlock_unlock(&parent_task->vma_lock);
return ret;
}
@ -540,7 +515,7 @@ static int kernel_entry(void* args)
ret = kernel_args->func(kernel_args->args);
kfree(kernel_args);
kfree(kernel_args, sizeof(kernel_args_t));
return ret;
}
@ -586,15 +561,16 @@ static int load_task(load_args_t* largs)
{
uint32_t i, offset, idx, fd_i;
uint32_t addr, npages, flags;
size_t stack = 0, heap = 0;
size_t stack = 0;
elf_header_t header;
elf_program_header_t prog_header;
//elf_section_header_t sec_header;
fildes_t *file = kmalloc(sizeof(fildes_t)); // TODO: kfree is missing!
///!!! kfree is missing!
fildes_t *file = kmalloc(sizeof(fildes_t));
file->offset = 0;
file->flags = 0;
// TODO: init the whole fildes_t struct!
//TODO: init the whole fildes_t struct!
task_t* curr_task = per_core(current_task);
int err;
@ -605,22 +581,22 @@ static int load_task(load_args_t* largs)
if (!file->node)
return -EINVAL;
// init fildes_table
/* init fildes_table */
spinlock_irqsave_lock(&table_lock);
if (!curr_task->fildes_table) {
curr_task->fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
if (BUILTIN_EXPECT(!curr_task->fildes_table, 0)) {
if (!task_table[curr_task->id].fildes_table) {
task_table[curr_task->id].fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
if (BUILTIN_EXPECT(!task_table[curr_task->id].fildes_table, 0)) {
spinlock_irqsave_unlock(&table_lock);
return -ENOMEM;
}
memset(curr_task->fildes_table, 0x00, sizeof(filp_t)*NR_OPEN);
memset(task_table[curr_task->id].fildes_table, 0x00, sizeof(filp_t)*NR_OPEN);
for (fd_i = 0; fd_i < 3; fd_i++) {
curr_task->fildes_table[fd_i] = kmalloc(sizeof(fildes_t));
curr_task->fildes_table[fd_i]->count = 1;
task_table[curr_task->id].fildes_table[fd_i] = kmalloc(sizeof(fildes_t));
task_table[curr_task->id].fildes_table[fd_i]->count = 1;
}
curr_task->fildes_table[0]->node = findnode_fs("/dev/stdin");
curr_task->fildes_table[1]->node = findnode_fs("/dev/stdout");
curr_task->fildes_table[2]->node = findnode_fs("/dev/stderr");
task_table[curr_task->id].fildes_table[0]->node = findnode_fs("/dev/stdin");
task_table[curr_task->id].fildes_table[1]->node = findnode_fs("/dev/stdout");
task_table[curr_task->id].fildes_table[2]->node = findnode_fs("/dev/stderr");
}
spinlock_irqsave_unlock(&table_lock);
@ -641,43 +617,43 @@ static int load_task(load_args_t* largs)
if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_32, 0))
goto invalid;
#elif defined(CONFIG_X86_64)
#else
if (BUILTIN_EXPECT(header.machine != ELF_EM_X86_64, 0))
goto invalid;
if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_64, 0))
goto invalid;
#else
#error "unknown arch"
#endif
if (BUILTIN_EXPECT(header.ident.data != ELF_DATA_2LSB, 0))
goto invalid;
if (header.entry < KERNEL_SPACE)
if (header.entry <= KERNEL_SPACE)
goto invalid;
// interpret program header table
for (i=0; i<header.ph_entry_count; i++) {
file->offset = header.ph_offset+i*header.ph_entry_size;
if (read_fs(file, (uint8_t*) &prog_header, sizeof(elf_program_header_t)) == 0) {
if (read_fs(file, (uint8_t*)&prog_header, sizeof(elf_program_header_t)) == 0) {
kprintf("Could not read programm header!\n");
continue;
}
switch(prog_header.type) {
switch(prog_header.type)
{
case ELF_PT_LOAD: // load program segment
if (!prog_header.virt_addr)
continue;
npages = PAGE_FLOOR(prog_header.mem_size) >> PAGE_BITS;
npages = (prog_header.mem_size >> PAGE_SHIFT);
if (prog_header.mem_size & (PAGE_SIZE-1))
npages++;
addr = get_pages(npages);
flags = MAP_USER_SPACE;
if (prog_header.flags & PF_X)
flags |= MAP_CODE;
if (!(prog_header.flags & PF_W))
flags |= MAP_READ_ONLY;
// map page frames in the address space of the current task
if (!map_region(prog_header.virt_addr, addr, npages, flags)) {
@ -686,30 +662,35 @@ static int load_task(load_args_t* largs)
}
// clear pages
memset((void*) prog_header.virt_addr, 0x00, npages * PAGE_SIZE);
memset((void*) prog_header.virt_addr, 0x00, npages*PAGE_SIZE);
// update heap location
if (heap < prog_header.virt_addr + prog_header.mem_size)
heap = prog_header.virt_addr+prog_header.mem_size;
// set starting point of the heap
if (curr_task->start_heap < prog_header.virt_addr+prog_header.mem_size)
curr_task->start_heap = curr_task->end_heap = prog_header.virt_addr+prog_header.mem_size;
// load program
file->offset = prog_header.offset;
read_fs(file, (uint8_t*) prog_header.virt_addr, prog_header.file_size);
read_fs(file, (uint8_t*)prog_header.virt_addr, prog_header.file_size);
flags = VMA_CACHEABLE | VMA_USER;
flags = VMA_CACHEABLE;
if (prog_header.flags & PF_R)
flags |= VMA_READ;
if (prog_header.flags & PF_W)
flags |= VMA_WRITE;
if (prog_header.flags & PF_X)
flags |= VMA_EXECUTE;
vma_add(curr_task, prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags);
vma_add(prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE, flags);
if (!(prog_header.flags & PF_W))
change_page_permissions(prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags);
break;
case ELF_PT_GNU_STACK: // Indicates stack executability
// create user-level stack
npages = PAGE_FLOOR(DEFAULT_STACK_SIZE) >> PAGE_BITS;
npages = DEFAULT_STACK_SIZE >> PAGE_SHIFT;
if (DEFAULT_STACK_SIZE & (PAGE_SIZE-1))
npages++;
addr = get_pages(npages);
stack = header.entry*2; // virtual address of the stack
@ -727,8 +708,7 @@ static int load_task(load_args_t* largs)
flags |= VMA_WRITE;
if (prog_header.flags & PF_X)
flags |= VMA_EXECUTE;
vma_add(stack, stack+npages*PAGE_SIZE, flags);
vma_add(curr_task, stack, stack+npages*PAGE_SIZE-1, flags);
break;
}
}
@ -746,23 +726,8 @@ static int load_task(load_args_t* largs)
}
#endif
// setup heap
if (!curr_task->heap)
curr_task->heap = (vma_t*) kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!curr_task->heap || !heap, 0)) {
kprintf("load_task: heap is missing!\n");
return -ENOMEM;
}
curr_task->heap->flags = VMA_HEAP|VMA_USER;
curr_task->heap->start = heap;
curr_task->heap->end = heap;
// TODO: insert into list
if (BUILTIN_EXPECT(!stack, 0)) {
kprintf("load_task: stack is missing!\n");
kprintf("Stack is missing!\n");
return -ENOMEM;
}
@ -775,9 +740,9 @@ static int load_task(load_args_t* largs)
// push argv on the stack
offset -= largs->argc * sizeof(char*);
for (i=0; i<largs->argc; i++) {
for(i=0; i<largs->argc; i++) {
((char**) (stack+offset))[i] = (char*) (stack+idx);
while(((char*) stack)[idx] != '\0')
idx++;
idx++;
@ -785,7 +750,7 @@ static int load_task(load_args_t* largs)
// push env on the stack
offset -= (largs->envc+1) * sizeof(char*);
for (i=0; i<largs->envc; i++) {
for(i=0; i<largs->envc; i++) {
((char**) (stack+offset))[i] = (char*) (stack+idx);
while(((char*) stack)[idx] != '\0')
@ -806,10 +771,10 @@ static int load_task(load_args_t* largs)
*((char***) (stack+offset)) = (char**) (stack + offset + 2*sizeof(char**) + (largs->envc+1) * sizeof(char*));
// push argc on the stack
offset -= sizeof(size_t);
offset -= sizeof(int);
*((int*) (stack+offset)) = largs->argc;
kfree(largs);
kfree(largs, sizeof(load_args_t));
// clear fpu state
curr_task->flags &= ~(TASK_FPU_USED|TASK_FPU_INIT);
@ -820,12 +785,12 @@ static int load_task(load_args_t* largs)
invalid:
kprintf("Invalid executable!\n");
kprintf("Magic number: 0x%x\n", (uint32_t) header.ident.magic);
kprintf("Header type: 0x%x\n", (uint32_t) header.type);
kprintf("Machine type: 0x%x\n", (uint32_t) header.machine);
kprintf("ELF ident class: 0x%x\n", (uint32_t) header.ident._class);
kprintf("ELF ident data: 0x%x\n", header.ident.data);
kprintf("Program entry point: 0x%x\n", (size_t) header.entry);
kprintf("magic number 0x%x\n", (uint32_t) header.ident.magic);
kprintf("header type 0x%x\n", (uint32_t) header.type);
kprintf("machine type 0x%x\n", (uint32_t) header.machine);
kprintf("elf ident class 0x%x\n", (uint32_t) header.ident._class);
kprintf("elf identdata !0x%x\n", header.ident.data);
kprintf("program entry point 0x%x\n", (size_t) header.entry);
return -EINVAL;
}
@ -841,7 +806,7 @@ static int user_entry(void* arg)
ret = load_task((load_args_t*) arg);
kfree(arg);
kfree(arg, sizeof(load_args_t));
return ret;
}
@ -859,6 +824,7 @@ static int user_entry(void* arg)
*/
int create_user_task_on_core(tid_t* id, const char* fname, char** argv, uint32_t core_id)
{
#ifdef CONFIG_X86_32
vfs_node_t* node;
int argc = 0;
size_t i, buffer_size = 0;
@ -894,19 +860,24 @@ int create_user_task_on_core(tid_t* id, const char* fname, char** argv, uint32_t
while ((*dest++ = *src++) != 0);
}
// create new task
/* create new task */
return create_task(id, user_entry, load_args, NORMAL_PRIO, core_id);
#else
return -EINVAL;
#endif
}
/** @brief Used by the execve-Systemcall */
int sys_execve(const char* fname, char** argv, char** env)
{
vfs_node_t* node;
vma_t* tmp;
size_t i, buffer_size = 0;
load_args_t* load_args = NULL;
char *dest, *src;
int ret, argc = 0;
int envc = 0;
task_t* curr_task = per_core(current_task);
node = findnode_fs((char*) fname);
if (!node || !(node->type == FS_FILE))
@ -949,8 +920,16 @@ int sys_execve(const char* fname, char** argv, char** env)
while ((*dest++ = *src++) != 0);
}
spinlock_lock(&curr_task->vma_lock);
// remove old program
drop_vma_list();
while((tmp = curr_task->vma_list) != NULL) {
kfree((void*) tmp->start, tmp->end - tmp->start + 1);
curr_task->vma_list = tmp->next;
kfree((void*) tmp, sizeof(vma_t));
}
spinlock_unlock(&curr_task->vma_lock);
/*
* we use a trap gate to enter the kernel
@ -961,7 +940,7 @@ int sys_execve(const char* fname, char** argv, char** env)
ret = load_task(load_args);
kfree(load_args);
kfree(load_args, sizeof(load_args_t));
return ret;
}

View file

@ -34,7 +34,13 @@
#define VGA_EARLY_PRINT 1
#define UART_EARLY_PRINT 2
#ifdef CONFIG_VGA
static uint32_t early_print = VGA_EARLY_PRINT;
#elif defined(CONFIG_UART)
static uint32_t early_print = UART_EARLY_PRINT;
#else
static uint32_t early_print = NO_EARLY_PRINT;
#endif
static spinlock_irqsave_t olock = SPINLOCK_IRQSAVE_INIT;
static atomic_int32_t kmsg_counter = ATOMIC_INIT(0);
static unsigned char kmessages[KMSG_SIZE] __attribute__ ((section(".kmsg"))) = {[0 ... KMSG_SIZE-1] = 0x00};
@ -130,7 +136,7 @@ int kmsg_init(vfs_node_t * node, const char *name)
}
} while (blist);
kfree(new_node);
kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM;
}
@@ -139,10 +145,6 @@ int koutput_init(void)
{
#ifdef CONFIG_VGA
vga_init();
early_print |= VGA_EARLY_PRINT;
#endif
#ifdef CONFIG_UART
early_print |= UART_EARLY_PRINT;
#endif
return 0;
@@ -159,11 +161,11 @@ int kputchar(int c)
kmessages[pos % KMSG_SIZE] = (unsigned char) c;
#ifdef CONFIG_VGA
if (early_print & VGA_EARLY_PRINT)
if (early_print == VGA_EARLY_PRINT)
vga_putchar(c);
#endif
#ifdef CONFIG_UART
if (early_print & UART_EARLY_PRINT)
if (early_print == UART_EARLY_PRINT)
uart_putchar(c);
#endif
@@ -184,11 +186,11 @@ int kputs(const char *str)
pos = atomic_int32_inc(&kmsg_counter);
kmessages[pos % KMSG_SIZE] = str[i];
#ifdef CONFIG_VGA
if (early_print & VGA_EARLY_PRINT)
if (early_print == VGA_EARLY_PRINT)
vga_putchar(str[i]);
#endif
#ifdef CONFIG_UART
if (early_print & UART_EARLY_PRINT)
if (early_print == UART_EARLY_PRINT)
uart_putchar(str[i]);
#endif
}
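
Note on the hunks above: switching the test from `early_print & VGA_EARLY_PRINT` to `early_print == VGA_EARLY_PRINT` means at most one early-print sink can ever fire, since `early_print` now holds a single value instead of a set of flag bits. A minimal userspace sketch of the behavioral difference (the constants are copied from the hunk; everything else is illustration only):

/* --- illustrative sketch, not part of the diff --- */
#include <stdio.h>
#include <stdint.h>

#define VGA_EARLY_PRINT  1
#define UART_EARLY_PRINT 2

int main(void)
{
	uint32_t early_print = VGA_EARLY_PRINT | UART_EARLY_PRINT; /* both sinks requested */

	/* bitmask style (old): both tests succeed */
	if (early_print & VGA_EARLY_PRINT)
		puts("bitmask: VGA active");
	if (early_print & UART_EARLY_PRINT)
		puts("bitmask: UART active");

	/* equality style (new): neither test succeeds, because the value is 3 */
	if (early_print == VGA_EARLY_PRINT)
		puts("equality: VGA only");
	if (early_print == UART_EARLY_PRINT)
		puts("equality: UART only");
	return 0;
}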

View file

@@ -1,4 +1,4 @@
C_source := memory.c vma.c malloc.c
C_source := memory.c vma.c
MODULE := mm
include $(TOPDIR)/Makefile.inc

View file

@@ -1,207 +0,0 @@
/*
* Copyright 2010 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <metalsvm/malloc.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/stdio.h>
#include <metalsvm/mmu.h>
/// A linked list for each binary size exponent
static buddy_t* buddy_lists[BUDDY_LISTS] = { NULL };
/// Lock for the buddy lists
static spinlock_t buddy_lock = SPINLOCK_INIT;
/** @brief Check if larger free buddies are available */
static inline int buddy_large_avail(uint8_t exp)
{
while (exp<BUDDY_MAX && !buddy_lists[exp-BUDDY_MIN])
exp++;
return exp != BUDDY_MAX;
}
/** @brief Calculate the required buddy size */
static inline int buddy_exp(size_t sz)
{
int exp;
for (exp=0; sz>(1<<exp); exp++);
if (exp > BUDDY_MAX)
exp = 0;
if (exp < BUDDY_MIN)
exp = BUDDY_MIN;
return exp;
}
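
For reference, `buddy_exp()` rounds a request up to the next power of two and clamps the exponent to the list bounds. A standalone sketch, assuming BUDDY_MIN = 4 and BUDDY_MAX = 32 (the real constants live in include/metalsvm/malloc.h, which is not shown in this diff):

/* --- illustrative sketch, not part of the diff --- */
#include <stdio.h>
#include <stddef.h>

#define BUDDY_MIN 4	/* assumed value */
#define BUDDY_MAX 32	/* assumed value */

static int buddy_exp(size_t sz)
{
	int exp;
	for (exp=0; sz>(1<<exp); exp++);
	if (exp > BUDDY_MAX)
		exp = 0;	/* request too large for any list */
	if (exp < BUDDY_MIN)
		exp = BUDDY_MIN;	/* round tiny requests up to the smallest buddy */
	return exp;
}

int main(void)
{
	printf("buddy_exp(100) = %d\n", buddy_exp(100));	/* 7 => 128-byte buddy */
	printf("buddy_exp(5)   = %d\n", buddy_exp(5));		/* 4 => 16-byte buddy */
	return 0;
}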
/** @brief Get a free buddy by potentially splitting a larger one */
static buddy_t* buddy_get(int exp)
{
spinlock_lock(&buddy_lock);
buddy_t** list = &buddy_lists[exp-BUDDY_MIN];
buddy_t* buddy = *list;
buddy_t* split;
if (buddy)
// there is already a free buddy =>
// we remove it from the list
*list = buddy->next;
else if (exp >= BUDDY_ALLOC && !buddy_large_avail(exp))
// there's no free buddy larger than exp =>
// we can allocate new memory
buddy = (buddy_t*) palloc(1<<exp, 0);
else {
// we recursively request a larger buddy...
buddy = buddy_get(exp+1);
if (BUILTIN_EXPECT(!buddy, 0))
goto out;
// ... and split it, by putting the second half back to the list
split = (buddy_t*) ((size_t) buddy + (1<<exp));
split->next = *list;
*list = split;
}
out:
spinlock_unlock(&buddy_lock);
return buddy;
}
/** @brief Put a buddy back to its free list
*
* TODO: merge adjacent buddies (memory compaction)
*/
static void buddy_put(buddy_t* buddy)
{
spinlock_lock(&buddy_lock);
buddy_t** list = &buddy_lists[buddy->prefix.exponent-BUDDY_MIN];
buddy->next = *list;
*list = buddy;
spinlock_unlock(&buddy_lock);
}
void buddy_dump()
{
size_t free = 0;
int i;
for (i=0; i<BUDDY_LISTS; i++) {
buddy_t* buddy;
int exp = i+BUDDY_MIN;
if (buddy_lists[i])
kprintf("buddy_list[%u] (exp=%u, size=%lu bytes):\n", i, exp, 1<<exp);
for (buddy=buddy_lists[i]; buddy; buddy=buddy->next) {
kprintf(" %p -> %p \n", buddy, buddy->next);
free += 1<<exp;
}
}
kprintf("free buddies: %lu bytes\n", free);
}
void* palloc(size_t sz, uint32_t flags)
{
size_t phyaddr, viraddr;
uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
kprintf("palloc(%lu) (%lu pages)\n", sz, npages); // TODO: remove
// get free virtual address space
viraddr = vma_alloc(npages*PAGE_SIZE, VMA_HEAP);
if (BUILTIN_EXPECT(!viraddr, 0))
return NULL;
// get contiguous physical pages
phyaddr = get_pages(npages);
if (BUILTIN_EXPECT(!phyaddr, 0)) {
vma_free(viraddr, viraddr+npages*PAGE_SIZE);
return NULL;
}
// map physical pages to VMA
viraddr = map_region(viraddr, phyaddr, npages, flags);
if (BUILTIN_EXPECT(!viraddr, 0)) {
vma_free(viraddr, viraddr+npages*PAGE_SIZE);
put_pages(phyaddr, npages);
return NULL;
}
return (void*) viraddr;
}
void pfree(void* addr, size_t sz)
{
if (BUILTIN_EXPECT(!addr || !sz, 0))
return;
size_t i;
size_t phyaddr;
size_t viraddr = (size_t) addr & PAGE_MASK;
uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
// memory is probably not contiguously mapped!
for (i=0; i<npages; i++) {
phyaddr = virt_to_phys(viraddr+i*PAGE_SIZE);
put_page(phyaddr);
}
unmap_region(viraddr, npages);
vma_free(viraddr, viraddr+npages*PAGE_SIZE);
}
void* kmalloc(size_t sz)
{
if (BUILTIN_EXPECT(!sz, 0))
return NULL;
// add space for the prefix
sz += sizeof(buddy_t);
kprintf("kmalloc(%lu)\n", sz); // TODO: remove
int exp = buddy_exp(sz);
if (BUILTIN_EXPECT(!exp, 0))
return NULL;
buddy_t* buddy = buddy_get(exp);
if (BUILTIN_EXPECT(!buddy, 0))
return NULL;
// setup buddy prefix
buddy->prefix.magic = BUDDY_MAGIC;
buddy->prefix.exponent = exp;
// pointer arithmetic: we hide the prefix
return buddy+1;
}
void kfree(void *addr)
{
if (BUILTIN_EXPECT(!addr, 0))
return;
buddy_t* buddy = (buddy_t*) addr - 1; // get prefix
// check magic
if (BUILTIN_EXPECT(buddy->prefix.magic != BUDDY_MAGIC, 0))
return;
buddy_put(buddy);
}
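
The kmalloc()/kfree() pair above hides a `buddy_t` prefix in front of every allocation: `return buddy+1` skips the prefix on the way out, and `(buddy_t*) addr - 1` recovers it on free. A self-contained sketch of that pointer arithmetic, with `malloc()` standing in for `buddy_get()` and an assumed magic value:

/* --- illustrative sketch, not part of the diff --- */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define BUDDY_MAGIC 0xB1DDCAFE	/* assumed value, for illustration only */

typedef struct {
	struct {
		uint32_t magic;
		uint32_t exponent;
	} prefix;
} buddy_t;

static void* demo_kmalloc(size_t sz)
{
	buddy_t* buddy = malloc(sizeof(buddy_t) + sz);	/* stands in for buddy_get() */
	if (!buddy)
		return NULL;
	buddy->prefix.magic = BUDDY_MAGIC;
	buddy->prefix.exponent = 0;
	return buddy+1;	/* pointer arithmetic: we hide the prefix */
}

static void demo_kfree(void* addr)
{
	buddy_t* buddy = (buddy_t*) addr - 1;	/* get prefix */
	if (buddy->prefix.magic != BUDDY_MAGIC)
		return;	/* not one of our blocks */
	free(buddy);
}

int main(void)
{
	char* p = demo_kmalloc(32);
	printf("caller pointer %p, prefix at %p\n", (void*) p, (void*) ((buddy_t*) p - 1));
	demo_kfree(p);
	return 0;
}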

View file

@@ -37,15 +37,17 @@
#endif
/*
* Set whole address space as occupied:
* 0 => free, 1 => occupied
* 0 => free
* 1 => occupied
*
* Set whole address space as occupied
*/
static uint8_t bitmap[BITMAP_SIZE] = {[0 ... BITMAP_SIZE-1] = 0xFF};
static spinlock_t bitmap_lock = SPINLOCK_INIT;
atomic_int32_t total_pages = ATOMIC_INIT(0);
atomic_int32_t total_allocated_pages = ATOMIC_INIT(0);
atomic_int32_t total_available_pages = ATOMIC_INIT(0);
static uint8_t bitmap[BITMAP_SIZE]; // = {[0 ... BITMAP_SIZE-1] = 0xFF};
static spinlock_t bitmap_lock = SPINLOCK_INIT;
static size_t alloc_start;
atomic_int32_t total_pages = ATOMIC_INIT(0);
atomic_int32_t total_allocated_pages = ATOMIC_INIT(0);
atomic_int32_t total_available_pages = ATOMIC_INIT(0);
/*
* Note that linker symbols are not variables, they have no memory allocated for
@@ -62,12 +64,20 @@ inline static int page_marked(size_t i)
return (bitmap[index] & (1 << mod));
}
inline static int page_unmarked(size_t i)
{
return !page_marked(i);
}
inline static void page_set_mark(size_t i)
{
size_t index = i >> 3;
size_t mod = i & 0x7;
bitmap[index] = bitmap[index] | (1 << mod);
//if (page_marked(i))
// kprintf("page %u is alread marked\n", i);
bitmap[index] = bitmap[index] | (1 << mod);
}
inline static void page_clear_mark(size_t i)
@@ -75,16 +85,178 @@ inline static void page_clear_mark(size_t i)
size_t index = i / 8;
size_t mod = i % 8;
if (page_unmarked(i))
kprintf("page %u is already unmarked\n", i);
bitmap[index] = bitmap[index] & ~(1 << mod);
}
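
Both versions of these helpers rely on the same byte/bit split: `i >> 3` (equivalently `i / 8`) selects the byte, `i & 0x7` (equivalently `i % 8`) the bit within it. A tiny sketch of the arithmetic for page 13:

/* --- illustrative sketch, not part of the diff --- */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint8_t bitmap[2] = { 0, 0 };
	size_t i = 13;			/* example page number */
	size_t index = i >> 3;		/* byte 1 */
	size_t mod = i & 0x7;		/* bit 5 */

	bitmap[index] |= (1 << mod);	/* page_set_mark */
	printf("set:   bitmap[%zu] = 0x%02x\n", index, bitmap[index]);	/* 0x20 */

	bitmap[index] &= ~(1 << mod);	/* page_clear_mark */
	printf("clear: bitmap[%zu] = 0x%02x\n", index, bitmap[index]);	/* 0x00 */
	return 0;
}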
int mmu_init(void)
{
size_t kernel_size;
unsigned int i;
size_t addr;
int ret = 0;
// at first, set default value of the bitmap
memset(bitmap, 0xFF, sizeof(uint8_t)*BITMAP_SIZE);
#ifdef CONFIG_MULTIBOOT
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
size_t end_addr;
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) ((size_t) mb_info->mmap_addr);
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
while (mmap < mmap_end) {
if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) {
/* set the available memory as "unused" */
addr = mmap->addr;
end_addr = addr + mmap->len;
while (addr < end_addr) {
page_clear_mark(addr >> PAGE_SHIFT);
addr += PAGE_SIZE;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
}
mmap++;
}
} else {
kputs("Unable to initialize the memory management subsystem\n");
while(1) {
HALT;
}
}
#elif defined(CONFIG_ROCKCREEK)
/* of course, the first slots belong to the private memory */
for(addr=0x00; addr<1*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_SHIFT);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
// Note: The last slot always belongs to the private memory.
for(addr=0xFF000000; addr<0xFFFFFFFF; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_SHIFT);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
/*
* Mark the bootinfo as used.
*/
page_set_mark((size_t)bootinfo >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
#else
#error Currently, MetalSVM supports only the Multiboot specification or the RockCreek processor!
#endif
kernel_size = (size_t) &kernel_end - (size_t) &kernel_start;
if (kernel_size & (PAGE_SIZE-1))
kernel_size += PAGE_SIZE - (kernel_size & (PAGE_SIZE-1));
atomic_int32_add(&total_allocated_pages, kernel_size >> PAGE_SHIFT);
atomic_int32_sub(&total_available_pages, kernel_size >> PAGE_SHIFT);
/* set kernel space as used */
for(i=(size_t) &kernel_start >> PAGE_SHIFT; i < (size_t) &kernel_end >> PAGE_SHIFT; i++)
page_set_mark(i);
if ((size_t) &kernel_end & (PAGE_SIZE-1))
page_set_mark(i);
alloc_start = (size_t) &kernel_end >> PAGE_SHIFT;
if ((size_t) &kernel_end & (PAGE_SIZE-1))
alloc_start++;
#if MAX_CORES > 1
// reserve physical page for SMP boot code
page_set_mark(SMP_SETUP_ADDR >> PAGE_SHIFT);
atomic_int32_add(&total_allocated_pages, 1);
atomic_int32_sub(&total_available_pages, 1);
#endif
ret = paging_init();
if (ret) {
kprintf("Failed to initialize paging: %d\n", ret);
return ret;
}
#ifdef CONFIG_MULTIBOOT
/*
* Modules like the init ram disk are already loaded.
* Therefore, we set these pages as used.
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
/*
* Mark the mb_info as used.
*/
page_set_mark((size_t)mb_info >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
for(addr = mb_info->mods_addr; addr < mb_info->mods_addr + mb_info->mods_count * sizeof(multiboot_module_t); addr += PAGE_SIZE) {
page_set_mark(addr >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
for(i=0; i<mb_info->mods_count; i++, mmodule++) {
for(addr=mmodule->mod_start; addr<mmodule->mod_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
}
}
#elif defined(CONFIG_ROCKCREEK)
/*
* Now, we are able to read the FPGA registers and to
* determine the number of slots for private memory.
*/
uint32_t slots = *((volatile uint8_t*) (FPGA_BASE + 0x8244));
if (slots == 0)
slots = 1;
kprintf("MetalSVM use %d slots for private memory\n", slots);
// define the residual private slots as free
for(addr=1*0x1000000; addr<slots*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_SHIFT);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
/*
* The init ram disk is already loaded.
* Therefore, we set these pages as used.
*/
for(addr=bootinfo->addr; addr < bootinfo->addr+bootinfo->size; addr+=PAGE_SIZE) {
// This area is already mapped, so we need to virt_to_phys() these addresses.
page_set_mark(virt_to_phys(addr) >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
#endif
return ret;
}
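
The kernel-size round-up in `mmu_init()` above (add the remainder up to the next page boundary) is the usual align-up idiom. A quick sketch, assuming PAGE_SIZE = 4096, showing it matches the closed-form expression:

/* --- illustrative sketch, not part of the diff --- */
#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE 4096	/* assumed 4 KiB pages, as used throughout */

int main(void)
{
	size_t kernel_size = 123456;	/* arbitrary example */

	/* style used in mmu_init() above */
	size_t a = kernel_size;
	if (a & (PAGE_SIZE-1))
		a += PAGE_SIZE - (a & (PAGE_SIZE-1));

	/* equivalent closed form */
	size_t b = (kernel_size + PAGE_SIZE - 1) & ~((size_t) PAGE_SIZE - 1);

	printf("%zu -> %zu == %zu (%zu pages)\n", kernel_size, a, b, a / PAGE_SIZE);
	return 0;
}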
/*
* Use first fit algorithm to find a suitable physical memory region
*/
size_t get_pages(uint32_t npages)
{
// first page is reserved
static size_t start = 1;
size_t i, j, l;
size_t k = 0;
uint32_t i, j, l;
uint32_t k = 0;
size_t ret = 0;
if (BUILTIN_EXPECT(!npages, 0))
@@ -94,8 +266,7 @@ size_t get_pages(uint32_t npages)
return ret;
spinlock_lock(&bitmap_lock);
i = start;
i = alloc_start;
next_try:
while((k < BITMAP_SIZE) && page_marked(i)) {
k++;
@@ -113,7 +284,7 @@ next_try:
}
if (i+j >= BITMAP_SIZE) {
i = 1;
i = 0;
goto next_try;
}
@@ -121,12 +292,11 @@ next_try:
goto oom;
ret = i*PAGE_SIZE;
kprintf("get_pages: ret 0x%x, i = %d, j = %d, npages = %d\n", ret, i, j, npages); // TODO: remove
//kprintf("alloc: ret 0x%x, i = %d, j = %d, npages = %d\n", ret, i, j, npages);
for(l=i; l<i+j; l++)
page_set_mark(l);
start = i+j;
alloc_start = i+j;
spinlock_unlock(&bitmap_lock);
atomic_int32_add(&total_allocated_pages, npages);
@@ -140,253 +310,88 @@ oom:
return ret;
}
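
Both variants of `get_pages()` implement the same first-fit scan: walk the bitmap looking for a run of `npages` free pages, wrap around once (the `next_try` label), and mark the run on success. A compact userspace sketch of the idea, using one byte per page instead of one bit to keep the indexing out of the way:

/* --- illustrative sketch, not part of the diff --- */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define NPAGES 16	/* toy bitmap: 16 pages */

static uint8_t marked[NPAGES];	/* 0 => free, 1 => occupied */

/* first fit: find npages consecutive free pages, mark them, return the index */
static int demo_get_pages(int npages)
{
	int i, j;

	for (i = 0; i + npages <= NPAGES; i++) {
		for (j = 0; j < npages && !marked[i+j]; j++);
		if (j == npages) {	/* found a large enough run */
			memset(&marked[i], 1, npages);
			return i;
		}
		i += j;	/* skip past the occupied page that stopped the run */
	}
	return -1;	/* out of memory */
}

int main(void)
{
	marked[2] = marked[3] = 1;	/* pretend pages 2-3 are taken */
	printf("3 pages at index %d\n", demo_get_pages(3));	/* prints 4 */
	return 0;
}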
int put_pages(size_t phyaddr, size_t npages)
int put_page(size_t phyaddr)
{
if (BUILTIN_EXPECT(!phyaddr || !npages, 0))
uint32_t index = phyaddr >> PAGE_SHIFT;
if (BUILTIN_EXPECT(!phyaddr, 0))
return -EINVAL;
size_t i, ret = 0;
size_t base = phyaddr >> PAGE_BITS;
spinlock_lock(&bitmap_lock);
for (i=0; i<npages; i++) {
if (page_marked(base+i)) {
page_clear_mark(base+i);
ret++;
}
}
page_clear_mark(index);
spinlock_unlock(&bitmap_lock);
atomic_int32_sub(&total_allocated_pages, ret);
atomic_int32_add(&total_available_pages, ret);
atomic_int32_sub(&total_allocated_pages, 1);
atomic_int32_add(&total_available_pages, 1);
kprintf("put_pages: phyaddr=%#lx, npages = %d, ret = %d\n", phyaddr, npages, ret); // TODO: remove
return ret;
return 0;
}
int copy_pages(size_t pdest, size_t psrc, size_t npages)
void* mem_allocation(size_t sz, uint32_t flags)
{
size_t viraddr;
size_t vdest, vsrc;
size_t phyaddr, viraddr;
uint32_t npages = sz >> PAGE_SHIFT;
// allocate virtual memory areas
viraddr = vma_alloc(2*npages*PAGE_SIZE, VMA_HEAP);
if (BUILTIN_EXPECT(!viraddr, 0))
return -ENOMEM;
if (sz & (PAGE_SIZE-1))
npages++;
// map pages
vsrc = map_region(viraddr, psrc, npages, MAP_KERNEL_SPACE);
vdest = map_region(viraddr+npages*PAGE_SIZE, pdest, npages, MAP_KERNEL_SPACE);
if (BUILTIN_EXPECT(!vsrc || !vdest, 0)) {
unmap_region(viraddr, 2*npages);
return -ENOMEM;
}
phyaddr = get_pages(npages);
if (BUILTIN_EXPECT(!phyaddr, 0))
return 0;
kprintf("copy_pages: copy %u pages from: %#lx (%#lx) to %#lx (%#lx)\n", npages, vsrc, psrc, vdest, pdest); // TODO remove
viraddr = map_region(0, phyaddr, npages, flags);
// copy the whole page
memcpy((void*) vdest, (void*) vsrc, npages*PAGE_SIZE);
// housekeeping
unmap_region(viraddr, 2*npages);
vma_free(viraddr, viraddr+2*npages*PAGE_SIZE);
return pdest;
return (void*) viraddr;
}
int mmu_init(void)
void* kmalloc(size_t sz)
{
unsigned int i;
size_t addr;
int ret = 0;
#ifdef CONFIG_MULTIBOOT
if (mb_info) {
if (mb_info->flags & MULTIBOOT_INFO_MEM_MAP) {
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) ((size_t) mb_info->mmap_addr);
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
// mark available memory as free
while (mmap < mmap_end) {
if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) {
for (addr=mmap->addr; addr < mmap->addr + mmap->len; addr += PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
}
mmap++;
}
}
else if (mb_info->flags & MULTIBOOT_INFO_MEM) {
size_t page;
size_t pages_lower = mb_info->mem_lower >> 2;
size_t pages_upper = mb_info->mem_upper >> 2;
for (page=0; page<pages_lower; page++)
page_clear_mark(page);
for (page=0x100000; page<pages_upper+0x100000; page++)
page_clear_mark(page);
atomic_int32_add(&total_pages, pages_lower + pages_upper);
atomic_int32_add(&total_available_pages, pages_lower + pages_upper);
}
else {
kputs("Unable to initialize the memory management subsystem\n");
while (1) HALT;
}
// mark mb_info as used
page_set_mark((size_t) mb_info >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
// mark modules list as used
if (mb_info->flags & MULTIBOOT_INFO_MODS) {
for(addr=mb_info->mods_addr; addr<mb_info->mods_addr+mb_info->mods_count*sizeof(multiboot_module_t); addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
}
}
#elif defined(CONFIG_ROCKCREEK)
// of course, the first slots belong to the private memory
for(addr=0x00; addr<1*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
// Note: The last slot always belongs to the private memory.
for(addr=0xFF000000; addr<0xFFFFFFFF; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
// mark the bootinfo as used.
page_set_mark((size_t)bootinfo >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
#else
#error Currently, MetalSVM supports only the Multiboot specification or the RockCreek processor!
#endif
// mark kernel as used
for(addr=(size_t) &kernel_start; addr<(size_t) &kernel_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
#if MAX_CORES > 1
page_set_mark(SMP_SETUP_ADDR >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
#endif
// enable paging and map SMP, VGA, Multiboot modules etc.
ret = paging_init();
if (ret) {
kprintf("Failed to initialize paging: %d\n", ret);
return ret;
}
// add kernel to VMA list
vma_add(PAGE_CEIL((size_t) &kernel_start),
PAGE_FLOOR((size_t) &kernel_end),
VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
// add LAPIC to VMA list
vma_add((size_t) &kernel_start - PAGE_SIZE,
(size_t) &kernel_start,
VMA_READ|VMA_WRITE);
#ifdef CONFIG_VGA
// add VGA to VMA list
vma_add(PAGE_CEIL(VIDEO_MEM_ADDR),
PAGE_FLOOR(VIDEO_MEM_ADDR) + PAGE_SIZE,
VMA_READ|VMA_WRITE);
#endif
#if MAX_CORES > 1
// reserve page for SMP boot code
vma_add(PAGE_CEIL(SMP_SETUP_ADDR),
PAGE_FLOOR(SMP_SETUP_ADDR) + PAGE_SIZE,
VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
#endif
#ifdef CONFIG_MULTIBOOT
/*
* Modules like the init ram disk are already loaded.
* Therefore, we set these pages as used.
*/
if (mb_info) {
vma_add(PAGE_CEIL((size_t) mb_info),
PAGE_FLOOR((size_t) mb_info + sizeof(multiboot_info_t)),
VMA_READ|VMA_CACHEABLE);
if (mb_info->flags & MULTIBOOT_INFO_MODS) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
vma_add(PAGE_CEIL((size_t) mb_info->mods_addr),
PAGE_FLOOR((size_t) mb_info->mods_addr + mb_info->mods_count*sizeof(multiboot_module_t)),
VMA_READ|VMA_CACHEABLE);
for(i=0; i<mb_info->mods_count; i++) {
vma_add(PAGE_CEIL(mmodule[i].mod_start),
PAGE_FLOOR(mmodule[i].mod_end),
VMA_READ|VMA_WRITE|VMA_CACHEABLE);
for(addr=mmodule[i].mod_start; addr<mmodule[i].mod_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
}
}
}
#elif defined(CONFIG_ROCKCREEK)
/*
* Now, we are able to read the FPGA registers and to
* determine the number of slots for private memory.
*/
uint32_t slots = *((volatile uint8_t*) (FPGA_BASE + 0x8244));
if (slots == 0)
slots = 1;
kprintf("MetalSVM use %d slots for private memory\n", slots);
// define the residual private slots as free
for(addr=1*0x1000000; addr<slots*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
/*
* The init ram disk is already loaded.
* Therefore, we set these pages as used.
*/
for(addr=bootinfo->addr; addr<bootinfo->addr+bootinfo->size; addr+=PAGE_SIZE) {
// this area is already mapped, so we need to virt_to_phys() these addresses.
page_set_mark(virt_to_phys(addr) >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
#endif
return ret;
return mem_allocation(sz, MAP_KERNEL_SPACE);
}
void kfree(void* addr, size_t sz)
{
uint32_t index, npages, i;
size_t phyaddr;
if (BUILTIN_EXPECT(!addr && !sz, 0))
return;
npages = sz >> PAGE_SHIFT;
if (sz & (PAGE_SIZE-1))
npages++;
spinlock_lock(&bitmap_lock);
for(i=0; i<npages; i++) {
unmap_region((size_t) addr+i*PAGE_SIZE, 1);
phyaddr = virt_to_phys((size_t) addr+i*PAGE_SIZE);
if (!phyaddr)
continue;
index = phyaddr >> PAGE_SHIFT;
page_clear_mark(index);
}
spinlock_unlock(&bitmap_lock);
vm_free((size_t) addr, npages);
atomic_int32_sub(&total_allocated_pages, npages);
atomic_int32_add(&total_available_pages, npages);
}
void* create_stack(void)
{
return kmalloc(KERNEL_STACK_SIZE);
}
int destroy_stack(task_t* task)
{
if (BUILTIN_EXPECT(!task || !task->stack, 0))
return -EINVAL;
kfree(task->stack, KERNEL_STACK_SIZE);
return 0;
}

mm/vma.c
View file

@@ -1,5 +1,5 @@
/*
* Copyright 2011 Steffen Vogel, Chair for Operating Systems,
* Copyright 2011 Stefan Lankes, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,318 +17,85 @@
* This file is part of MetalSVM.
*/
#include <metalsvm/vma.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/stdio.h>
#include <metalsvm/tasks_types.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/vma.h>
#include <metalsvm/errno.h>
/*
* Kernel space VMA list and lock
*
* For bootstrapping we initialize the VMA list with one empty VMA
* (start == end) and expand this VMA by calls to vma_alloc()
* add a new virtual memory region to the list of VMAs
*/
static vma_t vma_boot = { VMA_KERN_MIN, VMA_KERN_MIN, VMA_HEAP };
static vma_t* vma_list = &vma_boot;
static spinlock_t vma_lock = SPINLOCK_INIT;
size_t vma_alloc(size_t size, uint32_t flags)
int vma_add(task_t* task, size_t start, size_t end, uint32_t type)
{
task_t* task = per_core(current_task);
spinlock_t* lock;
vma_t** list;
kprintf("vma_alloc(0x%lx, 0x%x)\n", size, flags);
size_t base, limit; // boundaries for search
size_t start, end;
if (BUILTIN_EXPECT(!size, 0))
return 0;
if (flags & VMA_USER) {
base = VMA_KERN_MAX;
limit = VMA_USER_MAX;
list = &task->vma_list;
lock = &task->vma_lock;
}
else {
base = VMA_KERN_MIN;
limit = VMA_KERN_MAX;
list = &vma_list;
lock = &vma_lock;
}
spinlock_lock(lock);
// first fit search for free memory area
vma_t* pred = NULL; // vma before current gap
vma_t* succ = *list; // vma after current gap
do {
start = (pred) ? pred->end : base;
end = (succ) ? succ->start : limit;
if (end > start && end - start > size)
break; // we found a gap
pred = succ;
succ = (succ) ? succ->next : NULL;
} while (pred || succ);
if (BUILTIN_EXPECT(end > limit || end < start || end - start < size, 0)) {
spinlock_unlock(lock);
return 0;
}
if (pred && pred->flags == flags) {
pred->end = start+size;
}
else {
// insert new VMA
vma_t* new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(lock);
return 0;
}
new->start = start;
new->end = start+size;
new->flags = flags;
new->next = succ;
new->prev = pred;
if (succ)
succ->prev = new;
if (pred)
pred->next = new;
else
*list = new;
}
spinlock_unlock(lock);
return start;
}
int vma_free(size_t start, size_t end)
{
task_t* task = per_core(current_task);
spinlock_t* lock;
vma_t* vma;
vma_t** list;
if (BUILTIN_EXPECT(start >= end, 0))
vma_t* new_vma;
if (BUILTIN_EXPECT(!task || start > end, 0))
return -EINVAL;
if (end < VMA_KERN_MAX) {
lock = &vma_lock;
list = &vma_list;
}
else if (start >= VMA_KERN_MAX) {
lock = &task->vma_lock;
list = &task->vma_list;
}
if (BUILTIN_EXPECT(!*list, 0))
return -EINVAL;
spinlock_lock(lock);
// search vma
vma = *list;
while (vma) {
if (start >= vma->start && end <= vma->end) break;
vma = vma->next;
}
if (BUILTIN_EXPECT(!vma, 0)) {
spinlock_unlock(lock);
return -EINVAL;
}
// free/resize vma
if (start == vma->start && end == vma->end) {
if (vma == *list)
*list = vma->next; // update list head
if (vma->prev)
vma->prev->next = vma->next;
if (vma->next)
vma->next->prev = vma->prev;
kfree(vma);
}
else if (start == vma->start)
vma->start = end;
else if (end == vma->end)
vma->end = start;
else {
vma_t* new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(lock);
return -ENOMEM;
}
new->end = vma->end;
vma->end = start;
new->start = end;
new->next = vma->next;
vma->next = new;
new->prev = vma;
}
spinlock_unlock(lock);
return 0;
}
int vma_add(size_t start, size_t end, uint32_t flags)
{
task_t* task = per_core(current_task);
spinlock_t* lock;
vma_t** list;
kprintf("vma_add(0x%lx, 0x%lx, 0x%x)\n", start, end, flags);
if (BUILTIN_EXPECT(start >= end, 0))
return -EINVAL;
if (flags & VMA_USER) {
list = &task->vma_list;
lock = &task->vma_lock;
// check if address is in userspace
if (BUILTIN_EXPECT(start < VMA_KERN_MAX, 0))
return -EINVAL;
}
else {
list = &vma_list;
lock = &vma_lock;
// check if address is in kernelspace
if (BUILTIN_EXPECT(end >= VMA_KERN_MAX, 0))
return -EINVAL;
}
spinlock_lock(lock);
// search gap
vma_t* pred = NULL;
vma_t* succ = *list;
do {
if ((!pred || pred->end <= start) &&
(!succ || succ->start >= end))
break;
pred = succ;
succ = succ->next;
} while (pred || succ);
// TODO: check bounds
// insert new VMA
vma_t* new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(lock);
new_vma = kmalloc(sizeof(vma_t));
if (!new_vma)
return -ENOMEM;
spinlock_lock(&task->vma_lock);
new_vma->start = start;
new_vma->end = end;
new_vma->type = type;
if (!(task->vma_list)) {
new_vma->next = new_vma->prev = NULL;
task->vma_list = new_vma;
} else {
vma_t* tmp = task->vma_list;
while (tmp->next && tmp->start < start)
tmp = tmp->next;
new_vma->next = tmp->next;
new_vma->prev = tmp;
tmp->next = new_vma;
}
new->start = start;
new->end = end;
new->flags = flags;
new->next = succ;
new->prev = pred;
if (succ)
succ->prev = new;
if (pred)
pred->next = new;
else
*list = new;
spinlock_unlock(lock);
spinlock_unlock(&task->vma_lock);
return 0;
}
int copy_vma_list(task_t* task)
int vma_dump(task_t* task)
{
task_t* parent_task = per_core(current_task);
vma_t* tmp;
if (BUILTIN_EXPECT(!task, 0))
return -EINVAL;
spinlock_init(&task->vma_lock);
spinlock_lock(&parent_task->vma_lock);
spinlock_lock(&task->vma_lock);
vma_t* last = NULL;
vma_t* parent = parent_task->vma_list;
tmp = task->vma_list;
while (tmp) {
kprintf("%8x - %8x: ", tmp->start, tmp->end);
while (parent) {
vma_t *new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(&task->vma_lock);
spinlock_unlock(&parent_task->vma_lock);
return -ENOMEM;
}
new->start = parent->start;
new->end = parent->end;
new->flags = parent->flags;
new->prev = last;
if (last)
last->next = new;
if (tmp->type & VMA_READ)
kputs("r");
else
task->vma_list = new;
kputs("-");
last = new;
parent = parent->next;
}
if (tmp->type & VMA_WRITE)
kputs("w");
else
kputs("-");
return 0;
}
if (tmp->type & VMA_EXECUTE)
kputs("x");
else
kputs("-");
kputs("\n");
int drop_vma_list()
{
task_t* task = per_core(current_task);
vma_t* vma;
kprintf("drop_vma_list: task = %u\n", task->id); // TODO: remove
spinlock_lock(&task->vma_lock);
while ((vma = task->vma_list) != NULL) {
task->vma_list = vma->next;
kfree(vma);
tmp = tmp->next;
}
spinlock_unlock(&task->vma_lock);
return 0;
}
void vma_dump()
{
void print_vma(vma_t *vma) {
while (vma) {
kprintf("0x%lx - 0x%lx: size=%x, flags=%c%c%c\n", vma->start, vma->end, vma->end - vma->start,
(vma->flags & VMA_READ) ? 'r' : '-',
(vma->flags & VMA_WRITE) ? 'w' : '-',
(vma->flags & VMA_EXECUTE) ? 'x' : '-');
vma = vma->next;
}
}
task_t* task = per_core(current_task);
kputs("Kernelspace VMAs:\n");
spinlock_lock(&vma_lock);
print_vma(vma_list);
spinlock_unlock(&vma_lock);
kputs("Userspace VMAs:\n");
spinlock_lock(&task->vma_lock);
print_vma(task->vma_list);
spinlock_unlock(&task->vma_lock);
}

View file

@@ -68,7 +68,7 @@ int main(int argc, char** argv)
exit(1);
}
testdirent = readdir(testdir);
printf("1. Dirent: %s\n", testdirent->d_name);
printf("1. Dirent: %s", testdirent->d_name);
closedir(testdir);
return errno;

View file

@@ -20,82 +20,41 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <sys/times.h>
void sleep(int sec) {
struct tms tms;
clock_t t, s = times(&tms);
do {
t = times(&tms);
}
while (t - s <= 1000 * sec);
}
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <dirent.h>
int print_usage() {
printf("usage: size mb/kb/b [chunks]\n");
exit(-1);
printf("usage: [size mb/kb/b]");
exit(0);
}
int main(int argc, char** argv)
{
int multp = 0;
int size = 0;
int chunks = 1;
int i;
void **test;
if (argc <= 2 || argc > 4)
int m = 0;
uint32_t size = 0;
if(argc <= 2)
print_usage();
size = atoi(argv[1]);
if (size <= 0)
print_usage();
if (!strcasecmp(argv[2], "mb"))
multp = (1 << 20);
else if (!strcasecmp(argv[2], "kb"))
multp = (1 << 10);
else if (!strcasecmp(argv[2], "b"))
multp = (1 << 0);
else
print_usage();
size *= multp;
if (argc == 4)
chunks = atoi(argv[3]);
test = malloc(chunks * sizeof(void *));
printf("malloc(%lu)\n", chunks * sizeof(void *));
if (!test) {
printf("malloc(%lu) - FAILED!\n", chunks * sizeof(void *));
exit(-1);
}
// allocate...
for (i = 0; i < chunks; i++) {
test[i] = malloc(size);
if (test[i])
printf("malloc(%d)\tCHUNK: %d START: %p END: %p\n", size, i, test[i], test[i] + size);
if(argc == 3) {
if(!strcmp(argv[2], "mb"))
m = 1024*1024;
else if(!strcmp(argv[2], "kb"))
m = 1024;
else if(!strcmp(argv[2], "b"))
m = 1;
else
printf("malloc(%d)\tFAILED! Abort allocation, start with freeing memory\n", size);
sleep(1);
print_usage();
}
if(argc > 3)
print_usage();
size = atoi(argv[1]);
if(size <= 0)
print_usage();
// and release again
for (i = 0; i < chunks; i++) {
if (test[i]) {
free(test[i]);
printf("free(%p)\tCHUNK: %d\n", test[i], i);
}
sleep(1);
}
free(test);
printf("free(%p)\n", test);
size *= m;
uint8_t* test = malloc(size);
printf("malloc(%d) - START: %p END: %p \n", size, test, test + size);
return 0;
}

View file

@@ -56,8 +56,9 @@ L1:
call rax
L2:
; register a function to be called at normal process termination
mov rdi, __do_global_dtors
push __do_global_dtors
call atexit
pop rax
; call init function
call __do_global_ctors
@@ -75,17 +76,13 @@ L4:
; arguments are already on the stack
; call the user's function
pop rdi ; argc
pop rsi ; argv pointer
pop rdx ; env pointer
call main
; call exit from the C library so atexit gets called, and the
; C++ destructors get run. This calls our exit routine below
; when it's done.
; call "exit"
mov rdi, rax
push rax
call exit
; endless loop

View file

@@ -85,7 +85,7 @@ syscall(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2,
asm volatile (_SYSCALLSTR(INT_SYSCALL)
: "=a" (res)
: "D" (nr), "S" (arg0), "d" (arg1), "c" (arg2), "b" (arg3), "a" (arg4)
: "0" (nr), "b" (arg0), "c" (arg1), "d" (arg2), "S" (arg3), "D" (arg4)
: "memory", "cc");
return res;
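
The replacement constraints restore the classic i386 convention: syscall number in `eax`, arguments in `ebx`, `ecx`, `edx`, `esi`, `edi`, return value back in `eax`. A hedged two-argument sketch of what that amounts to (the vector 0x80 is an assumption here; the real code expands the `INT_SYSCALL` macro):

/* --- illustrative sketch, not part of the diff; i386 only --- */
static inline unsigned long demo_syscall(int nr, unsigned long arg0, unsigned long arg1)
{
	unsigned long res;

	asm volatile ("int $0x80"
		: "=a" (res)				/* result comes back in eax */
		: "0" (nr), "b" (arg0), "c" (arg1)	/* eax, ebx, ecx */
		: "memory", "cc");
	return res;
}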

View file

@@ -2,17 +2,6 @@
symbol-file metalsvm.sym
target remote localhost:1234
# Debugging 32bit code
#set architecture i386
#break stublet
#continue
# Debugging 64bit code
set architecture i386:x86-64
# Debugging userspace
#add-symbol-file newlib/examples/memtest.sym 0x40200000
#break main
#continue # skip kernel main()
# Configure breakpoints and everything as you wish here.
break main
continue