Compare commits

..

No commits in common. "no_nested" and "master" have entirely different histories.

60 changed files with 1806 additions and 3563 deletions

1
.gitignore vendored
View file

@ -17,7 +17,6 @@ tools/make_initrd
tools/scc_bootinfo.asm tools/scc_bootinfo.asm
newlib/examples/hello newlib/examples/hello
newlib/examples/memtest newlib/examples/memtest
newlib/examples/fork
newlib/examples/jacobi newlib/examples/jacobi
newlib/examples/echo newlib/examples/echo
newlib/examples/tests newlib/examples/tests

View file

@ -1,12 +1,8 @@
NAME = metalsvm
# For 64bit support, you have to define BIT as 64
# Note: do not forget to 'make veryclean' after changing BIT!!!
BIT=64
ARCH = x86
SMP=1
TOPDIR = $(shell pwd) TOPDIR = $(shell pwd)
ARCH = x86
# For 64bit support, you have to define BIT as 64
BIT=32
NAME = metalsvm
LWIPDIRS = lwip/src/arch lwip/src/api lwip/src/core lwip/src/core/ipv4 lwip/src/netif LWIPDIRS = lwip/src/arch lwip/src/api lwip/src/core lwip/src/core/ipv4 lwip/src/netif
DRIVERDIRS = drivers/net drivers/char DRIVERDIRS = drivers/net drivers/char
KERNDIRS = libkern kernel mm fs apps arch/$(ARCH)/kernel arch/$(ARCH)/mm arch/$(ARCH)/scc $(LWIPDIRS) $(DRIVERDIRS) KERNDIRS = libkern kernel mm fs apps arch/$(ARCH)/kernel arch/$(ARCH)/mm arch/$(ARCH)/scc $(LWIPDIRS) $(DRIVERDIRS)
@ -34,56 +30,35 @@ RANLIB_FOR_TARGET = $(CROSSCOMPREFIX)ranlib
STRIP_FOR_TARGET = $(CROSSCOMPREFIX)strip STRIP_FOR_TARGET = $(CROSSCOMPREFIX)strip
READELF_FOR_TARGET = $(CROSSCOMPREFIX)readelf READELF_FOR_TARGET = $(CROSSCOMPREFIX)readelf
# Tools
MAKE = make MAKE = make
RM = rm -rf RM = rm -rf
NASM = nasm NASM = nasm
# For 64bit code, you have to use qemu-system-x86_64
QEMU = qemu-system-i386
GDB = gdb GDB = gdb
ifeq ($(BIT), 32) # For 64bit support, you have to define -felf64 instead of -felf32
QEMU = qemu-system-i386 NASMFLAGS = -felf32 -g -i$(TOPDIR)/include/metalsvm/
else ifeq ($(BIT), 64) INCLUDE = -I$(TOPDIR)/include -I$(TOPDIR)/arch/$(ARCH)/include -I$(TOPDIR)/lwip/src/include -I$(TOPDIR)/lwip/src/include/ipv4 -I$(TOPDIR)/drivers
QEMU = qemu-system-x86_64 # For 64bit support, you have to define "-m64 -mno-red-zone" instead of "-m32 -march=i586"
endif
INCLUDE = -I$(TOPDIR)/include \
-I$(TOPDIR)/arch/$(ARCH)/include \
-I$(TOPDIR)/lwip/src/include \
-I$(TOPDIR)/lwip/src/include/ipv4 \
-I$(TOPDIR)/drivers
# Compiler options for final code # Compiler options for final code
CFLAGS = -g -O2 -m$(BIT) -Wall -fomit-frame-pointer -ffreestanding -fstrength-reduce -finline-functions $(INCLUDE) $(STACKPROT) CFLAGS = -g -m32 -march=i586 -Wall -O2 -fstrength-reduce -fomit-frame-pointer -finline-functions -ffreestanding $(INCLUDE) $(STACKPROT)
# Compiler options for debuging # Compiler options for debuging
#CFLAGS = -g -O -m$(BIT) -Wall -fomit-frame-pointer -ffreestanding $(INCLUDE) $(STACKPROT) #CFLAGS = -g -O -m32 -march=i586 -Wall -fomit-frame-pointer -ffreestanding $(INCLUDE) $(STACKPROT)
NASMFLAGS = -felf$(BIT) -g -i$(TOPDIR)/include/metalsvm/
ARFLAGS = rsv ARFLAGS = rsv
LDFLAGS = -T link$(BIT).ld -z max-page-size=4096 --defsym __BUILD_DATE=$(shell date +'%Y%m%d') --defsym __BUILD_TIME=$(shell date +'%H%M%S') LDFLAGS = -T link$(BIT).ld -z max-page-size=4096 --defsym __BUILD_DATE=$(shell date +'%Y%m%d') --defsym __BUILD_TIME=$(shell date +'%H%M%S')
STRIP_DEBUG = --strip-debug STRIP_DEBUG = --strip-debug
KEEP_DEBUG = --only-keep-debug KEEP_DEBUG = --only-keep-debug
# Do not change to elf64!
# The Multiboot spec can only boot elf32 binaries
OUTPUT_FORMAT = -O elf32-i386 OUTPUT_FORMAT = -O elf32-i386
# For 64bit support, you have to define -m64 instead of "-m32 -march=i586"
CFLAGS_FOR_NEWLIB = -m$(BIT) -O2 $(STACKPROT) CFLAGS_FOR_NEWLIB = -m32 -march=i586 -O2 $(STACKPROT)
LDFLAGS_FOR_NEWLIB = -m$(BIT) # For 64bit support, you have to define -m64 instead of "-m32 -march=i586"
CFLAGS_FOR_TOOLS = -m$(BIT) -O2 -Wall LDFLAGS_FOR_NEWLIB = -m32 -march=i586
# For 64bit support, you have to define -m64 instead of "-m32"
CFLAGS_FOR_TOOLS = -m32 -O2 -Wall
LDFLAGS_FOR_TOOLS = LDFLAGS_FOR_TOOLS =
NASMFLAGS_FOR_NEWLIB = -felf$(BIT) # For 64bit support, you have to define -felf64 instead of -felf32
NASMFLAGS_FOR_NEWLIB = -felf32
ifeq ($(BIT), 32)
CFLAGS += -march=i586
CFLAGS_FOR_NEWLIB += -march=i586
LDFLAGS_FOR_NEWLIB += -march=i586
else ifeq ($(BIT), 64)
CFLAGS += -mno-red-zone
endif
# Prettify output # Prettify output
V = 0 V = 0
@ -93,15 +68,11 @@ ifeq ($V,0)
endif endif
default: all default: all
all: newlib tools $(NAME).elf all: newlib tools $(NAME).elf
newlib: newlib:
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) \ $(MAKE) ARCH=$(ARCH) BIT=$(BIT) LDFLAGS="$(LDFLAGS_FOR_NEWLIB)" CFLAGS="$(CFLAGS_FOR_NEWLIB)" NASMFLAGS="$(NASMFLAGS_FOR_NEWLIB)" CC_FOR_TARGET=$(CC_FOR_TARGET) \
LDFLAGS="$(LDFLAGS_FOR_NEWLIB)" \
CFLAGS="$(CFLAGS_FOR_NEWLIB)" \
NASMFLAGS="$(NASMFLAGS_FOR_NEWLIB)" \
CC_FOR_TARGET=$(CC_FOR_TARGET) \
CXX_FOR_TARGET=$(CXX_FOR_TARGET) \ CXX_FOR_TARGET=$(CXX_FOR_TARGET) \
GCC_FOR_TARGET=$(GCC_FOR_TARGET) \ GCC_FOR_TARGET=$(GCC_FOR_TARGET) \
AR_FOR_TARGET=$(AR_FOR_TARGET) \ AR_FOR_TARGET=$(AR_FOR_TARGET) \
@ -125,23 +96,14 @@ $(NAME).elf:
$Q$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $(OUTPUT_FORMAT) $(NAME).elf $Q$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $(OUTPUT_FORMAT) $(NAME).elf
qemu: newlib tools $(NAME).elf qemu: newlib tools $(NAME).elf
$(QEMU) -monitor stdio -serial tcp::12346,server,nowait -smp $(SMP) -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -kernel metalsvm.elf -initrd tools/initrd.img $(QEMU) -monitor stdio -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img
qemudbg: newlib tools $(NAME).elf qemudbg: newlib tools $(NAME).elf
$(QEMU) -s -S -nographic -monitor stdio -serial tcp::12346,server -smp $(SMP) -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -kernel metalsvm.elf -initrd tools/initrd.img $(QEMU) -s -S -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img
gdb: $(NAME).elf gdb: $(NAME).elf
$(GDB) -q -x script.gdb make qemudbg > /dev/null &
$(GDB) -x script.gdb
debug: newlib tools $(NAME).elf
killall $(QEMU) || true
killall $(GDB) || true
sleep 1
gnome-terminal --working-directory=$(TOPDIR) \
--tab --title=Shell --command="bash -c 'sleep 1 && telnet localhost 12345'" \
--tab --title=QEmu --command="make qemudbg" \
--tab --title=GDB --command="make gdb" \
--tab --title=Debug --command="bash -c 'sleep 1 && telnet localhost 12346'"
clean: clean:
$Q$(RM) $(NAME).elf $(NAME).sym *~ $Q$(RM) $(NAME).elf $(NAME).sym *~
@ -150,7 +112,7 @@ clean:
veryclean: clean veryclean: clean
$Q$(MAKE) -C newlib veryclean $Q$(MAKE) -C newlib veryclean
@echo Very cleaned. @echo Very cleaned
#depend: #depend:
# for i in $(SUBDIRS); do $(MAKE) -k -C $$i depend; done # for i in $(SUBDIRS); do $(MAKE) -k -C $$i depend; done
@ -162,15 +124,16 @@ veryclean: clean
$Q$(CPP_FOR_TARGET) -MF $*.dep -MT $*.o -MM -D__KERNEL__ $(CFLAGS) $< $Q$(CPP_FOR_TARGET) -MF $*.dep -MT $*.o -MM -D__KERNEL__ $(CFLAGS) $<
include/metalsvm/config.inc: include/metalsvm/config.h include/metalsvm/config.inc: include/metalsvm/config.h
@echo "; This file is generated automatically from the config.h file." > $@ @echo "; This file is generated automatically from the config.h file." > include/metalsvm/config.inc
@echo "; Before editing this, you should consider editing config.h." >> $@ @echo "; Before editing this, you should consider editing config.h." >> include/metalsvm/config.inc
@sed -nre 's/^[\t ]*#define[\t ]+([a-z_0-9]+)([\t ]+.*)*/%define \1/ip' $< >> $@ @awk '/^#define MAX_CORES/{ print "%define MAX_CORES", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc
@sed -nre 's/^[\t ]*#define[\t ]+([a-z_0-9]+)[\t ]+([a-z_0-9.]+)([\t ]+.*)*/%define \1 \2/ip' $< >> $@ @awk '/^#define KERNEL_STACK_SIZE/{ print "%define KERNEL_STACK_SIZE", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc
@awk '/^#define CONFIG_VGA/{ print "%define CONFIG_VGA", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc
%.o : %.asm include/metalsvm/config.inc %.o : %.asm include/metalsvm/config.inc
@echo [ASM] $@ @echo [ASM] $@
$Q$(NASM) $(NASMFLAGS) -o $@ $< $Q$(NASM) $(NASMFLAGS) -o $@ $<
.PHONY: default all clean qemu qemudbg gdb debug newlib tools .PHONY: default all clean emu gdb newlib tools
include $(addsuffix /Makefile,$(SUBDIRS)) include $(addsuffix /Makefile,$(SUBDIRS))

View file

@ -1,4 +1,4 @@
C_source := tests.c echo.c netio.c jacobi.c laplace.c gfx_client.c gfx_generic.c memory.c C_source := tests.c echo.c netio.c jacobi.c laplace.c gfx_client.c gfx_generic.c
MODULE := apps MODULE := apps
include $(TOPDIR)/Makefile.inc include $(TOPDIR)/Makefile.inc

View file

@ -1,389 +0,0 @@
/*
* Copyright 2011 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <metalsvm/stdlib.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdarg.h>
#include <metalsvm/memory.h>
#include <metalsvm/time.h>
#include <metalsvm/tasks.h>
#include <metalsvm/vma.h>
#include <metalsvm/malloc.h>
#include <asm/page.h>
#include <asm/processor.h>
#define PAGE_COUNT 10
#define SIZE (PAGE_COUNT*PAGE_SIZE)
#define VIRT_FROM_ADDR 0x100000000000
#define VIRT_TO_ADDR 0x200000000000
extern atomic_int32_t total_page;
extern atomic_int32_t total_allocated_pages;
extern atomic_int32_t total_available_pages;
/** @brief Simple helper to format and print a single test result
 *
 * Prints "PASSED"/"FAILED" with a running test counter followed by the
 * formatted description, and aborts the whole run on the first failure.
 *
 * @param expr Result of the check (non-zero = passed)
 * @param fmt  printf-style format string describing the check
 */
static void test(size_t expr, char *fmt, ...)
{
	// GCC nested function: kvprintf() needs a putchar-style callback
	void _putchar(int c, void *arg) { kputchar(c); } // for kvprintf

	// running test number, shared across all calls
	static int c = 1;
	va_list ap;

	va_start(ap, fmt);
	kprintf("%s #%u:\t", (expr) ? "PASSED" : "FAILED", c++);
	kvprintf(fmt, _putchar, NULL, 10, ap);
	kputs("\n");
	va_end(ap);

	// stop immediately so later tests don't run on broken state
	if (!expr)
		abort();
}
/** @brief Pseudo-random number generator based on a 16-bit Fibonacci LFSR
 *
 * The fixed seed makes the sequence fully deterministic, which keeps the
 * stress tests reproducible.
 *
 * @return The next 16-bit value of the shift register
 */
static uint16_t rand()
{
	// generator state survives across calls
	static uint16_t lfsr = 0xACE1u;

	// feedback taps at bits 0, 2, 3 and 5
	uint16_t feedback = (lfsr ^ (lfsr >> 2) ^ (lfsr >> 3) ^ (lfsr >> 5)) & 1u;

	lfsr = (uint16_t) ((lfsr >> 1) | (feedback << 15));
	return lfsr;
}
/** @brief BSD 16-bit rotating checksum over the byte range [start, end)
 *
 * Same algorithm as the Unix 'sum' command; QEmu reports it too, which
 * allows cross-checking mapped memory contents from outside the guest.
 *
 * @param start First address included in the sum
 * @param end   First address past the summed range
 * @return The 16-bit checksum (0 for an empty range)
 */
uint16_t checksum(size_t start, size_t end)
{
	uint16_t sum = 0;
	size_t addr;

	for (addr = start; addr < end; addr++) {
		// rotate the accumulator right by one, then add the next byte
		sum = (uint16_t) ((sum >> 1) | (sum << 15));
		sum += *((uint8_t *) addr);
	}

	return sum;
}
/** @brief Second stage of the paging test, run as a separate kernel task
 *
 * Re-computes the checksum over the first virtual mapping and compares it
 * against the stage-1 value passed in via @p arg.
 *
 * @param arg Pointer to the size_t checksum computed by paging()
 * @return Always 0
 */
static int paging_stage2(void *arg)
{
	size_t old, new;

	kprintf("PAGING: entering stage 2...\n");

	// printed for comparison against the "cr3 old" value from stage 1
	size_t cr3 = read_cr3();
	kprintf("cr3 new = %#lx\n", cr3);

	old = *((size_t *) arg);
	kprintf("old sum: %lu\n", old);

	// the mapping must still produce the same checksum from this task
	new = checksum(VIRT_FROM_ADDR, VIRT_FROM_ADDR + SIZE);
	test(old == new, "checksum(%p, %p) = %lu", VIRT_FROM_ADDR, VIRT_FROM_ADDR + SIZE, new);

	page_dump(0, -1L);

	return 0;
}
/** @brief Test of the paging subsystem
 *
 * We will map a single physical memory region to two virtual regions.
 * When writing to the first one, we should be able to read the same contents
 * from the second one. Also exercises remapping (with and without
 * MAP_REMAP), unmapping, and a cross-task checksum check (stage 2).
 */
static void paging(void)
{
	size_t c, sum;
	size_t *p1, *p2;
	size_t virt_from, virt_to;
	size_t phys;
	size_t t;
	int ret;

	// show original page maps, timing the full table walks with rdtsc
	t = rdtsc();
	page_dump(0, -1L);
	kprintf("delta_t = %lu\n", rdtsc() - t);

	t = rdtsc();
	page_stats(0, -1L, 1); // reset accessed and dirty bits
	kprintf("delta_t = %lu\n", rdtsc() - t);

	// NOTE(review): the extern above declares 'total_page' but 'total_pages'
	// is used here — confirm which symbol the mm layer actually exports
	kprintf("bookkeeping pages:\n");
	kprintf(" - total:\t%lu\n", atomic_int32_read(&total_pages));
	kprintf(" - alloc:\t%lu\n", atomic_int32_read(&total_allocated_pages));
	kprintf(" - avail:\t%lu\n", atomic_int32_read(&total_available_pages));

	// allocate physical page frames
	phys = get_pages(PAGE_COUNT);
	test(phys, "get_pages(%lu) = 0x%lx", PAGE_COUNT, phys);

	// create first mapping
	virt_from = map_region(VIRT_FROM_ADDR, phys, PAGE_COUNT, 0);
	test(virt_from, "map_region(0x%lx, 0x%lx, %lu, 0x%x) = 0x%lx", VIRT_FROM_ADDR, phys, PAGE_COUNT, 0, virt_from);

	// check address translation
	phys = virt_to_phys(virt_from);
	test(phys, "virt_to_phys(0x%lx) = 0x%lx", virt_from, phys);

	// write test data through the first mapping
	p1 = (size_t *) virt_from;
	for (c = 0; c < SIZE/sizeof(size_t); c++) {
		p1[c] = c;
	}

	// create second mapping pointing to the same page frames
	virt_to = map_region(VIRT_TO_ADDR, phys, PAGE_COUNT, MAP_USER_SPACE);
	test(virt_to, "map_region(0x%lx, 0x%lx, %lu, 0x%x) = 0x%lx", VIRT_TO_ADDR, phys, PAGE_COUNT, 0, virt_to);

	// show paging infos again
	page_dump(0, -1L);
	page_stats(0, -1L, 0);

	// check address translation of the second mapping
	phys = virt_to_phys(virt_to);
	test(phys, "virt_to_phys(0x%lx) = 0x%lx", virt_to, phys);

	// check if both mapped areas are equal (they alias the same frames)
	p2 = (size_t *) virt_to;
	for (c = 0; c < SIZE/sizeof(size_t); c++) {
		if (p1[c] != p2[c])
			test(0, "data mismatch: *(%p) != *(%p)", &p1[c], &p2[c]);
	}
	test(1, "data is equal");

	// try to remap without MAP_REMAP (expected to fail)
	virt_to = map_region(VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, MAP_USER_SPACE);
	test(!virt_to, "map_region(0x%lx, 0x%lx, %lu, 0x%x) = 0x%lx (without MAP_REMAP flag)", VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, 0, virt_to);

	// try to remap with MAP_REMAP (expected to succeed)
	virt_to = map_region(VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, MAP_REMAP|MAP_USER_SPACE);
	test(virt_to, "map_region(0x%lx, 0x%lx, %lu, 0x%x) = 0x%lx (with MAP_REMAP flag)", VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, MAP_REMAP, virt_to);

	// check if data is not equal anymore (we remapped with +PAGE_SIZE offset)
	p2 = (size_t *) virt_to;
	for (c = 0; c < SIZE/sizeof(size_t); c++) {
		if (p1[c] == p2[c])
			test(0, "data match at *(%p) != *(%p)", &p1[c], &p2[c]);
	}
	test(1, "data is unequal");

	// test unmapping of the second region
	ret = unmap_region(VIRT_TO_ADDR, PAGE_COUNT);
	test(!ret, "unmap_region(%#lx, %lu) = %u", VIRT_TO_ADDR, PAGE_COUNT, ret);
	page_dump(0, -1L);

	// calc checksum over the still-mapped first region
	sum = checksum(virt_from, virt_from + SIZE);
	test(sum, "checksum(%p, %p) = %lu", virt_from, virt_from+SIZE, sum);

	size_t cr3 = read_cr3();
	kprintf("cr3 old = %#lx\n", cr3);

	// hand the checksum to a second kernel task for stage-2 verification;
	// sleep keeps &sum (stack memory) alive while stage 2 runs
	create_kernel_task(0, paging_stage2, &sum, NORMAL_PRIO);
	sleep(5);
}
/** @brief Test of the VMA allocator
 *
 * Exercises vma_alloc()/vma_free() and the explicit vma_add() interface,
 * dumping the VMA list after each phase for visual inspection.
 */
static void vma(void)
{
	int ret;

	vma_dump();

	// vma_alloc: a kernel heap region, then a userspace heap region
	size_t a1 = vma_alloc(SIZE, VMA_HEAP);
	test(a1, "vma_alloc(0x%x, 0x%x) = 0x%lx", SIZE, VMA_HEAP, a1);
	size_t a2 = vma_alloc(SIZE, VMA_HEAP|VMA_USER);
	test(a2 != 0, "vma_alloc(0x%x, 0x%x) = 0x%lx", SIZE, VMA_HEAP|VMA_USER, a2);
	vma_dump();

	// vma_free: release both allocations again
	ret = vma_free(a1, a1+SIZE);
	test(ret >= 0, "vma_free(0x%lx, 0x%lx) = %i", a1, a1+SIZE, ret);
	ret = vma_free(a2, a2+SIZE);
	test(ret >= 0, "vma_free(0x%lx, 0x%lx) = %i", a2, a2+SIZE, ret);
	vma_dump();

	// vma_add: three adjacent regions (base, one above, one below)
	ret = vma_add(VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, VMA_HEAP|VMA_USER);
	test(ret >= 0, "vma_add(0x%lx, 0x%lx, 0x%x) = %u", VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, VMA_HEAP|VMA_USER, ret);
	ret = vma_add(VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, VMA_HEAP|VMA_USER);
	test(ret >= 0, "vma_add(0x%lx, 0x%lx, 0x%x) = %u", VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, VMA_HEAP|VMA_USER, ret);
	ret = vma_add(VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, VMA_HEAP|VMA_USER);
	test(ret >= 0, "vma_add(0x%lx, 0x%lx, 0x%x) = %u", VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, VMA_HEAP|VMA_USER, ret);
	vma_dump();

	// vma_free: remove them again, in a different order than added
	ret = vma_free(VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR);
	test(ret >= 0, "vma_free(0x%lx, 0x%lx) = %u", VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, ret);
	ret = vma_free(VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE);
	test(ret >= 0, "vma_free(0x%lx, 0x%lx) = %u", VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, ret);
	ret = vma_free(VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE);
	test(ret >= 0, "vma_free(0x%lx, 0x%lx) = %u", VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, ret);
	vma_dump();
}
/** @brief Test of the kernel malloc allocator
 *
 * Exercises kmalloc()/kfree() with one large allocation plus a write/read
 * verification, then stresses the buddy system with 20 pseudo-random sizes.
 */
static void malloc(void)
{
	int i;
	int* p[20];
	int* a;

	// kmalloc() test: one large allocation
	buddy_dump();
	a = kmalloc(SIZE);
	test(a != NULL, "kmalloc(%lu) = %p", SIZE, a);
	buddy_dump();

	// simple write/read test over the whole allocation
	for (i=0; i<SIZE/sizeof(int); i++)
		a[i] = i;
	for (i=0; i<SIZE/sizeof(int); i++) {
		if (a[i] != i)
			test(0, "data mismatch: *(%p) != %lu", &a[i], i);
	}
	test(1, "data is equal");

	// kfree() test
	kfree(a);
	test(1, "kfree(%p)", a);
	buddy_dump();

	// some random malloc/free patterns to stress the buddy system
	// (sizes come from the deterministic LFSR rand() above)
	for (i=0; i<20; i++) {
		uint16_t sz = rand();
		p[i] = kmalloc(sz);
		test(p[i] != NULL, "kmalloc(%u) = %p", sz, p[i]);
	}
	buddy_dump();
	for (i=0; i<20; i++) {
		kfree(p[i]);
		test(1, "kfree(%p)", p[i]);
	}
	buddy_dump();
}
/** @brief A memory benchmark for page table walks and TLB misses
 *
 * Configures two general-purpose performance counters (page-walk cycles and
 * page-walk count) around page_stats() and prints scaled cycles-per-walk
 * results for each iteration.
 *
 * @return 0 on success, -1 if PMCs are unusable or allocation fails
 */
int bench(void)
{
	int i;

	// init hardware performance counters
	struct pmc_caps* cap = pmc_init();
	if (cap->version == 0x21) { // QEmu returns garbage
		kputs("QEMU does not support PMCs.. skipping benchmark!\n");
		return -1;
	}

	// fix: "architecural" -> "architectural" in the report message
	kprintf("PMC architectural version: %u\n", cap->version);
	kprintf("There are %u general purpose PMCs (%u bit wide) available\n", cap->gp_count, cap->gp_width);
	kprintf("There are %u fixed function PMCs (%u bit wide) available\n", cap->ff_count, cap->ff_width);

	// setup PMCs: counter 0 = page-walk cycles, counter 1 = page-walk count
	pmc_stop_all();
	pmc_config(0, PMC_EVT_PAGE_WALK_CLKS, PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0);
	pmc_config(1, PMC_EVT_PAGE_WALK_COUNT, PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0);

	// allocate space for results
	uint64_t *data = kmalloc(ITERATIONS * sizeof(uint64_t));
	if (!data)
		return -1;

	// clear caches so the first iteration isn't unfairly warm
	tlb_flush();
	flush_cache();

	for (i=0; i < ITERATIONS; i++) {
		pmc_write(0, 0);
		pmc_write(1, 0);
		pmc_start_all();
#if 0
		int j = 100; // renamed from 'i': shadowed the loop counter
		while (j--) {
			tlb_flush();
			page_stats(0);
		}
#else
		//flush_cache();
		//tlb_flush();
		// NOTE(review): page_stats() is called with three args elsewhere
		// in this file — confirm the intended signature
		page_stats(0);
#endif
		pmc_stop_all();

		uint64_t clks = pmc_read(0);
		uint64_t count = pmc_read(1);

		// scaled cycles per walk; guard against division by zero when
		// no page walks were counted in this iteration
		data[i] = count ? 1000000 * clks / count : 0;
	}

	// dump results
	for (i=0; i<ITERATIONS; i++)
		kprintf("%u\t%lu\n", i, data[i]);

	// fix: the result buffer was previously leaked
	kfree(data);

	return 0;
}
/** @brief Entry point of the memory management test suite
 *
 * Runs the paging, VMA and kernel-malloc tests, then spawns a userspace
 * task (/bin/fork) and waits for it, and finally runs the TLB benchmark.
 *
 * @param arg Unused kernel-task argument
 * @return Always 0 (individual test failures abort() instead)
 */
int memory(void* arg)
{
	int ret;
	tid_t id;

	kprintf("======== PAGING: test started...\n");
	paging();

	kprintf("======== VMA: test started...\n");
	vma();

	kprintf("======== MALLOC: test started...\n");
	malloc();

	// spawn a userspace task and block until it exits
	kprintf("======== USER: test userspace...\n");
	char* argv[] = {"/bin/fork", NULL};
	ret = create_user_task(&id, argv[0], argv);
	test(!ret, "calling %s with id = %i, ret = %i", argv[0], id, ret);
	wait(&ret);
	test(!ret, "userspace task returned with code = %d", ret);

	kprintf("======== BENCH: memory and TLB benchmark started...\n");
	bench();

	// fix: "successfull" -> "successfully" in the final report message
	kprintf("======== All tests finished successfully...\n");

	return 0;
}

View file

@ -43,7 +43,6 @@
int laplace(void* arg); int laplace(void* arg);
int jacobi(void* arg); int jacobi(void* arg);
int memory(void* arg);
void echo_init(void); void echo_init(void);
void netio_init(void); void netio_init(void);
@ -745,7 +744,8 @@ int test_init(void)
create_user_task(NULL, "/bin/jacobi", jacobi_argv); create_user_task(NULL, "/bin/jacobi", jacobi_argv);
//create_user_task_on_core(NULL, "/bin/jacobi", jacobi_argv, 1); //create_user_task_on_core(NULL, "/bin/jacobi", jacobi_argv, 1);
#endif #endif
#if defined(START_MMNIF_TEST) && defined(CONFIG_LWIP) && LWIP_SOCKET #ifdef START_MMNIF_TEST
#if defined(CONFIG_LWIP) && LWIP_SOCKET
if (RCCE_IAM == 0) { if (RCCE_IAM == 0) {
kprintf("Start /bin/server...\n"); kprintf("Start /bin/server...\n");
create_user_task(NULL, "/bin/server", server_argv); create_user_task(NULL, "/bin/server", server_argv);
@ -755,8 +755,6 @@ int test_init(void)
create_user_task(NULL, "/bin/client", client_argv); create_user_task(NULL, "/bin/client", client_argv);
} }
#endif #endif
#ifdef START_MEMORY
create_kernel_task(NULL, memory, NULL, NORMAL_PRIO);
#endif #endif
return 0; return 0;

View file

@ -46,7 +46,6 @@
//#define START_HELLO //#define START_HELLO
//#define START_TESTS //#define START_TESTS
//#define START_JACOBI //#define START_JACOBI
//#define START_MEMORY
//#define START_CHIEFTEST //#define START_CHIEFTEST

View file

@ -34,7 +34,7 @@
// ____ _ _ // ____ _ _
// / ___| _ _ _ __ ___ | |__ ___ | |___ // / ___| _ _ _ __ ___ | |__ ___ | |___
// \___ \| | | | '_ ` _ \| '_ \ / _ \| / __| // \___ \| | | | '_ ` _ \| '_ \ / _ \| / __|
// ___) | |_| | | | | | | |_) | (_) | \__ // ___) | |_| | | | | | | |_) | (_) | \__ \
// |____/ \__, |_| |_| |_|_.__/ \___/|_|___/ // |____/ \__, |_| |_| |_|_.__/ \___/|_|___/
// |___/ // |___/
// //
@ -253,7 +253,7 @@
// _____ _ _ // _____ _ _
// | ___| _ _ __ ___| |_(_) ___ _ __ ___ // | ___| _ _ __ ___| |_(_) ___ _ __ ___
// | |_ | | | | '_ \ / __| __| |/ _ \| '_ \/ __| // | |_ | | | | '_ \ / __| __| |/ _ \| '_ \/ __|
// | _|| |_| | | | | (__| |_| | (_) | | | \__ // | _|| |_| | | | | (__| |_| | (_) | | | \__ \
// |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/ // |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
// //
// ######################################################################################### // #########################################################################################

View file

@ -100,6 +100,11 @@ inline static void outportl(unsigned short _port, unsigned int _data)
asm volatile("outl %1, %0"::"dN"(_port), "a"(_data)); asm volatile("outl %1, %0"::"dN"(_port), "a"(_data));
} }
inline static void uart_putchar(unsigned char _data)
{
outportb(0x2F8, _data);
}
/** /**
* read a byte from CMOS * read a byte from CMOS
* @param offset CMOS offset * @param offset CMOS offset

View file

@ -35,11 +35,9 @@
#ifdef CONFIG_MULTIBOOT #ifdef CONFIG_MULTIBOOT
/// Does the bootloader provide mem_* fields? /* are there modules to do something with? */
#define MULTIBOOT_INFO_MEM 0x00000001
/// Does the bootloader provide a list of modules?
#define MULTIBOOT_INFO_MODS 0x00000008 #define MULTIBOOT_INFO_MODS 0x00000008
/// Does the bootloader provide a full memory map? /* is there a full memory map? */
#define MULTIBOOT_INFO_MEM_MAP 0x00000040 #define MULTIBOOT_INFO_MEM_MAP 0x00000040
typedef uint16_t multiboot_uint16_t; typedef uint16_t multiboot_uint16_t;
@ -116,6 +114,7 @@ struct multiboot_info
multiboot_uint16_t vbe_interface_off; multiboot_uint16_t vbe_interface_off;
multiboot_uint16_t vbe_interface_len; multiboot_uint16_t vbe_interface_len;
}; };
typedef struct multiboot_info multiboot_info_t; typedef struct multiboot_info multiboot_info_t;
struct multiboot_mmap_entry struct multiboot_mmap_entry

View file

@ -21,7 +21,6 @@
* @file arch/x86/include/asm/page.h * @file arch/x86/include/asm/page.h
* @brief Definitions and functions related to paging * @brief Definitions and functions related to paging
* @author Stefan Lankes * @author Stefan Lankes
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
* *
* This file defines the interface for paging as like structures related to paging. * This file defines the interface for paging as like structures related to paging.
*/ */
@ -32,120 +31,86 @@
#include <metalsvm/stddef.h> #include <metalsvm/stddef.h>
#include <metalsvm/stdlib.h> #include <metalsvm/stdlib.h>
/// Page offset bits #define _PAGE_BIT_PRESENT 0 /* is present */
#define PAGE_BITS 12 #define _PAGE_BIT_RW 1 /* writeable */
#define _PAGE_BIT_USER 2 /* userspace addressable */
#ifdef CONFIG_X86_32 #define _PAGE_BIT_PWT 3 /* page write through */
/// Number of page map indirections #define _PAGE_BIT_PCD 4 /* page cache disabled */
#define PAGE_MAP_LEVELS 2 #define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
/// Page map bits #define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
#define PAGE_MAP_BITS 10 #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
/// Total operand width in bits #define _PAGE_BIT_PAT 7 /* on 4KB pages */
#define BITS 32 #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
/// Linear/virtual address width #define _PAGE_BIT_SVM_STRONG 9 /* mark a virtual address range as used by the SVM system */
#define VIRT_BITS BITS #define _PAGE_BIT_SVM_LAZYRELEASE 10 /* mark a virtual address range as used by the SVM system */
/// Physical address width (we dont support PAE) #define _PAGE_BIT_SVM_INIT 11 /* mark if the MBP proxy is used */
#define PHYS_BITS BITS
#elif defined(CONFIG_X86_64)
/// Number of page map indirections
#define PAGE_MAP_LEVELS 4
/// Page map bits
#define PAGE_MAP_BITS 9
/// Total operand width in bits
#define BITS 64
/// Linear/virtual address width
#define VIRT_BITS 48
/// Physical address width (maximum value)
#define PHYS_BITS 52
#endif
/// The size of a single page in bytes
#define PAGE_SIZE ( 1L << PAGE_BITS)
/// The number of entries in a page map table
#define PAGE_MAP_ENTRIES ( 1L << PAGE_MAP_BITS)
/// Mask the page address
#define PAGE_MASK (-1L << PAGE_BITS)
/// Mask the entry in a page table
#define PAGE_ENTRY_MASK (-1L << (PAGE_BITS-PAGE_MAP_BITS))
/// Mask for all flag bits in a page map entry (including ignored bits)
#define PAGE_FLAGS_MASK (~(-1L << PAGE_BITS) | (-1L << VIRT_BITS))
/// Align to next page
#define PAGE_FLOOR(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
/// Align to page
#define PAGE_CEIL(addr) ( (addr) & PAGE_MASK)
/// Sign extension to get a valid canonical address (hack: by using aritmethic shifts)
#define VIRT_SEXT(addr) ((ssize_t) addr << (BITS-VIRT_BITS) >> (BITS-VIRT_BITS))
// base addresses of page map tables
#ifdef CONFIG_X86_32
#define PAGE_MAP_PGD 0xFFFFF000
#define PAGE_MAP_PGT 0xFFC00000
#elif defined(CONFIG_X86_64)
#define PAGE_MAP_PML4 0xFFFFFFFFFFFFF000
#define PAGE_MAP_PDPT 0xFFFFFFFFFFE00000
#define PAGE_MAP_PGD 0xFFFFFFFFC0000000
#define PAGE_MAP_PGT 0xFFFFFF8000000000
#endif
/// Page is present /// Page is present
#define PG_PRESENT (1 << 0) #define PG_PRESENT (1 << _PAGE_BIT_PRESENT)
/// Page is read- and writable /// Page is read- and writable
#define PG_RW (1 << 1) #define PG_RW (1 << _PAGE_BIT_RW)
/// Page is addressable from userspace /// Page is addressable from userspace
#define PG_USER (1 << 2) #define PG_USER (1 << _PAGE_BIT_USER)
/// Page write through is activated /// Page write through is activated
#define PG_PWT (1 << 3) #define PG_PWT (1 << _PAGE_BIT_PWT)
/// Page cache is disabled /// Page cache is disabled
#define PG_PCD (1 << 4) #define PG_PCD (1 << _PAGE_BIT_PCD)
/// Page was recently accessed (set by CPU) /// Page was recently accessed (set by CPU)
#define PG_ACCESSED (1 << 5) #define PG_ACCESSED (1 << _PAGE_BIT_ACCESSED)
/// Page is dirty due to recentwrite-access (set by CPU) /// Page is dirty due to recentwrite-access (set by CPU)
#define PG_DIRTY (1 << 6) #define PG_DIRTY (1 << _PAGE_BIT_DIRTY)
/// Huge page: 4MB (or 2MB, 1GB) /// Big page: 4MB (or 2MB)
#define PG_PSE (1 << 7) #define PG_PSE (1 << _PAGE_BIT_PSE)
/// Page is part of the MPB (SCC specific entry) /// Page is part of the MPB (SCC specific entry)
#define PG_MPE PG_PSE #define PG_MPE PG_PSE
/// Page attribute table
#define PG_PAT PG_PSE
/// Global TLB entry (Pentium Pro and later) /// Global TLB entry (Pentium Pro and later)
#define PG_GLOBAL (1 << 8) #define PG_GLOBAL (1 << _PAGE_BIT_GLOBAL)
/// Pattern flag
#define PG_PAT (1 << _PAGE_BIT_PAT)
/// This virtual address range is used by SVM system as marked /// This virtual address range is used by SVM system as marked
#define PG_SVM (1 << 9) #define PG_SVM PG_SVM_STRONG
#define PG_SVM_STRONG PG_SVM #define PG_SVM_STRONG (1 << _PAGE_BIT_SVM_STRONG)
/// This virtual address range is used by SVM system as marked /// This virtual address range is used by SVM system as marked
#define PG_SVM_LAZYRELEASE (1 << 10) #define PG_SVM_LAZYRELEASE (1 << _PAGE_BIT_SVM_LAZYRELEASE)
/// Currently, no page frame is behind this page (only the MBP proxy) /// Currently, no page frame is behind this page (only the MBP proxy)
#define PG_SVM_INIT (1 << 11) #define PG_SVM_INIT (1 << _PAGE_BIT_SVM_INIT)
/// Disable execution for this page
#define PG_XD (1L << 63)
/// This is a whole set of flags (PRESENT,RW,ACCESSED,DIRTY) for kernelspace tables /// This is a whole set of flags (PRESENT,RW,ACCESSED,DIRTY) for kernelspace tables
#define PG_TABLE (PG_PRESENT|PG_RW|PG_XD) #define KERN_TABLE (PG_PRESENT|PG_RW|PG_ACCESSED|PG_DIRTY)
/// This is a whole set of flags (PRESENT,RW,ACCESSED,DIRTY,USER) for userspace tables
#define USER_TABLE (PG_PRESENT|PG_RW|PG_ACCESSED|PG_DIRTY|PG_USER)
/// This is a whole set of flags (PRESENT,RW,GLOBAL) for kernelspace pages /// This is a whole set of flags (PRESENT,RW,GLOBAL) for kernelspace pages
#define PG_PAGE (PG_PRESENT|PG_RW|PG_GLOBAL|PG_XD) #define KERN_PAGE (PG_PRESENT|PG_RW|PG_GLOBAL)
/// This is a whole set of flags (PRESENT,RW,USER) for userspace pages
#define USER_PAGE (PG_PRESENT|PG_RW|PG_USER)
/** @brief A single entry in a page map */ #if __SIZEOF_POINTER__ == 4
typedef size_t page_entry_t; #define PGT_ENTRIES 1024
#elif __SIZEOF_POINTER__ == 8
/** @brief General page map structure #define PGT_ENTRIES 512
#endif
/** @brief Page table structure
* *
* This page map structure is a general type for all indirecton levels. * This structure keeps page table entries.\n
* As all page map levels containing the same amount of entries. * On a 32bit system, a page table consists normally of 1024 entries.
* All page maps must be page aligned!
*/ */
typedef struct page_map { typedef struct page_table
page_entry_t entries[PAGE_MAP_ENTRIES]; {
} __attribute__ ((aligned (PAGE_SIZE))) page_map_t; /// Page table entries are unsigned 32bit integers.
size_t entries[PGT_ENTRIES];
} page_table_t __attribute__ ((aligned (4096)));
/** @brief A callback type for the page map iterator /** @brief Page directory structure
* *
* @param entry A pointer to the current page map entry * This structure keeps page directory entries.\
* @return * On a 32bit system, a page directory consists normally of 1024 entries.
* - 0 if we want to skip underlying page tables
* - >0 if want to recurse into underlying page tables
*/ */
typedef int (*page_cb_t)(page_entry_t* entry, int level); typedef struct page_dir
{
/// Page dir entries are unsigned 32bit integers.
size_t entries[PGT_ENTRIES];
} page_dir_t __attribute__ ((aligned (4096)));
/** @brief Converts a virtual address to a physical /** @brief Converts a virtual address to a physical
* *
@ -227,29 +192,29 @@ int arch_paging_init(void);
* *
* @return Returns the address of the boot task's page dir array. * @return Returns the address of the boot task's page dir array.
*/ */
page_map_t* get_boot_page_map(void); page_dir_t* get_boot_pgd(void);
/** @brief Setup a new page directory for a new user-level task /** @brief Setup a new page directory for a new user-level task
* *
* @param task Pointer to the task-specific task_t structure * @param task Pointer to the task-specific task_t structure
* @param copy If true: copy userspace pages and tables * @param copy If true: PGD will be a copy of the kernel's address space PGD
* *
* @return * @return
* - counter of allocated page tables * - counter of allocated page tables
* - -ENOMEM (-12) on failure * - -ENOMEM (-12) on failure
*/ */
int copy_page_map(struct task* task, int copy); int create_pgd(task_t* task, int copy);
/** @brief Delete all page map structures of the current task /** @brief Delete page directory and its page tables
* *
* Puts PML4, PDPT, PGD, PGT tables back to buffer and * Puts page tables and page directory back to buffer and
* sets the task's page map pointer to NULL * sets the task's page directory pointer to NULL
* *
* @return * @return
* - 0 on success * - 0 on success
* - -EINVAL (-22) on failure (in case PGD is still the boot-pgd). * - -EINVAL (-22) on failure (in case PGD is still the boot-pgd).
*/ */
int drop_page_map(void); int drop_pgd(void);
/** @brief Change the page permission in the page tables of the current task /** @brief Change the page permission in the page tables of the current task
* *
@ -266,13 +231,4 @@ int drop_page_map(void);
*/ */
int change_page_permissions(size_t start, size_t end, uint32_t flags); int change_page_permissions(size_t start, size_t end, uint32_t flags);
/** @brief Dump mapped memory */
void page_dump(size_t start, size_t end);
/** @brief Print stats about page flags
*
* @param reset Reset accessed and dirty bits in page tables
*/
void page_stats(size_t start, size_t end, int reset);
#endif #endif

View file

@ -1,193 +0,0 @@
/*
* Copyright 2013 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel
* @file arch/x86/include/pmc.h
* @brief Simple interface to IA32 Performance Monitor Counters
*
* This implementation is in parts specific for Intel Core 2 Duo Processors!
*/
#ifndef _ARCH_PMC_H_
#define _ARCH_PMC_H_
#include <stddef.h>
// PMC MSR addresses
#define MSR_PERF_GLOBAL_STATUS 0x38E // global counter control facilities
#define MSR_PERF_GLOBAL_CTRL 0x38F
#define MSR_PERF_GLOBAL_OVF_CTRL 0x390
#define IA32_PERF_CAPABILITIES 0x345
#define IA32_PERFEVTSEL(i) (0x186 + i) // general purpose PMC configuration register
#define IA32_PMC(i) (0x0C1 + i) // general purpose PMC counter register
#define IA32_A_PMC(i) (0x4C1 + i) // general purpose alias PMC counter register for full width writes
#define MSR_PERF_FIXED_CTR(i) (0x309 + i) // fixed function PMC counter register
#define MSR_PERF_FIXED_CTR_CTRL 0x38D // fixed functiion PMC configuration register
/* For Intel Core 2 Duo the MSR_PERF_FIXED_CTRs are mapped as followed:
* MSR_PERF_FIXED_CTR(0) => INST_RETIRED.ANY
* MSR_PERF_FIXED_CTR(1) => CPU_CLK_UNHALTED.CORE
* MSR_PERF_FIXED_CTR(2) => CPU_CLK_UNHALTED.REF */
// architecural flags for IA32_PERFEVTSEL
#define PMC_EVTSEL_CMASK 24 // counter mask [31:24]
#define PMC_EVTSEL_UMASK 8 // unit mask [15:8]
#define PMC_EVTSEL_INC (1 << 23) // invert counter mask
#define PMC_EVTSEL_EN (1 << 22) // enable counters
#define PMC_EVTSEL_ANY (1 << 21) // any thread (from version 3 on)
#define PMC_EVTSEL_INT (1 << 20) // APIC interrupt enable
#define PMC_EVTSEL_PC (1 << 19) // pin control
#define PMC_EVTSEL_E (1 << 18) // edge detect
#define PMC_EVTSEL_OS (1 << 17) // operating system mode
#define PMC_EVTSEL_USR (1 << 16) // user mode
// Core 2 Duo non-architecural flags for IA32_PERFEVTSEL (bus snooping)
#define PMC_EVTSEL_HITM (1 << 11) // HITM response
#define PMC_EVTSEL_HIT (1 << 9) // HIT response
#define PMV_EVTSEL_CLEAN (1 << 8) // CLEAN response
// architecutral PMC events CPUID.0AH.EBX[6:0]
#define PMC_EVT_UNHALTED_CORE_CLKS 0x003C // UnHalted Core Cycles
#define PMC_EVT_UNHALTED_REF_CLKS 0x013C // UnHalted Reference Cycles
#define PMC_EVT_INST_RET 0x00C0 // Instruction Retired
#define PMC_EVT_LLC_REF 0x4F2E // LLC Reference
#define PMC_EVT_LLC_MISS 0x412E // LLC Misses
#define PMC_EVT_BRANCH_RET 0x00C4 // Branch Instruction Retired
#define PMC_EVT_BRANCH_MISS_RET 0x00C5 // Branch Miss Retired
// Core 2 Duo non-architecural PMC events
#define PMC_EVT_DTLB_MISS_ANY 0x0108 // Memory accesses that missed the TLB
#define PMC_EVT_DTLB_MISS_LD 0x0208 // DTLB misses due to load operations
#define PMC_EVT_DTLB_MISS_L0_LD 0x0408 // Level 0: DTLB misses due to load operations
#define PMC_EVT_DTLB_MISS_ST 0x0808 // DTLB misses due to store operations
#define PMC_EVT_ITLB_FLUSH 0x4082 // ITLB flushes
#define PMC_EVT_ITLB_MISS 0x1282 // ITLB misses (either large or small page)
#define PMC_EVT_ITLB_MISS_RET 0x00C9 // Retired instructions that missed the ITLB
#define PMC_EVT_ITLB_MISS_SMALL 0x0282 // ITLB small page misses
#define PMC_EVT_ITLB_MISS_LARGE 0x1082 // ITLB large page misses
#define PMC_EVT_PAGE_WALK_COUNT 0x010C // Number of page-walks executed
#define PMC_EVT_PAGE_WALK_CLKS 0x020C // Duration of page-walks in core cycles
struct pmc {
uint8_t id;
void (*start)();
void (*stop)();
void (*reset)();
void (*write)(uint64_t val);
uint64_t (*read)();
};
struct pmc_caps {
/// Architecural PM version (CPUID.0AH:EAX[7:0])
uint8_t version;
/// Number of available General Purpose PMCs (CPUID.0AH:EAX[15:8])
uint8_t gp_count;
/// Number of available Fixed Function PMCs (CPUID.0AH.EDX[4:0])
uint8_t ff_count;
/// Counter bit width of General Purpose PMCs (CPUID.0AH:EAX[23:16])
uint8_t gp_width;
/// Counter bit width of Fixed Function PMCs (CPUID.0AH.EDX[12:5])
uint8_t ff_width;
/// Bit mask of supported architecural PMC events (CPUID.0AH.EBX[6:0])
uint32_t arch_events;
/// IA32_PERF_CAPABILITIES MSR
uint64_t msr;
};
/** @brief Queries the CPU about available Performance Monitoring capabilities
*
* @return A pointer to the capabilities struct
**/
struct pmc_caps* pmc_init();
/** @brief Setups and stops the general purpose PMCs
*
* @param i The counter number to configure (positive for gp PMCs, negative for ff PMCs)
* @param event A combined event number including the unit mask (PMC_EVT_*)
* @param flags Flags for the IA32_PERFEVTSEL registers (PMC_EVTSEL_*)
* @param umask A seperate Unitmask ORed with event
* @param cmask A optional counter mask value
* @return
* - 0 on success
* - else failure (invalid counter or flags)
*/
int pmc_config(uint8_t i, uint16_t event, uint32_t flags, uint8_t umask, uint8_t cmask);
/** @brief Start a single general purpose PMC
*
* @param i The counter number
* @return
* - 0 on success
* - -EINVAL on invalid counter number
*/
inline int pmc_start(uint8_t i);
/** @brief Stop a single general purpose PMC
*
* @param i The counter number
* @return
* - 0 on success
* - -EINVAL on invalid counter number
*/
inline int pmc_stop(uint8_t i);
/** @brief Start all PMCs at the same time
*
* @param i The counter number
* @return
* - 0 on success
* - -EINVAL on invalid counter number
*/
inline int pmc_start_all();
/** @brief Stop all PMCs at the same time
*
* @param i The counter number
* @return
* - 0 on success
* - -EINVAL on invalid counter number
*/
inline int pmc_stop_all();
/** @brief Read a single general purpose PMC
*
* @param i The counter number
* @return The counter value (see struct pmc_caps.gp_width)
*/
inline uint64_t pmc_read(uint8_t i);
/** @brief Write a single general purpose PMC value
*
* Not all architectures support full width writes to the PMCs.
* If bit 13 (FW_WRITE) in struct pmc_caps.msr is not set the PMC
* is updated with the 32 bit sign extended version of val!
*
* @param i The counter number
* @param val The counter value (see struct pmc_caps.gp_width)
*/
inline int pmc_write(uint8_t i, uint64_t val);
#endif

View file

@ -39,124 +39,22 @@
extern "C" { extern "C" {
#endif #endif
// CPUID.01H.EDX feature list // feature list 1
#define CPU_FEATURE_FPU (1 << 0) #define CPU_FEATURE_FPU (1 << 0)
#define CPU_FEATUE_PSE (1 << 3)
#define CPU_FEATURE_MSR (1 << 5) #define CPU_FEATURE_MSR (1 << 5)
#define CPU_FEATURE_PAE (1 << 6)
#define CPU_FEATURE_APIC (1 << 9) #define CPU_FEATURE_APIC (1 << 9)
#define CPU_FEATURE_PGE (1 << 13)
#define CPU_FEATURE_PAT (1 << 16)
#define CPU_FEATURE_PSE36 (1 << 17)
#define CPU_FEATURE_MMX (1 << 23) #define CPU_FEATURE_MMX (1 << 23)
#define CPU_FEATURE_FXSR (1 << 24) #define CPU_FEATURE_FXSR (1 << 24)
#define CPU_FEATURE_SSE (1 << 25) #define CPU_FEATURE_SSE (1 << 25)
#define CPU_FEATURE_SSE2 (1 << 26) #define CPU_FEATURE_SSE2 (1 << 26)
// CPUID.01H.ECX feature list // feature list 2
#define CPU_FEATURE_X2APIC (1 << 21) #define CPU_FEATURE_X2APIC (1 << 21)
#define CPU_FEATURE_AVX (1 << 28) #define CPU_FEATURE_AVX (1 << 28)
#define CPU_FEATURE_HYPERVISOR (1 << 31) #define CPU_FEATURE_HYPERVISOR (1 << 31)
// CPUID.80000001H:EDX feature list
#define CPU_FEATURE_NX (1 << 20)
#define CPU_FEATURE_1GBHP (1 << 26)
#define CPU_FEATURE_LM (1 << 29)
// x86 control registers
/// Protected Mode Enable
#define CR0_PE (1 << 0)
/// Monitor coprocessor
#define CR0_MP (1 << 1)
/// Enable FPU emulation
#define CR0_EM (1 << 2)
/// Task switched
#define CR0_TS (1 << 3)
/// Extension type of coprocessor
#define CR0_ET (1 << 4)
/// Enable FPU error reporting
#define CR0_NE (1 << 5)
/// Enable write protected pages
#define CR0_WP (1 << 16)
/// Enable alignment checks
#define CR0_AM (1 << 18)
/// Globally enables/disable write-back caching
#define CR0_NW (1 << 29)
/// Globally disable memory caching
#define CR0_CD (1 << 30)
/// Enable paging
#define CR0_PG (1 << 31)
/// Virtual 8086 Mode Extensions
#define CR4_VME (1 << 0)
/// Protected-mode Virtual Interrupts
#define CR4_PVI (1 << 1)
/// Disable Time Stamp Counter register (rdtsc instruction)
#define CR4_TSD (1 << 2)
/// Enable debug extensions
#define CR4_DE (1 << 3)
/// Enable hugepage support
#define CR4_PSE (1 << 4)
/// Enable physical address extension
#define CR4_PAE (1 << 5)
/// Enable machine check exceptions
#define CR4_MCE (1 << 6)
/// Enable global pages
#define CR4_PGE (1 << 7)
/// Enable Performance-Monitoring Counter
#define CR4_PCE (1 << 8)
/// Enable Operating system support for FXSAVE and FXRSTOR instructions
#define CR4_OSFXSR (1 << 9)
/// Enable Operating System Support for Unmasked SIMD Floating-Point Exceptions
#define CR4_OSXMMEXCPT (1 << 10)
/// Enable Virtual Machine Extensions, see Intel VT-x
#define CR4_VMXE (1 << 13)
/// Enable Safer Mode Extensions, see Trusted Execution Technology (TXT)
#define CR4_SMXE (1 << 14)
/// Enables the instructions RDFSBASE, RDGSBASE, WRFSBASE, and WRGSBASE
#define CR4_FSGSBASE (1 << 16)
/// Enables process-context identifiers
#define CR4_PCIDE (1 << 17)
/// Enable XSAVE and Processor Extended States
#define CR4_OSXSAVE (1 << 18)
/// Enable Supervisor Mode Execution Protection
#define CR4_SMEP (1 << 20)
/// Enable Supervisor Mode Access Protection
#define CR4_SMAP (1 << 21)
// x86-64 specific MSRs
/// extended feature register
#define MSR_EFER 0xc0000080
/// legacy mode SYSCALL target
#define MSR_STAR 0xc0000081
/// long mode SYSCALL target
#define MSR_LSTAR 0xc0000082
/// compat mode SYSCALL target
#define MSR_CSTAR 0xc0000083
/// EFLAGS mask for syscall
#define MSR_SYSCALL_MASK 0xc0000084
/// 64bit FS base
#define MSR_FS_BASE 0xc0000100
/// 64bit GS base
#define MSR_GS_BASE 0xc0000101
/// SwapGS GS shadow
#define MSR_KERNEL_GS_BASE 0xc0000102
// MSR EFER bits
#define EFER_SCE (1 << 0)
#define EFER_LME (1 << 8)
#define EFER_LMA (1 << 10)
#define EFER_NXE (1 << 11)
#define EFER_SVME (1 << 12)
#define EFER_LMSLE (1 << 13)
#define EFER_FFXSR (1 << 14)
#define EFER_TCE (1 << 15)
typedef struct { typedef struct {
uint32_t feature1, feature2, feature3; uint32_t feature1, feature2;
uint32_t addr_width;
} cpu_info_t; } cpu_info_t;
extern cpu_info_t cpu_info; extern cpu_info_t cpu_info;
@ -209,16 +107,6 @@ inline static uint32_t on_hypervisor(void)
return (cpu_info.feature2 & CPU_FEATURE_HYPERVISOR); return (cpu_info.feature2 & CPU_FEATURE_HYPERVISOR);
} }
inline static uint32_t has_pge(void)
{
return (cpu_info.feature1 & CPU_FEATURE_PGE);
}
inline static uint32_t has_nx(void)
{
return (cpu_info.feature3 & CPU_FEATURE_NX);
}
/** @brief Read out time stamp counter /** @brief Read out time stamp counter
* *
* The rdtsc asm command puts a 64 bit time stamp value * The rdtsc asm command puts a 64 bit time stamp value
@ -385,7 +273,7 @@ int ipi_tlb_flush(void);
/** @brief Flush a specific page entry in TLB /** @brief Flush a specific page entry in TLB
* @param addr The (virtual) address of the page to flush * @param addr The (virtual) address of the page to flush
*/ */
static inline void tlb_flush_one_page(size_t addr) static inline void tlb_flush_one_page(uint32_t addr)
{ {
asm volatile("invlpg (%0)" : : "r"(addr) : "memory"); asm volatile("invlpg (%0)" : : "r"(addr) : "memory");
#if MAX_CORES > 1 #if MAX_CORES > 1
@ -394,7 +282,7 @@ static inline void tlb_flush_one_page(size_t addr)
* => User-level applications run only on one * => User-level applications run only on one
* and we didn't flush the TLB of the other cores * and we didn't flush the TLB of the other cores
*/ */
if (addr < KERNEL_SPACE) if (addr <= KERNEL_SPACE)
ipi_tlb_flush(); ipi_tlb_flush();
#endif #endif
} }
@ -405,7 +293,7 @@ static inline void tlb_flush_one_page(size_t addr)
*/ */
static inline void tlb_flush(void) static inline void tlb_flush(void)
{ {
size_t val = read_cr3(); uint32_t val = read_cr3();
if (val) if (val)
write_cr3(val); write_cr3(val);

View file

@ -26,6 +26,13 @@
extern "C" { extern "C" {
#endif #endif
/** @brief Copy a physical page to another physical destination
*
* @param dest Destination address
* @param src Source address
*/
void copy_page_physical(void* dest, const void * src);
#ifdef HAVE_ARCH_MEMCPY #ifdef HAVE_ARCH_MEMCPY
#ifdef CONFIG_ROCKCREEK #ifdef CONFIG_ROCKCREEK

View file

@ -97,19 +97,17 @@ static inline int register_task(void)
* *
* @return 0 in any case * @return 0 in any case
*/ */
static inline int jump_to_user_code(size_t ep, size_t stack) static inline int jump_to_user_code(uint32_t ep, uint32_t stack)
{ {
asm volatile ("push $0x23; push %0; push $0x1B; push %1" :: "r"(stack), "r"(ep)); // fake stack, see Intel Reference Manual, Vol 1, 6.3.6
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
asm volatile ("mov %0, %%ds; mov %0, %%fs; mov %0, %%gs; mov %0, %%es" :: "r"(0x23)); // update segment registers asm volatile ("mov %0, %%ds; mov %0, %%fs; mov %0, %%gs; mov %0, %%es" :: "r"(0x23));
asm volatile ("lret" ::: "cc"); // far return to user level code asm volatile ("push $0x23; push %0; push $0x1B; push %1" :: "r"(stack), "r"(ep));
#elif defined (CONFIG_X86_64) asm volatile ("lret" ::: "cc");
asm volatile ("lretq" ::: "cc"); // far return to user level code
#endif
return 0; return 0;
#else
return -22;
#endif
} }
#ifdef __cplusplus #ifdef __cplusplus

View file

@ -1,74 +0,0 @@
/*
* Copyright 2010 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel
* @file arch/x86/include/asm/uart.h
* @brief COM port related code
*/
#ifndef __ARCH_UART_H__
#define __ARCH_UART_H__
#include <metalsvm/stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifdef CONFIG_UART
/** @brief Initialize VGA output and clear the screen */
void uart_init(void);
/** @brief Send a single character to the uart
*
* @return The original input character casted to int
*/
void uart_putchar(char c);
/** @brief Receive a single character on the uart
*
* @return The original input character casted to int
*/
char uart_getchar(void);
/** @brief Simple string output on screen.
*
* If you want a new line you will have to "\\n".
*
* @return Length of output in bytes
*/
void uart_puts(const char *str);
/** @brief Simple string output on screen.
*
* If you want a new line you will have to "\\n".
*
* @return Length of output in bytes
*/
int uart_gets(char *str, size_t len);
#endif
#ifdef __cplusplus
}
#endif
#endif

View file

@ -1,4 +1,4 @@
C_source := gdt.c kb.c timer.c irq.c isrs.c idt.c vga.c uart.c multiboot.c apic.c pci.c processor.c pmc.c C_source := gdt.c kb.c timer.c irq.c isrs.c idt.c vga.c multiboot.c apic.c pci.c processor.c
ASM_source := entry$(BIT).asm string$(BIT).asm ASM_source := entry$(BIT).asm string$(BIT).asm
MODULE := arch_x86_kernel MODULE := arch_x86_kernel

View file

@ -27,7 +27,7 @@
#include <metalsvm/init.h> #include <metalsvm/init.h>
#include <metalsvm/page.h> #include <metalsvm/page.h>
#include <metalsvm/spinlock.h> #include <metalsvm/spinlock.h>
#include <metalsvm/memory.h> #include <metalsvm/mmu.h>
#include <metalsvm/tasks.h> #include <metalsvm/tasks.h>
#include <asm/irq.h> #include <asm/irq.h>
#include <asm/idt.h> #include <asm/idt.h>
@ -387,14 +387,12 @@ void smp_start(uint32_t id)
kprintf("Application processor %d is entering its idle task\n", apic_cpu_id()); kprintf("Application processor %d is entering its idle task\n", apic_cpu_id());
#ifdef CONFIG_X86_32 // initialize default cpu features
// initialization for x86_64 is done in smp_entry()
cpu_init(); cpu_init();
#endif
// use the same gdt like the boot processors // use the same gdt like the boot processors
gdt_flush(); gdt_flush();
// install IDT // install IDT
idt_install(); idt_install();
@ -724,8 +722,8 @@ static int apic_probe(void)
} }
} }
#endif #endif
found_mp:
#endif #endif
found_mp:
if (!apic_mp) if (!apic_mp)
goto no_mp; goto no_mp;

View file

@ -29,6 +29,7 @@
SECTION .mboot SECTION .mboot
global start global start
start: start:
mov byte [msg], 'H'
jmp stublet jmp stublet
; This part MUST be 4byte aligned, so we solve that issue using 'ALIGN 4' ; This part MUST be 4byte aligned, so we solve that issue using 'ALIGN 4'
@ -37,10 +38,10 @@ mboot:
; Multiboot macros to make a few lines more readable later ; Multiboot macros to make a few lines more readable later
MULTIBOOT_PAGE_ALIGN equ 1<<0 MULTIBOOT_PAGE_ALIGN equ 1<<0
MULTIBOOT_MEMORY_INFO equ 1<<1 MULTIBOOT_MEMORY_INFO equ 1<<1
; MULTIBOOT_AOUT_KLUDGE equ 1<<16 ; MULTIBOOT_AOUT_KLUDGE equ 1<<16
MULTIBOOT_HEADER_MAGIC equ 0x1BADB002 MULTIBOOT_HEADER_MAGIC equ 0x1BADB002
MULTIBOOT_HEADER_FLAGS equ MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO ; | MULTIBOOT_AOUT_KLUDGE MULTIBOOT_HEADER_FLAGS equ MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO ; | MULTIBOOT_AOUT_KLUDGE
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS) MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
EXTERN code, bss, end EXTERN code, bss, end
; This is the GRUB Multiboot header. A boot signature ; This is the GRUB Multiboot header. A boot signature
@ -48,6 +49,8 @@ mboot:
dd MULTIBOOT_HEADER_FLAGS dd MULTIBOOT_HEADER_FLAGS
dd MULTIBOOT_CHECKSUM dd MULTIBOOT_CHECKSUM
msg db "?ello from MetalSVM kernel!!", 0
SECTION .text SECTION .text
ALIGN 4 ALIGN 4
stublet: stublet:
@ -67,7 +70,7 @@ stublet:
; jump to the boot processors's C code ; jump to the boot processors's C code
extern main extern main
call main call main
jmp $ ; infinitive loop jmp $
global cpu_init global cpu_init
cpu_init: cpu_init:
@ -109,7 +112,7 @@ global read_ip
read_ip: read_ip:
mov eax, [esp+4] mov eax, [esp+4]
pop DWORD [eax] ; Get the return address pop DWORD [eax] ; Get the return address
add esp, 4 ; Dirty Hack! read_ip cleanup the stack add esp, 4 ; Dirty Hack! read_ip cleanup the stacl
jmp [eax] ; Return. Can't use RET because return jmp [eax] ; Return. Can't use RET because return
; address popped off the stack. ; address popped off the stack.

View file

@ -30,7 +30,7 @@ extern kernel_end
extern apic_mp extern apic_mp
; We use a special name to map this section at the begin of our kernel ; We use a special name to map this section at the begin of our kernel
; => Multiboot needs its magic number at the beginning of the kernel ; => Multiboot needs its magic number at the begin of the kernel
SECTION .mboot SECTION .mboot
global start global start
start: start:
@ -42,19 +42,19 @@ mboot:
; Multiboot macros to make a few lines more readable later ; Multiboot macros to make a few lines more readable later
MULTIBOOT_PAGE_ALIGN equ 1<<0 MULTIBOOT_PAGE_ALIGN equ 1<<0
MULTIBOOT_MEMORY_INFO equ 1<<1 MULTIBOOT_MEMORY_INFO equ 1<<1
; MULTIBOOT_AOUT_KLUDGE equ 1<<16 ; MULTIBOOT_AOUT_KLUDGE equ 1<<16
MULTIBOOT_HEADER_MAGIC equ 0x1BADB002 MULTIBOOT_HEADER_MAGIC equ 0x1BADB002
MULTIBOOT_HEADER_FLAGS equ MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO ; | MULTIBOOT_AOUT_KLUDGE MULTIBOOT_HEADER_FLAGS equ MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO ; | MULTIBOOT_AOUT_KLUDGE
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS) MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
EXTERN code, bss, end EXTERN code, bss, end
; This is the GRUB Multiboot header. A boot signature ; This is the GRUB Multiboot header. A boot signature
dd MULTIBOOT_HEADER_MAGIC dd MULTIBOOT_HEADER_MAGIC
dd MULTIBOOT_HEADER_FLAGS dd MULTIBOOT_HEADER_FLAGS
dd MULTIBOOT_CHECKSUM dd MULTIBOOT_CHECKSUM
ALIGN 4 ALIGN 4
; we need already a valid GDT to switch in the 64bit mode ; we need already a valid GDT to switch in the 64bit modus
GDT64: ; Global Descriptor Table (64-bit). GDT64: ; Global Descriptor Table (64-bit).
.Null: equ $ - GDT64 ; The null descriptor. .Null: equ $ - GDT64 ; The null descriptor.
dw 0 ; Limit (low). dw 0 ; Limit (low).
@ -81,90 +81,112 @@ GDT64: ; Global Descriptor Table (64-bit).
dw $ - GDT64 - 1 ; Limit. dw $ - GDT64 - 1 ; Limit.
dq GDT64 ; Base. dq GDT64 ; Base.
times 256 DD 0 ; Stack for booting times 256 DD 0
startup_stack: startup_stack:
SECTION .data SECTION .data
; Create default page tables for the 64bit kernel ; create default page tables for the 64bit kernel
global boot_pml4 global boot_pgd ; aka PML4
ALIGN 4096 ; of course, the page tables have to be page aligned ALIGN 4096 ; of course, the page tables have to be page aligned
NOPTS equ 512
PAGE_MAP_ENTRIES equ (1<<9) boot_pgd times 512 DQ 0
PAGE_SIZE equ (1<<12) boot_pdpt times 512 DQ 0
boot_pd times 512 DQ 0
boot_pml4 times PAGE_MAP_ENTRIES DQ 0 boot_pt times (NOPTS*512) DQ 0
boot_pdpt times PAGE_MAP_ENTRIES DQ 0
boot_pgd times PAGE_MAP_ENTRIES DQ 0
boot_pgt times (KERNEL_SPACE/PAGE_SIZE) DQ 0
SECTION .text SECTION .text
ALIGN 8 ALIGN 8
%if MAX_CORES > 1 %if MAX_CORES > 1
global smp_entry global smp_entry
smp_entry: smp_entry:
; Initialize cpu features ; enable caching, disable paging and fpu emulation
call cpu_init and eax, 0x1ffffffb
; Initialize cr3 register ; ...and turn on FPU exceptions
mov edi, boot_pml4 or eax, 0x22
mov cr0, eax
; clears the current pgd entry
xor eax, eax
mov cr3, eax
; at this stage, we disable the SSE support
mov eax, cr4
and eax, 0xfffbf9ff
mov cr4, eax
; initialize page table
mov edi, boot_pgd
mov cr3, edi mov cr3, edi
; Enable PAE ; we need to enable PAE modus
mov eax, cr4 mov eax, cr4
or eax, 1 << 5 or eax, 1 << 5
mov cr4, eax mov cr4, eax
; Enable longmode (compatibility mode) ; switch to the compatibility mode (which is part of long mode)
mov ecx, 0xC0000080 mov ecx, 0xC0000080
rdmsr rdmsr
or eax, 1 << 8 or eax, 1 << 8
wrmsr wrmsr
; Enable paging ; enable paging
mov eax, cr0 mov eax, cr0
or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31nd bit, and the PE-bit, which is the 0th bit. or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31nd bit, and the PM-bit, which is the 0th bit.
mov cr0, eax ; According to the multiboot spec the PE-bit has to be set by bootloader already! mov cr0, eax
; Jump to 64-bit longmode mov edi, [esp+4] ; set argumet for smp_start
mov edi, [esp+4] ; Set argumet for smp_start
lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table. lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
jmp GDT64.Code:smp_start64 ; Set the code segment and enter 64-bit long mode. jmp GDT64.Code:smp_start64 ; Set the code segment and enter 64-bit long mode.
jmp $ ; endless loop jmp $ ; endless loop
%endif %endif
; Search MP Floating Pointer Structure search_apic:
search_mps:
push ebp push ebp
mov ebp, esp mov ebp, esp
push ecx push ecx
xor eax, eax xor eax, eax
mov ecx, [ebp+8] mov ecx, [ebp+8]
.l1: L1:
cmp [ecx], DWORD 0x5f504d5f ; MP_FLT_SIGNATURE cmp [ecx], DWORD 0x5f504d5f ; MP_FLT_SIGNATURE
jne .l2 jne L2
mov al, BYTE [ecx+9] mov al, BYTE [ecx+9]
cmp eax, 4 cmp eax, 4
ja .l2 ja L2
mov al, BYTE [ecx+11] mov al, BYTE [ecx+11]
cmp eax, 0 cmp eax, 0
jne .l2 jne L2
mov eax, ecx mov eax, ecx
jmp .l3 jmp L3
.l2: L2:
add ecx, 4 add ecx, 4
cmp ecx, [ebp+12] cmp ecx, [ebp+12]
jb .l1 jb L1
xor eax, eax xor eax, eax
.l3: L3:
pop ecx pop ecx
pop ebp pop ebp
ret ret
check_longmode: ALIGN 4
; Check for cpuid instruction stublet:
mov esp, startup_stack-4
push ebx ; save pointer to the multiboot structure
mov eax, cr0
; enable caching, disable paging and fpu emulation
and eax, 0x1ffffffb
; ...and turn on FPU exceptions
or eax, 0x22
mov cr0, eax
; clears the current pgd entry
xor eax, eax
mov cr3, eax
; at this stage, we disable the SSE support
mov eax, cr4
and eax, 0xfffbf9ff
mov cr4, eax
; do we have the instruction cpuid?
pushfd pushfd
pop eax pop eax
mov ecx, eax mov ecx, eax
@ -176,22 +198,59 @@ check_longmode:
push ecx push ecx
popfd popfd
xor eax, ecx xor eax, ecx
jz .unsupported jz Linvalid
; Check for extended cpu features (cpuid > 0x80000000) ; cpuid > 0x80000000?
mov eax, 0x80000000 mov eax, 0x80000000
cpuid cpuid
cmp eax, 0x80000001 cmp eax, 0x80000001
jb .unsupported ; It is less, there is no long mode. jb Linvalid ; It is less, there is no long mode.
; Check if longmode is supported ; do we have a long mode?
mov eax, 0x80000001 mov eax, 0x80000001
cpuid cpuid
test edx, 1 << 29 ; Test if the LM-bit, which is bit 29, is set in the D-register. test edx, 1 << 29 ; Test if the LM-bit, which is bit 29, is set in the D-register.
jz .unsupported ; They aren't, there is no long mode. jz Linvalid ; They aren't, there is no long mode.
ret
.unsupported:
jmp $
check_lapic: ; initialize page table
mov edi, boot_pgd
mov cr3, edi
; So lets make PML4T[0] point to the PDPT and so on:
mov DWORD [edi], boot_pdpt ; Set the double word at the destination index to pdpt.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
mov edi, boot_pdpt
mov DWORD [edi], boot_pd ; Set the double word at the destination index to pd.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
mov edi, boot_pd
mov ebx, boot_pt
mov ecx, NOPTS
L0:
mov DWORD [edi], ebx ; Set the double word at the destination index to pt.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
add edi, 8
add ebx, 0x1000
loop L0
%ifdef CONFIG_VGA
; map the VGA address into the virtual address space
mov edi, 0xB8000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, 0xB8000
or ebx, 0x00000013
mov DWORD [edi], ebx
%endif
; map multiboot structure into the virtual address space
mov edi, [esp]
and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, [esp]
and ebx, 0xFFFFF000
or ebx, 0x00000003
mov DWORD [edi], ebx
; check if lapic is available
push eax push eax
push ebx push ebx
push ecx push ecx
@ -200,186 +259,123 @@ check_lapic:
cpuid cpuid
and edx, 0x200 and edx, 0x200
cmp edx, 0 cmp edx, 0
je .unsupported je no_lapic
; Map lapic at 0xFEE00000 below the kernel ; map lapic at 0xFEE00000 below the kernel
mov edi, kernel_start - 0x1000 mov edi, kernel_start - 0x1000
shr edi, 9 ; (edi >> 12) * 8 shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pgt add edi, boot_pt
mov ebx, 0xFEE00000 ; LAPIC base address mov ebx, 0xFEE00000
or ebx, 0x00000013 or ebx, 0x00000013
mov DWORD [edi], ebx mov DWORD [edi], ebx
.unsupported: no_lapic:
pop edx pop edx
pop ecx pop ecx
pop ebx pop ebx
pop eax pop eax
ret
cpu_init: ; search APIC
mov eax, cr0
; Enable caching, disable paging and fpu emulation
and eax, 0x1ffffffb
; ...and turn on FPU exceptions
or eax, 0x22
mov cr0, eax
; Clear the current pgd entry
xor eax, eax
mov cr3, eax
; at this stage, we disable the SSE support
mov eax, cr4
and eax, 0xfffbf9ff
mov cr4, eax
ret
; Identity map a single page at address eax
identity_page:
push edi
push ebx
mov edi, eax
and edi, 0xFFFFF000 ; page align in lower half
shr edi, 9 ; (edi >> 12) * 8 (index for boot_pgt)
add edi, boot_pgt
mov ebx, eax
and ebx, 0xFFFFF000 ; page align lower half
or ebx, 0x113 ; set present, global, writable and cache disable bits
mov DWORD [edi], ebx
mov DWORD [edi+4], 0x80000000 ; set execution disable bit in higher half
pop ebx
pop edi
ret
ALIGN 4
stublet:
mov esp, startup_stack-4
; Save pointer to the Multiboot structure
push ebx
; Initialize cpu features
call cpu_init
; Check if longmode is supported
call check_longmode
; Check if lapic is available
call check_lapic
; Find MP Floating Pointer structure
push DWORD 0x100000 push DWORD 0x100000
push DWORD 0xF0000 push DWORD 0xF0000
call search_mps call search_apic
add esp, 8 add esp, 8
cmp eax, 0 cmp eax, 0
jne map_mps jne La
push DWORD 0xA0000 push DWORD 0xA0000
push DWORD 0x9F000 push DWORD 0x9F000
call search_mps call search_apic
add esp, 8 add esp, 8
cmp eax, 0 cmp eax, 0
je map_kernel je Lb
map_mps: La:
; Map MP Floating Pointer structure ; map MP Floating Pointer Structure
mov DWORD [apic_mp], eax mov DWORD [apic_mp], eax
call identity_page mov edi, eax
and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, eax
and ebx, 0xFFFFF000
or ebx, 0x00000013
mov DWORD [edi], ebx
; Map MP Configuration table ; map mp_config
mov eax, [eax+4] ; Offset for physical address of MP table mov edi, [eax+4]
call identity_page and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, [eax+4]
and ebx, 0xFFFFF000
or ebx, 0x00000013
mov DWORD [edi], ebx
%ifdef CONFIG_VGA Lb:
; Map VGA textmode plane
mov eax, 0xB8000
call identity_page
%endif
; Map Multiboot structure
mov eax, [esp] ; Pointer is still on the stack
call identity_page
map_kernel:
mov edi, kernel_start mov edi, kernel_start
shr edi, 9 ; (edi >> 12) * 8 (index for boot_pgt) shr edi, 9 ; (kernel_start >> 12) * 8
add edi, boot_pgt add edi, boot_pt
mov ebx, kernel_start mov ebx, kernel_start
or ebx, 0x103 ; set present, global and writable flags or ebx, 0x00000003
mov ecx, kernel_end ; determine kernel size in number of pages mov ecx, kernel_end ; determine kernel size in number of pages
sub ecx, kernel_start sub ecx, kernel_start
shr ecx, 12 shr ecx, 12
inc ecx inc ecx
.l1:
mov DWORD [edi], ebx Lc:
mov DWORD [edi], ebx ; Set the double word at the destination index to the B-register.
add edi, 8 add edi, 8
add ebx, 0x1000 add ebx, 0x1000
loop .l1 loop Lc
init_paging: ; we need to enable PAE modus
mov edi, boot_pml4
mov cr3, edi
mov DWORD [edi], boot_pdpt
or DWORD [edi], 0x03 ; Set present and writable flags
mov edi, boot_pdpt
mov DWORD [edi], boot_pgd
or DWORD [edi], 0x03 ; Set present and writable flags
mov edi, boot_pgd
mov ebx, boot_pgt
mov ecx, PAGE_MAP_ENTRIES ; Map all boot_pgt to the kernel space
.l1:
mov DWORD [edi], ebx
or DWORD [edi], 0x03 ; Set present and writable flags
add edi, 8
add ebx, 0x1000
loop .l1
; Enable PAE
mov eax, cr4 mov eax, cr4
or eax, 1 << 5 or eax, 1 << 5
mov cr4, eax mov cr4, eax
; Enable longmode (compatibility mode) ; switch to the compatibility mode (which is part of long mode)
mov ecx, 0xC0000080 mov ecx, 0xC0000080
rdmsr rdmsr
or eax, (1 << 8) | (1 << 11) ; IA32_EFER.LME = 1, IA32_EFER.NXE = 1 or eax, 1 << 8
wrmsr wrmsr
; Enable paging ; enable paging
mov eax, cr0 mov eax, cr0
or eax, (1 << 31) | (1 << 0) ; Set the PG-bit, which is the 31nd bit, and the PE-bit, which is the 0th bit. or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31nd bit, and the PM-bit, which is the 0th bit.
mov cr0, eax mov cr0, eax
; Jump to 64-bit longmode pop ebx ; restore pointer to multiboot structure
pop ebx ; Restore pointer to multiboot structure lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table. jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode.
jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode.
Linvalid:
jmp $
[BITS 64] [BITS 64]
start64: start64:
; Initialize segment registers ; initialize segment registers
mov ax, GDT64.Data mov ax, GDT64.Data
mov ds, ax mov ds, ax
mov es, ax mov es, ax
mov fs, ax mov fs, ax
mov gs, ax mov gs, ax
mov ss, ax mov ss, ax
; Set default stack pointer ; set default stack pointer
mov rsp, boot_stack mov rsp, boot_stack
add rsp, KERNEL_STACK_SIZE-16 add rsp, KERNEL_STACK_SIZE-16
; Interpret multiboot information ; interpret multiboot information
extern multiboot_init extern multiboot_init
mov rdi, rbx mov rdi, rbx
call multiboot_init call multiboot_init
; Jump to the boot processors's C code ; jump to the boot processors's C code
extern main extern main
call main call main
jmp $ jmp $
%if MAX_CORES > 1 %if MAX_CORES > 1
smp_start64: smp_start64:
; Initialize segment registers ; initialize segment registers
mov ax, GDT64.Data mov ax, GDT64.Data
mov ds, ax mov ds, ax
mov es, ax mov es, ax
@ -387,12 +383,29 @@ smp_start64:
mov gs, ax mov gs, ax
mov ss, ax mov ss, ax
; Jump to the boot processors's C code ; jump to the boot processors's C code
extern smp_start extern smp_start
call smp_start call smp_start
jmp $ jmp $
%endif %endif
global cpu_init
cpu_init:
; mov eax, cr0
; enable caching, disable paging and fpu emulation
; and eax, 0x1ffffffb
; ...and turn on FPU exceptions
; or eax, 0x22
; mov cr0, eax
; clears the current pgd entry
; xor eax, eax
; mov cr3, eax
; at this stage, we disable the SSE support
; mov eax, cr4
; and eax, 0xfffbf9ff
; mov cr4, eax
; ret
; This will set up our new segment registers and is declared in ; This will set up our new segment registers and is declared in
; C as 'extern void gdt_flush();' ; C as 'extern void gdt_flush();'
global gdt_flush global gdt_flush
@ -729,41 +742,41 @@ extern syscall_handler
; used to realize system calls ; used to realize system calls
isrsyscall: isrsyscall:
cli ; disable interrupts during prologue push r15
push r14
; save caller saved registers push r13
push r12
push r11 push r11
push r10 push r10
push r9 push r9
push r8 push r8
push rdi push rdi
push rsi push rsi
push rbp
push rsp
push rbx
push rdx push rdx
push rcx push rcx
push rax
; set kernel data segmenets mov rdi, rsp
mov ax, 0x10
mov ds, ax
; x86-64 ABI calling convention
mov r8, rbx
mov r9, rax
mov rax, 0 ; we've not used vector registers for this va_arg call
sti ; enable interrupts during syscall
call syscall_handler call syscall_handler
cli ; disable interrupts during prologue
; restore caller saved registers pop rax
pop rcx pop rcx
pop rdx pop rdx
pop rbx
add rsp, 8
pop rbp
pop rsi pop rsi
pop rdi pop rdi
pop r8 pop r8
pop r9 pop r9
pop r10 pop r10
pop r11 pop r11
pop r12
pop r13
pop r14
iretq iretq
global irq0 global irq0

View file

@ -50,7 +50,7 @@ size_t* get_current_stack(void)
#endif #endif
// use new page table // use new page table
write_cr3(virt_to_phys((size_t)curr_task->page_map)); write_cr3(virt_to_phys((size_t)curr_task->pgd));
return curr_task->last_stack_pointer; return curr_task->last_stack_pointer;
} }

View file

@ -192,8 +192,8 @@ static const char *exception_messages[] = {
"Breakpoint", "Into Detected Overflow", "Out of Bounds", "Invalid Opcode", "Breakpoint", "Into Detected Overflow", "Out of Bounds", "Invalid Opcode",
"No Coprocessor", "Double Fault", "Coprocessor Segment Overrun", "Bad TSS", "No Coprocessor", "Double Fault", "Coprocessor Segment Overrun", "Bad TSS",
"Segment Not Present", "Stack Fault", "General Protection Fault", "Page Fault", "Segment Not Present", "Stack Fault", "General Protection Fault", "Page Fault",
"Unknown Interrupt", "Math Fault", "Alignment Check", "Machine Check", "Unknown Interrupt", "Coprocessor Fault", "Alignment Check", "Machine Check",
"SIMD Floating-Point", "Virtualization", "Reserved", "Reserved", "Reserved", "Reserved", "Reserved", "Reserved", "Reserved", "Reserved",
"Reserved", "Reserved", "Reserved", "Reserved", "Reserved", "Reserved", "Reserved", "Reserved", "Reserved", "Reserved", "Reserved", "Reserved",
"Reserved", "Reserved" }; "Reserved", "Reserved" };
@ -208,18 +208,13 @@ static const char *exception_messages[] = {
static void fault_handler(struct state *s) static void fault_handler(struct state *s)
{ {
if (s->int_no < 32) { if (s->int_no < 32) {
task_t* task = per_core(current_task); kputs(exception_messages[s->int_no]);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
kprintf("%s Exception (%d) at cs:eip = %#x:%#lx, core = %u, task = %u, error = %#x\n", kprintf(" Exception (%d) at 0x%x:0x%x on core %u, error code 0x%x, eflags 0x%x\n",
"Register state: eflags = %#lx, eax = %#lx, ebx = %#lx, ecx = %#lx, edx = %#lx, edi = %#lx, esi = %#lx, ebp = %#llx, esp = %#lx\n", s->int_no, s->cs, s->eip, CORE_ID, s->error, s->eflags);
exception_messages[s->int_no], s->int_no, s->cs, s->eip, CORE_ID, task->id, s->error,
s->eflags, s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
#elif defined(CONFIG_X86_64) #elif defined(CONFIG_X86_64)
kprintf("%s Exception (%d) at cs:rip = %#x:%#lx, core = %u, task = %u, error = %#lx\n" kprintf(" Exception (%d) at 0x%llx:0x%llx on core %u, error code 0x%llx, rflags 0x%llx\n",
"Register state: rflags = %#lx, rax = %#lx, rbx = %#lx, rcx = %#lx, rdx = %#lx, rdi = %#lx, rsi = %#lx, rbp = %#llx, rsp = %#lx\n", s->int_no, s->cs, s->rip, CORE_ID, s->error, s->rflags);
exception_messages[s->int_no], s->int_no, s->cs, s->rip, CORE_ID, task->id, s->error,
s->rflags, s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp);
#endif #endif
/* Now, we signalize that we have handled the interrupt */ /* Now, we signalize that we have handled the interrupt */

View file

@ -37,7 +37,7 @@ void kb_init(size_t size, tid_t tid) {
} }
void kb_finish(void) { void kb_finish(void) {
kfree(kb_buffer.buffer); kfree(kb_buffer.buffer, (kb_buffer.maxsize * sizeof(char)));
kb_buffer.buffer = NULL; kb_buffer.buffer = NULL;
kb_buffer.size = 0; kb_buffer.size = 0;
kb_buffer.maxsize = 0; kb_buffer.maxsize = 0;

View file

@ -1,123 +0,0 @@
/*
* Copyright 2013 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel
* @file arch/x86/kernel/pmc.c
* @brief Simple interface to IA32 Performance Monitor Counters
*
* This implementation is in parts specific for Intel Core 2 Duo Processors!
*/
#include <errno.h>
#include <asm/pmc.h>
#include <asm/processor.h>
static struct pmc_caps caps = { 0 };
struct pmc_caps* pmc_init()
{
if (!caps.version) {
uint32_t a, b, c, d;
cpuid(0x0A, &a, &b, &c, &d);
caps.version = (a >> 0) & 0xff;
caps.gp_count = (a >> 8) & 0xff;
caps.gp_width = (a >> 16) & 0xff;
caps.ff_count = (d >> 0) & 0x1f;
caps.ff_width = (d >> 5) & 0xff;
caps.arch_events = (b >> 0) & 0x3f;
// check if IA32_PERF_CAPABILITIES MSR is available
cpuid(0x01, &a, &b, &c, &d);
if (caps.version >= 2) {
if (c & (1 << 15 /* PDCM */))
caps.msr = rdmsr(IA32_PERF_CAPABILITIES);
}
}
return &caps;
}
int pmc_config(uint8_t i, uint16_t event, uint32_t flags, uint8_t umask, uint8_t cmask)
{
if (BUILTIN_EXPECT(i > caps.gp_count, 0))
return -EINVAL;
uint64_t evtsel = flags | event;
evtsel |= (cmask << PMC_EVTSEL_CMASK) | (umask << PMC_EVTSEL_UMASK);
wrmsr(IA32_PERFEVTSEL(i), evtsel);
wrmsr(IA32_PMC(i), 0);
return 0;
}
inline int pmc_start(uint8_t i)
{
if (BUILTIN_EXPECT(i > caps.gp_count, 0))
return -EINVAL;
wrmsr(IA32_PERFEVTSEL(i), rdmsr(IA32_PERFEVTSEL(i)) | PMC_EVTSEL_EN);
return 0;
}
inline int pmc_stop(uint8_t i)
{
if (BUILTIN_EXPECT(i > caps.gp_count, 0))
return -EINVAL;
wrmsr(IA32_PERFEVTSEL(i), rdmsr(IA32_PERFEVTSEL(i)) & ~PMC_EVTSEL_EN);
}
inline int pmc_start_all()
{
if (BUILTIN_EXPECT(caps.version < 2, 0))
return -EINVAL;
wrmsr(MSR_PERF_GLOBAL_CTRL, -1L);
}
inline int pmc_stop_all()
{
if (BUILTIN_EXPECT(caps.version < 2, 0))
return -EINVAL;
wrmsr(MSR_PERF_GLOBAL_CTRL, 0);
}
inline uint64_t pmc_read(uint8_t i)
{
if (BUILTIN_EXPECT(i > caps.gp_count, 0))
return 0;
return rdmsr(IA32_PMC(i));
}
inline int pmc_write(uint8_t i, uint64_t val)
{
if (BUILTIN_EXPECT(i > caps.gp_count, 0))
return -EINVAL;
if (caps.version >= 2 && caps.msr & (1 << 13 /* FW_WRITE */))
wrmsr(IA32_A_PMC(i), val);
else
wrmsr(IA32_PMC(i), val);
}

View file

@ -85,51 +85,27 @@ static void fpu_init_fxsr(union fpu_state* fpu)
fx->mxcsr = 0x1f80; fx->mxcsr = 0x1f80;
} }
cpu_info_t cpu_info = { 0, 0, 0, 0 }; cpu_info_t cpu_info = { 0, 0 };
static uint32_t cpu_freq = 0; static uint32_t cpu_freq = 0;
int cpu_detection(void) int cpu_detection(void)
{ {
uint32_t a, b, c, d; uint32_t a, b;
size_t cr4; size_t cr4;
uint8_t first_time = 0; uint8_t first_time = 0;
if (!cpu_info.feature1) { if (!cpu_info.feature1) {
first_time = 1; first_time = 1;
cpuid(1, &a, &b, &cpu_info.feature2, &cpu_info.feature1); cpuid(1, &a, &b, &cpu_info.feature2, &cpu_info.feature1);
cpuid(0x80000001, &a, &b, &c, &cpu_info.feature3);
cpuid(0x80000008, &cpu_info.addr_width, &b, &c, &d);
}
if (first_time) {
kprintf("Paging features: %s%s%s%s%s",
(cpu_info.feature1 & (1 << CPU_FEATUE_PSE)) ? "PSE (2/4Mb) " : "",
(cpu_info.feature1 & (1 << CPU_FEATURE_PAE)) ? "PAE " : "",
(cpu_info.feature1 & (1 << CPU_FEATURE_PGE)) ? "PGE " : "",
(cpu_info.feature1 & (1 << CPU_FEATURE_PAT)) ? "PAT " : "",
(cpu_info.feature1 & (1 << CPU_FEATURE_PSE36)) ? "PSE36" : "");
kprintf("%s%s%s\n",
(cpu_info.feature3 & (1 << CPU_FEATURE_NX)) ? "NX " : "",
(cpu_info.feature3 & (1 << CPU_FEATURE_1GBHP)) ? "PSE (1Gb) " : "",
(cpu_info.feature3 & (1 << CPU_FEATURE_LM)) ? "LM" : "");
kprintf("Physical adress-width: %u bits\n", cpu_info.addr_width & 0xff);
kprintf("Linear adress-width: %u bits\n", (cpu_info.addr_width >> 8) & 0xff);
} }
cr4 = read_cr4(); cr4 = read_cr4();
if (has_fxsr()) if (has_fxsr())
cr4 |= CR4_OSFXSR; cr4 |= 0x200; // set the OSFXSR bit
if (has_sse()) if (has_sse())
cr4 |= CR4_OSXMMEXCPT; cr4 |= 0x400; // set the OSXMMEXCPT bit
if (has_pge())
cr4 |= CR4_PGE;
write_cr4(cr4); write_cr4(cr4);
if (has_nx())
wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);
if (first_time && has_sse()) if (first_time && has_sse())
wmb = sfence; wmb = sfence;
@ -154,6 +130,7 @@ int cpu_detection(void)
} }
if (first_time && on_hypervisor()) { if (first_time && on_hypervisor()) {
uint32_t c, d;
char vendor_id[13]; char vendor_id[13];
kprintf("MetalSVM is running on a hypervisor!\n"); kprintf("MetalSVM is running on a hypervisor!\n");
@ -167,7 +144,7 @@ int cpu_detection(void)
kprintf("Hypervisor Vendor Id: %s\n", vendor_id); kprintf("Hypervisor Vendor Id: %s\n", vendor_id);
kprintf("Maximum input value for hypervisor CPUID info: 0x%x\n", a); kprintf("Maximum input value for hypervisor CPUID info: 0x%x\n", a);
} }
return 0; return 0;
} }

View file

@ -42,4 +42,38 @@ L3:
pop rax pop rax
ret ret
%if 0
; The following function is derived from JamesM's kernel development tutorials
; (http://www.jamesmolloy.co.uk/tutorial_html/)
global copy_page_physical
copy_page_physical:
push esi ; According to __cdecl, we must preserve the contents of ESI
push edi ; and EDI.
pushf ; push EFLAGS, so we can pop it and reenable interrupts
; later, if they were enabled anyway.
cli ; Disable interrupts, so we aren't interrupted.
; Load these in BEFORE we disable paging!
mov edi, [esp+12+4] ; Destination address
mov esi, [esp+12+8] ; Source address
mov edx, cr0 ; Get the control register...
and edx, 0x7fffffff ; and...
mov cr0, edx ; Disable paging.
cld
mov ecx, 0x400 ; 1024*4bytes = 4096 bytes = page size
rep movsd ; copy page
mov edx, cr0 ; Get the control register again
or edx, 0x80000000 ; and...
mov cr0, edx ; Enable paging.
popf ; Pop EFLAGS back.
pop edi ; Get the original value of EDI
pop esi ; and ESI back.
ret
%endif
SECTION .note.GNU-stack noalloc noexec nowrite progbits SECTION .note.GNU-stack noalloc noexec nowrite progbits

View file

@ -1,68 +0,0 @@
/*
* Copyright 2011 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <asm/uart.h>
#include <asm/io.h>
static inline int is_transmit_empty()
{
return inportb(UART_PORT + 5) & 0x20;
}
static inline int received()
{
return inportb(UART_PORT + 5) & 1;
}
void uart_init()
{
outportb(UART_PORT + 1, 0x00); // Disable all interrupts
outportb(UART_PORT + 3, 0x80); // Enable DLAB (set baud rate divisor)
outportb(UART_PORT + 0, 0x0C); // Set divisor to 12 (lo byte) 9600 baud
outportb(UART_PORT + 1, 0x00); // (hi byte)
outportb(UART_PORT + 3, 0x03); // 8 bits, no parity, one stop bit (8N1)
outportb(UART_PORT + 2, 0xC7); // Enable FIFO, clear them, with 14-byte threshold
outportb(UART_PORT + 4, 0x0B); // IRQs enabled, RTS/DSR set
}
char uart_getchar()
{
while (!received());
return inportb(UART_PORT);
}
void uart_putchar(char chr)
{
while (!is_transmit_empty());
outportb(UART_PORT, chr);
}
void uart_puts(const char* str)
{
while (*str) uart_putchar(*(str++));
}
int uart_gets(char* str, size_t len)
{
size_t ret = 0;
while (ret < len)
str[ret] = uart_getchar();
return ret;
}

View file

@ -46,7 +46,7 @@
* 0x00100000 - 0x0DEADFFF: Kernel (size depends on the configuration) (221MB) * 0x00100000 - 0x0DEADFFF: Kernel (size depends on the configuration) (221MB)
* 0x0DEAE000 - 0x3FFFEFFF: Kernel heap (801MB) * 0x0DEAE000 - 0x3FFFEFFF: Kernel heap (801MB)
* 0x3FFFF000 - 0x3FFFFFFF: Page Tables are mapped in this region (4KB) * 0x3FFFF000 - 0x3FFFFFFF: Page Tables are mapped in this region (4KB)
* (The first 256 entries belongs to kernel space) * (The last 256 entries belongs to kernel space)
*/ */
/* /*
@ -57,14 +57,13 @@ extern const void kernel_start;
extern const void kernel_end; extern const void kernel_end;
// boot task's page directory and page directory lock // boot task's page directory and page directory lock
static page_map_t boot_pgd = {{[0 ... MAP_ENTRIES-1] = 0}}; static page_dir_t boot_pgd = {{[0 ... PGT_ENTRIES-1] = 0}};
static page_map_t boot_pgt[KERNEL_SPACE/(MAP_ENTRIES*PAGE_SIZE)]; static page_table_t pgt_container = {{[0 ... PGT_ENTRIES-1] = 0}};
static page_map_t pgt_container = {{[0 ... MAP_ENTRIES-1] = 0}}; static page_table_t boot_pgt[KERNEL_SPACE/(1024*PAGE_SIZE)];
static spinlock_t kslock = SPINLOCK_INIT; static spinlock_t kslock = SPINLOCK_INIT;
static int paging_enabled = 0; static int paging_enabled = 0;
page_map_t* get_boot_page_map(void) page_dir_t* get_boot_pgd(void)
{ {
return &boot_pgd; return &boot_pgd;
} }
@ -72,26 +71,26 @@ page_map_t* get_boot_page_map(void)
/* /*
* TODO: We create a full copy of the current task. Copy-On-Access will be the better solution. * TODO: We create a full copy of the current task. Copy-On-Access will be the better solution.
* *
* No PGD locking is needed because only create_page_map use this function and holds already the * No PGD locking is needed because onls create_pgd use this function and holds already the
* PGD lock. * PGD lock.
*/ */
inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_map_t* pgt, int* counter) inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_table_t* pgt, int* counter)
{ {
uint32_t i; uint32_t i;
page_map_t* new_pgt; page_table_t* new_pgt;
size_t phyaddr; size_t phyaddr;
if (BUILTIN_EXPECT(!pgt, 0)) if (BUILTIN_EXPECT(!pgt, 0))
return 0; return 0;
new_pgt = kmalloc(sizeof(page_map_t)); new_pgt = kmalloc(sizeof(page_table_t));
if (!new_pgt) if (!new_pgt)
return 0; return 0;
memset(new_pgt, 0x00, sizeof(page_map_t)); memset(new_pgt, 0x00, sizeof(page_table_t));
if (counter) if (counter)
(*counter)++; (*counter)++;
for(i=0; i<MAP_ENTRIES; i++) { for(i=0; i<PGT_ENTRIES; i++) {
if (pgt->entries[i] & PAGE_MASK) { if (pgt->entries[i] & PAGE_MASK) {
if (!(pgt->entries[i] & PG_USER)) { if (!(pgt->entries[i] & PG_USER)) {
// Kernel page => copy only page entries // Kernel page => copy only page entries
@ -118,11 +117,11 @@ inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_map_
return phyaddr; return phyaddr;
} }
int create_page_map(task_t* task, int copy) int create_pgd(task_t* task, int copy)
{ {
page_map_t* pgd; page_dir_t* pgd;
page_map_t* pgt; page_table_t* pgt;
page_map_t* pgt_container; page_table_t* pgt_container;
uint32_t i; uint32_t i;
uint32_t index1, index2; uint32_t index1, index2;
size_t viraddr, phyaddr; size_t viraddr, phyaddr;
@ -134,26 +133,25 @@ int create_page_map(task_t* task, int copy)
// we already know the virtual address of the "page table container" // we already know the virtual address of the "page table container"
// (see file header) // (see file header)
pgt_container = (page_map_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK); pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
// create new page directory for the new task // create new page directory for the new task
pgd = kmalloc(sizeof(page_map_t)); pgd = kmalloc(sizeof(page_dir_t));
if (!pgd) if (!pgd)
return -ENOMEM; return -ENOMEM;
memset(pgd, 0x00, sizeof(page_map_t)); memset(pgd, 0x00, sizeof(page_dir_t));
// create a new "page table container" for the new task // create a new "page table container" for the new task
pgt = kmalloc(sizeof(page_map_t)); pgt = kmalloc(sizeof(page_table_t));
if (!pgt) { if (!pgt) {
kfree(pgd, sizeof(page_map_t)); kfree(pgd, sizeof(page_dir_t));
return -ENOMEM; return -ENOMEM;
} }
memset(pgt, 0x00, sizeof(page_map_t)); memset(pgt, 0x00, sizeof(page_table_t));
// copy kernel tables
spinlock_lock(&kslock); spinlock_lock(&kslock);
for(i=0; i<MAP_ENTRIES; i++) { for(i=0; i<PGT_ENTRIES; i++) {
pgd->entries[i] = boot_pgd.entries[i]; pgd->entries[i] = boot_pgd.entries[i];
// only kernel entries will be copied // only kernel entries will be copied
if (pgd->entries[i] && !(pgd->entries[i] & PG_USER)) if (pgd->entries[i] && !(pgd->entries[i] & PG_USER))
@ -171,33 +169,36 @@ int create_page_map(task_t* task, int copy)
pgd->entries[index1] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_TABLE; pgd->entries[index1] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
pgt->entries[index2] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_PAGE; pgt->entries[index2] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_PAGE;
task->page_map = pgd; task->pgd = pgd;
if (copy) { if (copy) {
spinlock_irqsave_lock(&curr_task->page_lock); spinlock_irqsave_lock(&curr_task->pgd_lock);
for (i=KERNEL_SPACE/(1024*PAGE_SIZE); i<1024; i++) { for (i=KERNEL_SPACE/(1024*PAGE_SIZE); i<1024; i++) {
if (!(curr_task->page_map->entries[i])) if (!(curr_task->pgd->entries[i]))
continue; continue;
if (!(curr_task->page_map->entries[i] & PG_USER)) if (!(curr_task->pgd->entries[i] & PG_USER))
continue; continue;
phyaddr = copy_page_table(task, i, (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + i*PAGE_SIZE) & PAGE_MASK), &counter); phyaddr = copy_page_table(task, i, (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + i*PAGE_SIZE) & PAGE_MASK), &counter);
if (phyaddr) { if (phyaddr) {
pgd->entries[i] = (phyaddr & PAGE_MASK) | (curr_task->page_map->entries[i] & 0xFFF); pgd->entries[i] = (phyaddr & PAGE_MASK) | (curr_task->pgd->entries[i] & 0xFFF);
pgt->entries[i] = (phyaddr & PAGE_MASK) | KERN_PAGE; pgt->entries[i] = (phyaddr & PAGE_MASK) | KERN_PAGE;
} }
} }
spinlock_irqsave_unlock(&curr_task->page_lock); spinlock_irqsave_unlock(&curr_task->pgd_lock);
} }
return counter; return counter;
} }
int drop_page_map(void) /*
* drops all page frames and the PGD of a user task
*/
int drop_pgd(void)
{ {
page_map_t* pgd = per_core(current_task)->page_map; page_dir_t* pgd = per_core(current_task)->pgd;
size_t phy_pgd = virt_to_phys((size_t) pgd); size_t phy_pgd = virt_to_phys((size_t) pgd);
task_t* task = per_core(current_task); task_t* task = per_core(current_task);
uint32_t i; uint32_t i;
@ -205,9 +206,9 @@ int drop_page_map(void)
if (BUILTIN_EXPECT(pgd == &boot_pgd, 0)) if (BUILTIN_EXPECT(pgd == &boot_pgd, 0))
return -EINVAL; return -EINVAL;
spinlock_irqsave_lock(&task->page_lock); spinlock_irqsave_lock(&task->pgd_lock);
for(i=0; i<MAP_ENTRIES; i++) { for(i=0; i<PGT_ENTRIES; i++) {
if (pgd->entries[i] & PG_USER) { if (pgd->entries[i] & PG_USER) {
put_page(pgd->entries[i] & PAGE_MASK); put_page(pgd->entries[i] & PAGE_MASK);
pgd->entries[i] = 0; pgd->entries[i] = 0;
@ -217,9 +218,9 @@ int drop_page_map(void)
// freeing the page directory // freeing the page directory
put_page(phy_pgd); put_page(phy_pgd);
task->page_map = NULL; task->pgd = NULL;
spinlock_irqsave_unlock(&task->page_lock); spinlock_irqsave_unlock(&task->pgd_lock);
return 0; return 0;
} }
@ -228,24 +229,24 @@ size_t virt_to_phys(size_t viraddr)
{ {
task_t* task = per_core(current_task); task_t* task = per_core(current_task);
uint32_t index1, index2; uint32_t index1, index2;
page_map_t* pgt; page_table_t* pgt;
size_t ret = 0; size_t ret = 0;
if (!paging_enabled) if (!paging_enabled)
return viraddr; return viraddr;
if (BUILTIN_EXPECT(!task || !task->page_map, 0)) if (BUILTIN_EXPECT(!task || !task->pgd, 0))
return 0; return 0;
spinlock_irqsave_lock(&task->page_lock); spinlock_irqsave_lock(&task->pgd_lock);
index1 = viraddr >> 22; index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF; index2 = (viraddr >> 12) & 0x3FF;
if (!(task->page_map->entries[index1] & PAGE_MASK)) if (!(task->pgd->entries[index1] & PAGE_MASK))
goto out; goto out;
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK); pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2])) if (!pgt || !(pgt->entries[index2]))
goto out; goto out;
@ -254,7 +255,7 @@ size_t virt_to_phys(size_t viraddr)
out: out:
//kprintf("vir %p to phy %p\n", viraddr, ret); //kprintf("vir %p to phy %p\n", viraddr, ret);
spinlock_irqsave_unlock(&task->page_lock); spinlock_irqsave_unlock(&task->pgd_lock);
return ret; return ret;
} }
@ -262,11 +263,11 @@ out:
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags) size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{ {
task_t* task = per_core(current_task); task_t* task = per_core(current_task);
page_map_t* pgt; page_table_t* pgt;
size_t index, i; size_t index, i;
size_t ret; size_t ret;
if (BUILTIN_EXPECT(!task || !task->page_map, 0)) if (BUILTIN_EXPECT(!task || !task->pgd, 0))
return 0; return 0;
if (BUILTIN_EXPECT(!paging_enabled && (viraddr != phyaddr), 0)) if (BUILTIN_EXPECT(!paging_enabled && (viraddr != phyaddr), 0))
@ -275,7 +276,7 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
if (flags & MAP_KERNEL_SPACE) if (flags & MAP_KERNEL_SPACE)
spinlock_lock(&kslock); spinlock_lock(&kslock);
else else
spinlock_irqsave_lock(&task->page_lock); spinlock_irqsave_lock(&task->pgd_lock);
if (!viraddr) { if (!viraddr) {
viraddr = vm_alloc(npages, flags); viraddr = vm_alloc(npages, flags);
@ -291,10 +292,10 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE, phyaddr+=PAGE_SIZE) { for(i=0; i<npages; i++, viraddr+=PAGE_SIZE, phyaddr+=PAGE_SIZE) {
index = viraddr >> 22; index = viraddr >> 22;
if (!(task->page_map->entries[index])) { if (!(task->pgd->entries[index])) {
page_map_t* pgt_container; page_table_t* pgt_container;
pgt = (page_map_t*) get_page(); pgt = (page_table_t*) get_pages(1);
if (BUILTIN_EXPECT(!pgt, 0)) { if (BUILTIN_EXPECT(!pgt, 0)) {
kputs("map_address: out of memory\n"); kputs("map_address: out of memory\n");
ret = 0; ret = 0;
@ -303,17 +304,17 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
// set the new page table into the directory // set the new page table into the directory
if (flags & MAP_USER_SPACE) if (flags & MAP_USER_SPACE)
task->page_map->entries[index] = (uint32_t)pgt|USER_TABLE; task->pgd->entries[index] = (uint32_t)pgt|USER_TABLE;
else else
task->page_map->entries[index] = (uint32_t)pgt|KERN_TABLE; task->pgd->entries[index] = (uint32_t)pgt|KERN_TABLE;
// if paging is already enabled, we need to use the virtual address // if paging is already enabled, we need to use the virtual address
if (paging_enabled) if (paging_enabled)
// we already know the virtual address of the "page table container" // we already know the virtual address of the "page table container"
// (see file header) // (see file header)
pgt_container = (page_map_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK); pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
else else
pgt_container = (page_map_t*) (task->page_map->entries[(KERNEL_SPACE - PAGE_SIZE) >> 22] & PAGE_MASK); pgt_container = (page_table_t*) (task->pgd->entries[(KERNEL_SPACE - PAGE_SIZE) >> 22] & PAGE_MASK);
if (BUILTIN_EXPECT(!pgt_container, 0)) { if (BUILTIN_EXPECT(!pgt_container, 0)) {
kputs("map_address: internal error\n"); kputs("map_address: internal error\n");
@ -329,11 +330,11 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
memset((void*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK), 0x00, PAGE_SIZE); memset((void*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK), 0x00, PAGE_SIZE);
else else
memset(pgt, 0x00, PAGE_SIZE); memset(pgt, 0x00, PAGE_SIZE);
} else pgt = (page_map_t*) (task->page_map->entries[index] & PAGE_MASK); } else pgt = (page_table_t*) (task->pgd->entries[index] & PAGE_MASK);
/* convert physical address to virtual */ /* convert physical address to virtual */
if (paging_enabled) if (paging_enabled)
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK); pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK);
index = (viraddr >> 12) & 0x3FF; index = (viraddr >> 12) & 0x3FF;
if (pgt->entries[index] && !(flags & MAP_REMAP)) { if (pgt->entries[index] && !(flags & MAP_REMAP)) {
@ -381,7 +382,7 @@ out:
if (flags & MAP_KERNEL_SPACE) if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock); spinlock_unlock(&kslock);
else else
spinlock_irqsave_unlock(&task->page_lock); spinlock_irqsave_unlock(&task->pgd_lock);
return ret; return ret;
} }
@ -391,18 +392,18 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
uint32_t index1, index2, newflags; uint32_t index1, index2, newflags;
size_t viraddr = start & 0xFFFFF000; size_t viraddr = start & 0xFFFFF000;
size_t phyaddr; size_t phyaddr;
page_map_t* pgt; page_table_t* pgt;
page_map_t* pgd; page_dir_t* pgd;
task_t* task = per_core(current_task); task_t* task = per_core(current_task);
if (BUILTIN_EXPECT(!paging_enabled, 0)) if (BUILTIN_EXPECT(!paging_enabled, 0))
return -EINVAL; return -EINVAL;
pgd = per_core(current_task)->page_map; pgd = per_core(current_task)->pgd;
if (BUILTIN_EXPECT(!pgd, 0)) if (BUILTIN_EXPECT(!pgd, 0))
return -EINVAL; return -EINVAL;
spinlock_irqsave_lock(&task->page_lock); spinlock_irqsave_lock(&task->pgd_lock);
while (viraddr < end) while (viraddr < end)
{ {
@ -410,7 +411,7 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
index2 = (viraddr >> 12) & 0x3FF; index2 = (viraddr >> 12) & 0x3FF;
while ((viraddr < end) && (index2 < 1024)) { while ((viraddr < end) && (index2 < 1024)) {
pgt = (page_map_t*) (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK); pgt = (page_table_t*) (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (pgt && pgt->entries[index2]) { if (pgt && pgt->entries[index2]) {
phyaddr = pgt->entries[index2] & PAGE_MASK; phyaddr = pgt->entries[index2] & PAGE_MASK;
newflags = pgt->entries[index2] & 0xFFF; // get old flags newflags = pgt->entries[index2] & 0xFFF; // get old flags
@ -447,7 +448,7 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
} }
} }
spinlock_irqsave_unlock(&task->page_lock); spinlock_irqsave_unlock(&task->pgd_lock);
return 0; return 0;
} }
@ -463,9 +464,9 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
uint32_t index1, index2, j; uint32_t index1, index2, j;
size_t viraddr, i, ret = 0; size_t viraddr, i, ret = 0;
size_t start, end; size_t start, end;
page_map_t* pgt; page_table_t* pgt;
if (BUILTIN_EXPECT(!task || !task->page_map || !paging_enabled, 0)) if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return 0; return 0;
if (flags & MAP_KERNEL_SPACE) { if (flags & MAP_KERNEL_SPACE) {
@ -482,7 +483,7 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
if (flags & MAP_KERNEL_SPACE) if (flags & MAP_KERNEL_SPACE)
spinlock_lock(&kslock); spinlock_lock(&kslock);
else else
spinlock_irqsave_lock(&task->page_lock); spinlock_irqsave_lock(&task->pgd_lock);
viraddr = i = start; viraddr = i = start;
j = 0; j = 0;
@ -490,7 +491,7 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
index1 = i >> 22; index1 = i >> 22;
index2 = (i >> 12) & 0x3FF; index2 = (i >> 12) & 0x3FF;
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK); pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2])) { if (!pgt || !(pgt->entries[index2])) {
i+=PAGE_SIZE; i+=PAGE_SIZE;
j++; j++;
@ -508,7 +509,7 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
if (flags & MAP_KERNEL_SPACE) if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock); spinlock_unlock(&kslock);
else else
spinlock_irqsave_unlock(&task->page_lock); spinlock_irqsave_unlock(&task->pgd_lock);
return ret; return ret;
} }
@ -518,22 +519,22 @@ int unmap_region(size_t viraddr, uint32_t npages)
task_t* task = per_core(current_task); task_t* task = per_core(current_task);
uint32_t i; uint32_t i;
uint32_t index1, index2; uint32_t index1, index2;
page_map_t* pgt; page_table_t* pgt;
if (BUILTIN_EXPECT(!task || !task->page_map || !paging_enabled, 0)) if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return -EINVAL; return -EINVAL;
if (viraddr <= KERNEL_SPACE) if (viraddr <= KERNEL_SPACE)
spinlock_lock(&kslock); spinlock_lock(&kslock);
else else
spinlock_irqsave_lock(&task->page_lock); spinlock_irqsave_lock(&task->pgd_lock);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE) for(i=0; i<npages; i++, viraddr+=PAGE_SIZE)
{ {
index1 = viraddr >> 22; index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF; index2 = (viraddr >> 12) & 0x3FF;
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK); pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt) if (!pgt)
continue; continue;
pgt->entries[index2] &= ~PG_PRESENT; pgt->entries[index2] &= ~PG_PRESENT;
@ -547,7 +548,7 @@ int unmap_region(size_t viraddr, uint32_t npages)
if (viraddr <= KERNEL_SPACE) if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock); spinlock_unlock(&kslock);
else else
spinlock_irqsave_unlock(&task->page_lock); spinlock_irqsave_unlock(&task->pgd_lock);
return 0; return 0;
} }
@ -557,22 +558,22 @@ int vm_free(size_t viraddr, uint32_t npages)
task_t* task = per_core(current_task); task_t* task = per_core(current_task);
uint32_t i; uint32_t i;
uint32_t index1, index2; uint32_t index1, index2;
page_map_t* pgt; page_table_t* pgt;
if (BUILTIN_EXPECT(!task || !task->page_map || !paging_enabled, 0)) if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return -EINVAL; return -EINVAL;
if (viraddr <= KERNEL_SPACE) if (viraddr <= KERNEL_SPACE)
spinlock_lock(&kslock); spinlock_lock(&kslock);
else else
spinlock_irqsave_lock(&task->page_lock); spinlock_irqsave_lock(&task->pgd_lock);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE) for(i=0; i<npages; i++, viraddr+=PAGE_SIZE)
{ {
index1 = viraddr >> 22; index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF; index2 = (viraddr >> 12) & 0x3FF;
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK); pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt) if (!pgt)
continue; continue;
pgt->entries[index2] = 0; pgt->entries[index2] = 0;
@ -583,7 +584,7 @@ int vm_free(size_t viraddr, uint32_t npages)
if (viraddr <= KERNEL_SPACE) if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock); spinlock_unlock(&kslock);
else else
spinlock_irqsave_unlock(&task->page_lock); spinlock_irqsave_unlock(&task->pgd_lock);
return 0; return 0;
} }
@ -592,8 +593,8 @@ int print_paging_tree(size_t viraddr)
{ {
task_t* task = per_core(current_task); task_t* task = per_core(current_task);
uint32_t index1, index2; uint32_t index1, index2;
page_map_t* pgd = NULL; page_dir_t* pgd = NULL;
page_map_t* pgt = NULL; page_table_t* pgt = NULL;
if (BUILTIN_EXPECT(!viraddr, 0)) if (BUILTIN_EXPECT(!viraddr, 0))
return -EINVAL; return -EINVAL;
@ -601,20 +602,20 @@ int print_paging_tree(size_t viraddr)
index1 = viraddr >> 22; index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF; index2 = (viraddr >> 12) & 0x3FF;
spinlock_irqsave_lock(&task->page_lock); spinlock_irqsave_lock(&task->pgd_lock);
kprintf("Paging dump of address 0x%x\n", viraddr); kprintf("Paging dump of address 0x%x\n", viraddr);
pgd = task->page_map; pgd = task->pgd;
kprintf("\tPage directory entry %u: ", index1); kprintf("\tPage directory entry %u: ", index1);
if (pgd) { if (pgd) {
kprintf("0x%0x\n", pgd->entries[index1]); kprintf("0x%0x\n", pgd->entries[index1]);
pgt = (page_map_t*) (pgd->entries[index1] & PAGE_MASK); pgt = (page_table_t*) (pgd->entries[index1] & PAGE_MASK);
} else } else
kputs("invalid page directory\n"); kputs("invalid page directory\n");
// convert physical address to virtual /* convert physical address to virtual */
if (paging_enabled && pgt) if (paging_enabled && pgt)
pgt = (page_map_t*) (KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE); pgt = (page_table_t*) (KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE);
kprintf("\tPage table entry %u: ", index2); kprintf("\tPage table entry %u: ", index2);
if (pgt) if (pgt)
@ -622,7 +623,7 @@ int print_paging_tree(size_t viraddr)
else else
kputs("invalid page table\n"); kputs("invalid page table\n");
spinlock_irqsave_unlock(&task->page_lock); spinlock_irqsave_unlock(&task->pgd_lock);
return 0; return 0;
} }
@ -630,12 +631,12 @@ int print_paging_tree(size_t viraddr)
static void pagefault_handler(struct state *s) static void pagefault_handler(struct state *s)
{ {
task_t* task = per_core(current_task); task_t* task = per_core(current_task);
page_dir_t* pgd = task->pgd;
page_table_t* pgt = NULL;
size_t viraddr = read_cr2(); size_t viraddr = read_cr2();
size_t phyaddr; size_t phyaddr;
#ifdef CONFIG_ROCKCREEK #ifdef CONFIG_ROCKCREEK
uint32_t index1, index2; uint32_t index1, index2;
page_map_t* pgd = task->page_map;
page_map_t* pgt = NULL;
#endif #endif
if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) { if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
@ -649,7 +650,7 @@ static void pagefault_handler(struct state *s)
memset((void*) viraddr, 0x00, PAGE_SIZE); memset((void*) viraddr, 0x00, PAGE_SIZE);
return; return;
} }
kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr); kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr);
put_page(phyaddr); put_page(phyaddr);
} }
@ -660,7 +661,7 @@ static void pagefault_handler(struct state *s)
index2 = (viraddr >> 12) & 0x3FF; index2 = (viraddr >> 12) & 0x3FF;
if (!pgd || !(pgd->entries[index1] & PAGE_MASK)) if (!pgd || !(pgd->entries[index1] & PAGE_MASK))
goto default_handler; goto default_handler;
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK); pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2])) if (!pgt || !(pgt->entries[index2]))
goto default_handler; goto default_handler;
if (pgt->entries[index2] & PG_SVM_INIT) { if (pgt->entries[index2] & PG_SVM_INIT) {
@ -686,14 +687,14 @@ default_handler:
int arch_paging_init(void) int arch_paging_init(void)
{ {
uint32_t i, npages, index1, index2; uint32_t i, npages, index1, index2;
page_map_t* pgt; page_table_t* pgt;
size_t viraddr; size_t viraddr;
// replace default pagefault handler // uninstall default handler and install our own
irq_uninstall_handler(14); irq_uninstall_handler(14);
irq_install_handler(14, pagefault_handler); irq_install_handler(14, pagefault_handler);
// create a page table to reference to the other page tables // Create a page table to reference to the other page tables
pgt = &pgt_container; pgt = &pgt_container;
// map this table at the end of the kernel space // map this table at the end of the kernel space
@ -702,21 +703,21 @@ int arch_paging_init(void)
index2 = (viraddr >> 12) & 0x3FF; index2 = (viraddr >> 12) & 0x3FF;
// now, we create a self reference // now, we create a self reference
per_core(current_task)->page_map->entries[index1] = (((size_t) pgt) & PAGE_MASK)|KERN_TABLE; per_core(current_task)->pgd->entries[index1] = (((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
pgt->entries[index2] = ((size_t) pgt & PAGE_MASK)|KERN_PAGE; pgt->entries[index2] = ((size_t) pgt & 0xFFFFF000)|KERN_PAGE;
// create the other PGTs for the kernel space // create the other PGTs for the kernel space
for(i=0; i<KERNEL_SPACE/(1024*PAGE_SIZE)-1; i++) { for(i=0; i<KERNEL_SPACE/(1024*PAGE_SIZE)-1; i++) {
size_t phyaddr = boot_pgt+i; size_t phyaddr = boot_pgt+i;
memset((void*) phyaddr, 0x00, sizeof(page_map_t)); memset((void*) phyaddr, 0x00, sizeof(page_table_t));
per_core(current_task)->page_map->entries[i] = (phyaddr & PAGE_MASK)|KERN_TABLE; per_core(current_task)->pgd->entries[i] = (phyaddr & PAGE_MASK)|KERN_TABLE;
pgt->entries[i] = (phyaddr & PAGE_MASK)|KERN_PAGE; pgt->entries[i] = (phyaddr & PAGE_MASK)|KERN_PAGE;
} }
/* /*
* Set the page table and page directory entries for the kernel. * Set the page table and page directory entries for the kernel. We map the kernel's physical address
* We map the kernel's physical address to the same virtual address. * to the same virtual address.
*/ */
npages = ((size_t) &kernel_end - (size_t) &kernel_start) >> PAGE_SHIFT; npages = ((size_t) &kernel_end - (size_t) &kernel_start) >> PAGE_SHIFT;
if ((size_t)&kernel_end & (PAGE_SIZE-1)) if ((size_t)&kernel_end & (PAGE_SIZE-1))
@ -724,7 +725,7 @@ int arch_paging_init(void)
map_region((size_t)&kernel_start, (size_t)&kernel_start, npages, MAP_KERNEL_SPACE); map_region((size_t)&kernel_start, (size_t)&kernel_start, npages, MAP_KERNEL_SPACE);
#if MAX_CORES > 1 #if MAX_CORES > 1
// reserve page for smp boot code // Reserve page for smp boot code
if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE)) { if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE)) {
kputs("could not reserve page for smp boot code\n"); kputs("could not reserve page for smp boot code\n");
return -ENOMEM; return -ENOMEM;
@ -737,12 +738,16 @@ int arch_paging_init(void)
#endif #endif
#ifdef CONFIG_MULTIBOOT #ifdef CONFIG_MULTIBOOT
// map mb_info into the kernel space /*
* of course, mb_info has to map into the kernel space
*/
if (mb_info) if (mb_info)
map_region((size_t) mb_info & PAGE_MASK, (size_t) mb_info & PAGE_MASK, 1, MAP_KERNEL_SPACE); map_region((size_t) mb_info & PAGE_MASK, (size_t) mb_info & PAGE_MASK, 1, MAP_KERNEL_SPACE);
#if 0 #if 0
// map reserved memory regions into the kernel space /*
* Map reserved memory regions into the kernel space
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) { if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr; multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr;
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length); multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
@ -800,7 +805,7 @@ int arch_paging_init(void)
kprintf("Map FPGA regsiters at 0x%x\n", viraddr); kprintf("Map FPGA regsiters at 0x%x\n", viraddr);
#endif #endif
// enable paging /* enable paging */
write_cr3((uint32_t) &boot_pgd); write_cr3((uint32_t) &boot_pgd);
i = read_cr0(); i = read_cr0();
i = i | (1 << 31); i = i | (1 << 31);
@ -817,7 +822,10 @@ int arch_paging_init(void)
bootinfo->addr = viraddr; bootinfo->addr = viraddr;
#endif #endif
// we turned on paging => now, we are able to register our task /*
* we turned on paging
* => now, we are able to register our task
*/
register_task(); register_task();
// APIC registers into the kernel address space // APIC registers into the kernel address space

File diff suppressed because it is too large Load diff

View file

@ -20,7 +20,7 @@
#include <metalsvm/stddef.h> #include <metalsvm/stddef.h>
#include <metalsvm/stdio.h> #include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h> #include <metalsvm/stdlib.h>
#include <metalsvm/memory.h> #include <metalsvm/mmu.h>
#include <metalsvm/tasks.h> #include <metalsvm/tasks.h>
#include <metalsvm/page.h> #include <metalsvm/page.h>
#include <metalsvm/errno.h> #include <metalsvm/errno.h>

View file

@ -70,7 +70,7 @@ static ssize_t socket_write(fildes_t* file, uint8_t* buffer, size_t size)
return -ENOMEM; return -ENOMEM;
memcpy(tmp, buffer, size); memcpy(tmp, buffer, size);
ret = lwip_write(file->offset, tmp, size); ret = lwip_write(file->offset, tmp, size);
kfree(tmp); kfree(tmp, size);
#endif #endif
if (ret < 0) if (ret < 0)
ret = -errno; ret = -errno;
@ -147,7 +147,7 @@ int socket_init(vfs_node_t* node, const char* name)
} while(blist); } while(blist);
kfree(new_node); kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM; return -ENOMEM;
} }

View file

@ -83,8 +83,15 @@ static ssize_t stdio_read(fildes_t* file, uint8_t* buffer, size_t size)
static ssize_t stdio_write(fildes_t* file, uint8_t* buffer, size_t size) static ssize_t stdio_write(fildes_t* file, uint8_t* buffer, size_t size)
{ {
int i; int i;
for (i = 0; i<size; i++, buffer++) for (i = 0; i<size; i++, buffer++) {
#ifdef CONFIG_VGA
vga_putchar(*buffer);
#elif defined(CONFIG_UART)
uart_putchar(*buffer);
#else
kputchar(*buffer); kputchar(*buffer);
#endif
}
file->offset += size; file->offset += size;
return size; return size;
@ -145,7 +152,7 @@ int null_init(vfs_node_t* node, const char* name)
} while(blist); } while(blist);
kfree(new_node); kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM; return -ENOMEM;
} }
@ -204,7 +211,7 @@ int stdin_init(vfs_node_t* node, const char* name)
} while(blist); } while(blist);
kfree(new_node); kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM; return -ENOMEM;
} }
@ -263,7 +270,7 @@ int stdout_init(vfs_node_t* node, const char* name)
} while(blist); } while(blist);
kfree(new_node); kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM; return -ENOMEM;
} }
@ -322,7 +329,7 @@ int stderr_init(vfs_node_t* node, const char* name)
} while(blist); } while(blist);
kfree(new_node); kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM; return -ENOMEM;
} }

24
fs/fs.c
View file

@ -132,30 +132,6 @@ int close_fs(fildes_t* file)
return ret; return ret;
} }
void list_fs(vfs_node_t* node, uint32_t depth)
{
int j, i = 0;
dirent_t* dirent = NULL;
fildes_t* file = kmalloc(sizeof(fildes_t));
file->offset = 0;
file->flags = 0;
while ((dirent = readdir_fs(node, i)) != 0) {
kprintf("%*c|- %s\n", 2*depth, ' ', dirent->name);
if (strcmp(dirent->name, ".") && strcmp(dirent->name, "..")) {
vfs_node_t *new_node = finddir_fs(node, dirent->name);
if (new_node) {
kprintf("%*c\\\n", 2*depth, ' ');
list_fs(new_node, depth + 1);
}
}
i++;
}
kfree(file);
}
struct dirent* readdir_fs(vfs_node_t * node, uint32_t index) struct dirent* readdir_fs(vfs_node_t * node, uint32_t index)
{ {
struct dirent* ret = NULL; struct dirent* ret = NULL;

View file

@ -210,7 +210,7 @@ static int initrd_open(fildes_t* file, const char* name)
if (file->node->type == FS_FILE) { if (file->node->type == FS_FILE) {
if ((file->flags & O_CREAT) && (file->flags & O_EXCL)) if ((file->flags & O_CREAT) && (file->flags & O_EXCL))
return -EEXIST; return -EEXIST;
/* in the case of O_TRUNC kfree all the nodes */ /* in the case of O_TRUNC kfree all the nodes */
if (file->flags & O_TRUNC) { if (file->flags & O_TRUNC) {
uint32_t i; uint32_t i;
@ -221,7 +221,8 @@ static int initrd_open(fildes_t* file, const char* name)
/* the first blist pointer have do remain valid. */ /* the first blist pointer have do remain valid. */
for(i=0; i<MAX_DATABLOCKS && !data; i++) { for(i=0; i<MAX_DATABLOCKS && !data; i++) {
if (blist->data[i]) { if (blist->data[i]) {
kfree(blist->data[i]); kfree(blist->data[i],
sizeof(data_block_t));
} }
} }
if (blist->next) { if (blist->next) {
@ -233,12 +234,12 @@ static int initrd_open(fildes_t* file, const char* name)
do { do {
for(i=0; i<MAX_DATABLOCKS && !data; i++) { for(i=0; i<MAX_DATABLOCKS && !data; i++) {
if (blist->data[i]) { if (blist->data[i]) {
kfree(blist->data[i]); kfree(blist->data[i], sizeof(data_block_t));
} }
} }
lastblist = blist; lastblist = blist;
blist = blist->next; blist = blist->next;
kfree(lastblist); kfree(lastblist, sizeof(block_list_t));
} while(blist); } while(blist);
} }
@ -252,7 +253,7 @@ static int initrd_open(fildes_t* file, const char* name)
/* opendir was called: */ /* opendir was called: */
if (name[0] == '\0') if (name[0] == '\0')
return 0; return 0;
/* open file was called: */ /* open file was called: */
if (!(file->flags & O_CREAT)) if (!(file->flags & O_CREAT))
return -ENOENT; return -ENOENT;
@ -263,11 +264,11 @@ static int initrd_open(fildes_t* file, const char* name)
vfs_node_t* new_node = kmalloc(sizeof(vfs_node_t)); vfs_node_t* new_node = kmalloc(sizeof(vfs_node_t));
if (BUILTIN_EXPECT(!new_node, 0)) if (BUILTIN_EXPECT(!new_node, 0))
return -EINVAL; return -EINVAL;
blist = &file->node->block_list; blist = &file->node->block_list;
dir_block_t* dir_block; dir_block_t* dir_block;
dirent_t* dirent; dirent_t* dirent;
memset(new_node, 0x00, sizeof(vfs_node_t)); memset(new_node, 0x00, sizeof(vfs_node_t));
new_node->type = FS_FILE; new_node->type = FS_FILE;
new_node->read = &initrd_read; new_node->read = &initrd_read;
@ -285,7 +286,7 @@ static int initrd_open(fildes_t* file, const char* name)
if (!dirent->vfs_node) { if (!dirent->vfs_node) {
dirent->vfs_node = new_node; dirent->vfs_node = new_node;
strncpy(dirent->name, (char*) name, MAX_FNAME); strncpy(dirent->name, (char*) name, MAX_FNAME);
goto exit_create_file; // TODO: there might be a better Solution goto exit_create_file; // there might be a better Solution ***************
} }
} }
} }
@ -424,9 +425,9 @@ static vfs_node_t* initrd_mkdir(vfs_node_t* node, const char* name)
blist = blist->next; blist = blist->next;
} while(blist); } while(blist);
kfree(dir_block); kfree(dir_block, sizeof(dir_block_t));
out: out:
kfree(new_node); kfree(new_node, sizeof(vfs_node_t));
return NULL; return NULL;
} }

View file

@ -34,14 +34,14 @@ extern "C" {
#define PAGE_SHIFT 12 #define PAGE_SHIFT 12
#define CACHE_LINE 64 #define CACHE_LINE 64
#define MAILBOX_SIZE 32 #define MAILBOX_SIZE 32
#define TIMER_FREQ 100 // in HZ #define TIMER_FREQ 100 /* in HZ */
#define CLOCK_TICK_RATE 1193182 // 8254 chip's internal oscillator frequency #define CLOCK_TICK_RATE 1193182 /* 8254 chip's internal oscillator frequency */
#define INT_SYSCALL 0x80 #define INT_SYSCALL 0x80
#define KERNEL_SPACE (1*1024*1024*1024) #define KERNEL_SPACE (1*1024*1024*1024)
#define VIDEO_MEM_ADDR 0xB8000 // the video memory address #define VIDEO_MEM_ADDR 0xB8000 // the video memora address
#define SMP_SETUP_ADDR 0x07000 #define SMP_SETUP_ADDR 0x07000
#define UART_PORT 0x3F8 // 0x2F8 for SCC
#define BYTE_ORDER LITTLE_ENDIAN #define BYTE_ORDER LITTLE_ENDIAN
/* /*
* address space / (page_size * sizeof(uint8_t)) * address space / (page_size * sizeof(uint8_t))
@ -52,7 +52,7 @@ extern "C" {
#define CONFIG_PCI #define CONFIG_PCI
#define CONFIG_LWIP #define CONFIG_LWIP
#define CONFIG_VGA #define CONFIG_VGA
#define CONFIG_UART //#define CONFIG_UART
#define CONFIG_KEYBOARD #define CONFIG_KEYBOARD
#define CONFIG_MULTIBOOT #define CONFIG_MULTIBOOT
//#define CONFIG_ROCKCREEK //#define CONFIG_ROCKCREEK
@ -72,7 +72,7 @@ extern "C" {
//#define SHMADD //#define SHMADD
#define SHMDBG #define SHMDBG
//#define SHMADD_CACHEABLE //#define SHMADD_CACHEABLE
#define SCC_BOOTINFO 0x80000 #define SCC_BOOTINFO 0x80000
#define BUILTIN_EXPECT(exp, b) __builtin_expect((exp), (b)) #define BUILTIN_EXPECT(exp, b) __builtin_expect((exp), (b))
//#define BUILTIN_EXPECT(exp, b) (exp) //#define BUILTIN_EXPECT(exp, b) (exp)

View file

@ -242,9 +242,6 @@ int open_fs(fildes_t* file, const char* fname);
/** @brief Yet to be documented */ /** @brief Yet to be documented */
int close_fs(fildes_t * file); int close_fs(fildes_t * file);
/** @brief List a filesystem hirachically */
void list_fs(vfs_node_t* node, uint32_t depth);
/** @brief Get dir entry at index /** @brief Get dir entry at index
* @param node VFS node to get dir entry from * @param node VFS node to get dir entry from
* @param index Index position of desired dir entry * @param index Index position of desired dir entry

View file

@ -1,72 +0,0 @@
/*
* Copyright 2010 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#ifndef __MALLOC_H__
#define __MALLOC_H__
#include <metalsvm/stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/// Binary exponent of maximal size for kmalloc()
#define BUDDY_MAX 32 // 4 GB
/// Binary exponent of minimal buddy size
#define BUDDY_MIN 3 // 8 Byte >= sizeof(buddy_t)
/// Binary exponent of the size which we allocate with buddy_fill()
#define BUDDY_ALLOC 15 // 32 KByte >= PAGE_SIZE
#define BUDDY_LISTS (BUDDY_MAX-BUDDY_MIN+1)
#define BUDDY_MAGIC 0xBABE
union buddy;
/** @brief Buddy
*
* Every free memory block is stored in a linked list according to its size.
* We can use this free memory to store store this buddy_t union which represents
* this block (the buddy_t union is alligned to the front).
* Therefore the address of the buddy_t union is equal with the address
* of the underlying free memory block.
*
* Every allocated memory block is prefixed with its binary size exponent and
* a known magic number. This prefix is hidden by the user because its located
* before the actual memory address returned by kmalloc()
*/
typedef union buddy {
/// Pointer to the next buddy in the linked list.
union buddy* next;
struct {
/// The binary exponent of the block size
uint8_t exponent;
/// Must be equal to BUDDY_MAGIC for a valid memory block
uint16_t magic;
} prefix;
} buddy_t;
/** @brief Dump free buddies */
void buddy_dump(void);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -31,6 +31,7 @@
#include <metalsvm/stddef.h> #include <metalsvm/stddef.h>
#include <asm/atomic.h> #include <asm/atomic.h>
//#include <asm/mmu.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -49,54 +50,33 @@ extern atomic_int32_t total_available_pages;
*/ */
int mmu_init(void); int mmu_init(void);
/** @brief Get continuous pages /** @brief get continuous pages
* *
* Use first fit algorithm to find a suitable, continous physical memory region * This function finds a continuous page region (first fit algorithm)
*
* @param no_pages Desired number of pages
* *
* @param npages Desired number of pages
* @return * @return
* - physical address on success * - physical address on success
* - 0 on failure * - 0 on failure
*/ */
size_t get_pages(uint32_t npages); size_t get_pages(uint32_t no_pages);
/** @brief Get a single page /** @brief get a single page
* *
* Convenience function: uses get_pages(1); * Convenience function: uses get_pages(1);
*/ */
static inline size_t get_page(void) { return get_pages(1); } static inline size_t get_page(void) { return get_pages(1); }
/** @brief Put back a sequence of continous pages /** @brief Put back a page after use
* *
* @param phyaddr Physical address of the first page * @param phyaddr Physical address to put back
* @param npages Number of pages
* *
* @return number of pages which were marked as used before calling * @return
*/
int put_pages(size_t phyaddr, size_t npages);
/** @brief Put a single page
*
* Convenience function: uses put_pages(1);
*/
static inline int put_page(size_t phyaddr) { return put_pages(phyaddr, 1); }
/** @brief Copy a physical page frames
*
* @param psrc physical address of source page frames
* @param pdest physical address of source page frames
* @param npages number of pages
* @return
* - 0 on success * - 0 on success
* - -1 on failure * - -EINVAL (-22) on failure
*/ */
int copy_pages(size_t pdest, size_t psrc, size_t npages); int put_page(size_t phyaddr);
/** @brief Copy a single page
*
* Convenience function: uses copy_pages(pdest, psrc, 1);
*/
static inline int copy_page(size_t pdest, size_t psrc) { return copy_pages(pdest, psrc, 1); }
#ifdef __cplusplus #ifdef __cplusplus
} }

View file

@ -29,7 +29,10 @@
#include <metalsvm/stddef.h> #include <metalsvm/stddef.h>
#include <asm/page.h> #include <asm/page.h>
/** @brief Sets up the environment, page directories etc and enables paging. */ /**
* Sets up the environment, page directories etc and
* enables paging.
*/
static inline int paging_init(void) { return arch_paging_init(); } static inline int paging_init(void) { return arch_paging_init(); }
#endif #endif

View file

@ -28,10 +28,14 @@
extern "C" { extern "C" {
#endif #endif
#define NULL ((void*) 0) #define NULL ((void*) 0)
typedef unsigned int tid_t; typedef unsigned int tid_t;
#define PAGE_SIZE (1 << PAGE_SHIFT)
#define PAGE_MASK ~(PAGE_SIZE - 1)
#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
#if MAX_CORES == 1 #if MAX_CORES == 1
#define per_core(name) name #define per_core(name) name
#define DECLARE_PER_CORE(type, name) extern type name; #define DECLARE_PER_CORE(type, name) extern type name;
@ -62,10 +66,10 @@ typedef unsigned int tid_t;
irq_nested_enable(flags);\ irq_nested_enable(flags);\
return ret; \ return ret; \
} }
#define CORE_ID smp_id() #define CORE_ID smp_id()
#endif #endif
// needed to find the task, which is currently running on this core /* needed to find the task, which is currently running on this core */
struct task; struct task;
DECLARE_PER_CORE(struct task*, current_task); DECLARE_PER_CORE(struct task*, current_task);

View file

@ -29,66 +29,72 @@
#ifndef __STDLIB_H__ #ifndef __STDLIB_H__
#define __STDLIB_H__ #define __STDLIB_H__
#include <metalsvm/stddef.h> #include <metalsvm/config.h>
#include <metalsvm/tasks_types.h>
#include <asm/stddef.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
#define MAP_KERNEL_SPACE (1 << 0)
#define MAP_NO_ACCESS (1 << 0) #define MAP_USER_SPACE (1 << 1)
#define MAP_READ_ONLY (1 << 1) #define MAP_PAGE_TABLE (1 << 2)
#define MAP_USER_SPACE (1 << 2) #define MAP_NO_CACHE (1 << 3)
#define MAP_CODE (1 << 3) #define MAP_WT (1 << 5)
#define MAP_WT (1 << 4) #define MAP_CODE (1 << 6)
#define MAP_NO_CACHE (1 << 5) #define MAP_READONLY (1 << 7)
#define MAP_MPE (1 << 6) #ifdef CONFIG_ROCKCREEK
#define MAP_SVM_STRONG (1 << 7) #define MAP_MPE (1 << 8)
#define MAP_SVM_LAZYRELEASE (1 << 8) #endif
#define MAP_SVM_INIT (1 << 9) #define MAP_SVM_STRONG (1 << 9)
#define MAP_SVM_LAZYRELEASE (1 << 10)
#define MAP_KERNEL_SPACE (0 << 2) // legacy compatibility #define MAP_SVM_INIT (1 << 11)
#define MAP_REMAP (1 << 12) #define MAP_NO_ACCESS (1 << 12)
//#define MAP_NON_CONTINUOUS (1 << 13) // TODO #define MAP_REMAP (1 << 13)
void NORETURN abort(void); void NORETURN abort(void);
/** @brief General page allocator function /** @brief Kernel's memory allocator function.
* *
* This function allocates and maps whole pages. * This will just call mem_allocation with
* To avoid fragmentation you should use kmalloc() and kfree()! * the flags MAP_KERNEL_SPACE and MAP_HEAP.
*
* @return Pointer to the new memory range
*/
void* kmalloc(size_t);
/** @brief Kernel's more general memory allocator function.
*
* This function lets you choose flags for the newly allocated memory.
* *
* @param sz Desired size of the new memory * @param sz Desired size of the new memory
* @param flags Flags to for map_region(), vma_add() * @param flags Flags to specify
* *
* @return Pointer to the new memory range * @return Pointer to the new memory range
*/ */
void* palloc(size_t sz, uint32_t flags); void* mem_allocation(size_t sz, uint32_t flags);
/** @brief Free general kernel memory /** @brief Free memory
* *
* The pmalloc() doesn't track how much memory was allocated for which pointer, * The kernel malloc doesn't track how
* much memory was allocated for which pointer,
* so you have to specify how much memory shall be freed. * so you have to specify how much memory shall be freed.
*
* @param sz The size which should freed
*/ */
void pfree(void* addr, size_t sz); void kfree(void*, size_t);
/** @brief The memory allocator function /** @brief Create a new stack for a new task
* *
* This allocator uses a buddy system to manage free memory. * @return start address of the new stack
*
* @return Pointer to the new memory range
*/ */
void* kmalloc(size_t sz); void* create_stack(void);
/** @brief The memory free function /** @brief Delete stack of a finished task
* *
* Releases memory allocated by malloc() * @param addr Pointer to the stack
* * @return 0 on success
* @param addr The address to the memory block allocated by malloc()
*/ */
void kfree(void* addr); int destroy_stack(task_t* addr);
/** @brief String to long /** @brief String to long
* *
@ -107,7 +113,7 @@ unsigned long strtoul(const char* nptr, char** endptr, int base);
*/ */
static inline int atoi(const char *str) static inline int atoi(const char *str)
{ {
return (int)strtol(str, (char **) NULL, 10); return (int)strtol(str, (char **)NULL, 10);
} }
#ifdef __cplusplus #ifdef __cplusplus

View file

@ -147,7 +147,9 @@ tid_t wait(int32_t* result);
*/ */
void update_load(void); void update_load(void);
/** @brief Print the current cpu load */ /** @brief Print the current cpu load
*
*/
void dump_load(void); void dump_load(void);
#if MAX_CORES > 1 #if MAX_CORES > 1
@ -199,7 +201,9 @@ int block_current_task(void);
*/ */
int set_timer(uint64_t deadline); int set_timer(uint64_t deadline);
/** @brief check is a timer is expired */ /** @brief check is a timer is expired
*
*/
void check_timers(void); void check_timers(void);
/** @brief Abort current task */ /** @brief Abort current task */

View file

@ -36,7 +36,6 @@
#include <metalsvm/mailbox_types.h> #include <metalsvm/mailbox_types.h>
#include <asm/tasks_types.h> #include <asm/tasks_types.h>
#include <asm/atomic.h> #include <asm/atomic.h>
#include <asm/page.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
@ -63,6 +62,7 @@ extern "C" {
#define TASK_L2 (1 << 3) #define TASK_L2 (1 << 3)
typedef int (*entry_point_t)(void*); typedef int (*entry_point_t)(void*);
struct page_dir;
/** @brief The task_t structure */ /** @brief The task_t structure */
typedef struct task { typedef struct task {
@ -86,12 +86,12 @@ typedef struct task {
struct task* prev; struct task* prev;
/// last core id on which the task was running /// last core id on which the task was running
uint32_t last_core; uint32_t last_core;
/// usage in number of pages (including page map tables) /// usage in number of pages
atomic_int32_t user_usage; atomic_int32_t user_usage;
/// locks access to all page maps with PG_USER flag set /// avoids concurrent access to the page directory
spinlock_irqsave_t page_lock; spinlock_irqsave_t pgd_lock;
/// pointer to page directory (32bit) or page map level 4 (64bit) table respectively /// pointer to the page directory
page_map_t* page_map; struct page_dir* pgd;
/// lock for the VMA_list /// lock for the VMA_list
spinlock_t vma_lock; spinlock_t vma_lock;
/// list of VMAs /// list of VMAs
@ -100,12 +100,14 @@ typedef struct task {
filp_t* fildes_table; filp_t* fildes_table;
/// starting time/tick of the task /// starting time/tick of the task
uint64_t start_tick; uint64_t start_tick;
/// the userspace heap /// start address of the heap
vma_t* heap; size_t start_heap;
/// end address of the heap
size_t end_heap;
/// LwIP error code /// LwIP error code
int lwip_err; int lwip_err;
/// mail inbox /// mail inbox
mailbox_wait_msg_t inbox; mailbox_wait_msg_t inbox;
/// mail outbox array /// mail outbox array
mailbox_wait_msg_t* outbox[MAX_TASKS]; mailbox_wait_msg_t* outbox[MAX_TASKS];
/// FPU state /// FPU state

View file

@ -27,101 +27,56 @@
#define __VMA_H__ #define __VMA_H__
#include <metalsvm/stddef.h> #include <metalsvm/stddef.h>
#include <asm/page.h>
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
/// Read access to this VMA is allowed
#define VMA_READ (1 << 0) #define VMA_READ (1 << 0)
/// Write access to this VMA is allowed
#define VMA_WRITE (1 << 1) #define VMA_WRITE (1 << 1)
/// Instructions fetches in this VMA are allowed
#define VMA_EXECUTE (1 << 2) #define VMA_EXECUTE (1 << 2)
/// This VMA is cacheable
#define VMA_CACHEABLE (1 << 3) #define VMA_CACHEABLE (1 << 3)
/// This VMA is not accessable #define VMA_NOACCESS (1 << 4)
#define VMA_NO_ACCESS (1 << 4)
/// This VMA should be part of the userspace
#define VMA_USER (1 << 5)
/// A collection of flags used for the kernel heap (kmalloc)
#define VMA_HEAP (VMA_READ|VMA_WRITE|VMA_CACHEABLE)
// boundaries for VAS allocation
#define VMA_KERN_MIN PAGE_SIZE // we skip the first page
#define VMA_KERN_MAX KERNEL_SPACE
#define VMA_USER_MAX PAGE_MAP_PGT
struct vma; struct vma;
/** @brief VMA structure definition /** @brief VMA structure definition */
*
* Each item in this linked list marks a used part of the virtual address space.
* Its used by vm_alloc() to find holes between them.
*/
typedef struct vma { typedef struct vma {
/// Start address of the memory area /// Start address of the memory area
size_t start; size_t start;
/// End address of the memory area /// End address of the memory area
size_t end; size_t end;
/// Type flags field /// Type flags field
uint32_t flags; uint32_t type;
/// Pointer of next VMA element in the list /// Pointer of next VMA element in the list
struct vma* next; struct vma* next;
/// Pointer to previous VMA element in the list /// Pointer to previous VMA element in the list
struct vma* prev; struct vma* prev;
} vma_t; } vma_t;
/** @brief Add a new virtual memory area to the list of VMAs /** @brief Add a new virtual memory region to the list of VMAs
* *
* @param start Start address of the new area * @param task Pointer to the task_t structure of the task
* @param end End address of the new area * @param start Start address of the new region
* @param flags Type flags the new area shall have * @param end End address of the new region
* @param type Type flags the new region shall have
* *
* @return * @return
* - 0 on success * - 0 on success
* - -EINVAL (-22) or -EINVAL (-12) on failure * - -EINVAL (-22) or -EINVAL (-12) on failure
*/ */
int vma_add(size_t start, size_t end, uint32_t flags); int vma_add(struct task* task, size_t start, size_t end, uint32_t type);
/** @brief Search for a free memory area /** @brief Dump information about this task's VMAs into the terminal.
* *
* @param size Size of requestes VMA in bytes * This will print out Start, end and flags for each VMA in the task's list
* @param flags
* @return Type flags the new area shall have
* - 0 on failure
* - the start address of a free area
*/
size_t vma_alloc(size_t size, uint32_t flags);
/** @brief Free an allocated memory area
* *
* @param start Start address of the area to be freed * @param task The task's task_t structure
* @param end End address of the to be freed
* @return * @return
* - 0 on success * - 0 on success
* - -EINVAL (-22) on failure * - -EINVAL (-22) on failure
*/ */
int vma_free(size_t start, size_t end); int vma_dump(struct task* task);
/** @brief Free all virtual memory areas
*
* @return
* - 0 on success
*/
int drop_vma_list();
/** @brief Copy the VMA list of the current task to task
*
* @param task The task where the list should be copied to
* @return
* - 0 on success
*/
int copy_vma_list(struct task* task);
/** @brief Dump information about this task's VMAs into the terminal. */
void vma_dump();
#ifdef __cplusplus #ifdef __cplusplus
} }

View file

@ -63,7 +63,7 @@ extern const void bss_end;
int lowlevel_init(void) int lowlevel_init(void)
{ {
// initialize .bss section // initialize .bss section
memset((char*) &bss_start, 0x00, (char*) &bss_end - (char*) &bss_start); memset((void*)&bss_start, 0x00, ((size_t) &bss_end - (size_t) &bss_start));
koutput_init(); koutput_init();
@ -238,6 +238,49 @@ int network_shutdown(void)
return 0; return 0;
} }
#if 0
static void list_fs(vfs_node_t* node, uint32_t depth)
{
int j, i = 0;
dirent_t* dirent = NULL;
fildes_t* file = kmalloc(sizeof(fildes_t));
file->offset = 0;
file->flags = 0;
while ((dirent = readdir_fs(node, i)) != 0) {
for(j=0; j<depth; j++)
kputs(" ");
kprintf("%s\n", dirent->name);
if (strcmp(dirent->name, ".") && strcmp(dirent->name, "..")) {
vfs_node_t *new_node = finddir_fs(node, dirent->name);
if (new_node) {
if (new_node->type == FS_FILE) {
char buff[16] = {[0 ... 15] = 0x00};
file->node = new_node;
file->offset = 0;
file->flags = 0;
read_fs(file, (uint8_t*)buff, 8);
for(j=0; j<depth+1; j++)
kputs(" ");
kprintf("content: %s\n", buff);
} else list_fs(new_node, depth+1);
}
}
i++;
}
kfree(file, sizeof(fildes_t));
}
static void list_root(void) {
kprintf("List of the file system:\n/\n");
list_fs(fs_root, 1);
}
#endif
int initd(void* arg) int initd(void* arg)
{ {
#ifdef CONFIG_LWIP #ifdef CONFIG_LWIP
@ -274,13 +317,9 @@ int initd(void* arg)
#endif #endif
#endif #endif
#if 1 // list_root();
kputs("Filesystem:\n");
list_fs(fs_root, 1);
#endif
test_init(); test_init();
return 0; return 0;
} }

View file

@ -21,7 +21,7 @@
#include <metalsvm/stdio.h> #include <metalsvm/stdio.h>
#include <metalsvm/string.h> #include <metalsvm/string.h>
#include <metalsvm/time.h> #include <metalsvm/time.h>
#include <metalsvm/memory.h> #include <metalsvm/mmu.h>
#include <metalsvm/tasks.h> #include <metalsvm/tasks.h>
#include <metalsvm/processor.h> #include <metalsvm/processor.h>
#include <metalsvm/errno.h> #include <metalsvm/errno.h>
@ -29,7 +29,6 @@
#include <metalsvm/fs.h> #include <metalsvm/fs.h>
#include <asm/irq.h> #include <asm/irq.h>
#include <asm/irqflags.h> #include <asm/irqflags.h>
#include <asm/page.h>
#include <asm/kb.h> #include <asm/kb.h>
#ifdef CONFIG_ROCKCREEK #ifdef CONFIG_ROCKCREEK
#include <asm/icc.h> #include <asm/icc.h>
@ -72,9 +71,8 @@ int main(void)
pushbg(COL_BLUE); pushbg(COL_BLUE);
kprintf("This is MetalSVM %s Build %u, %u\n", kprintf("This is MetalSVM %s Build %u, %u\n",
METALSVM_VERSION, &__BUILD_DATE, &__BUILD_TIME); METALSVM_VERSION, &__BUILD_DATE, &__BUILD_TIME);
popbg(); popbg();
system_init(); system_init();
irq_init(); irq_init();
timer_init(); timer_init();
@ -87,7 +85,7 @@ int main(void)
icc_init(); icc_init();
svm_init(); svm_init();
#endif #endif
initrd_init(); initrd_init();
irq_enable(); irq_enable();
@ -103,10 +101,9 @@ int main(void)
disable_timer_irq(); disable_timer_irq();
#endif #endif
sleep(2); sleep(5);
create_kernel_task(&id, initd, NULL, NORMAL_PRIO); create_kernel_task(&id, initd, NULL, NORMAL_PRIO);
kprintf("Create initd with id %u\n", id); kprintf("Create initd with id %u\n", id);
reschedule(); reschedule();
while(1) { while(1) {

View file

@ -105,11 +105,11 @@ static int sys_open(const char* name, int flags, int mode)
/* file doesn't exist! */ /* file doesn't exist! */
if (check < 0) { if (check < 0) {
/* tidy up the fildescriptor */ /* tidy up the fildescriptor */
kfree(curr_task->fildes_table[fd]); kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL; curr_task->fildes_table[fd] = NULL;
return check; return check;
} }
return fd; return fd;
} }
@ -196,7 +196,7 @@ static int sys_socket(int domain, int type, int protocol)
/* file doesn't exist! */ /* file doesn't exist! */
if (curr_task->fildes_table[fd]->node == NULL) { if (curr_task->fildes_table[fd]->node == NULL) {
/* tidy up the fildescriptor */ /* tidy up the fildescriptor */
kfree(curr_task->fildes_table[fd]); kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL; curr_task->fildes_table[fd] = NULL;
return -ENOENT; return -ENOENT;
} }
@ -236,11 +236,11 @@ static int sys_accept(int s, struct sockaddr* addr, socklen_t* addrlen)
curr_task->fildes_table[fd]->offset = sock2; curr_task->fildes_table[fd]->offset = sock2;
curr_task->fildes_table[fd]->count = 1; curr_task->fildes_table[fd]->count = 1;
curr_task->fildes_table[fd]->node = findnode_fs("/dev/socket"); curr_task->fildes_table[fd]->node = findnode_fs("/dev/socket");
/* file doesn't exist! */ /* file doesn't exist! */
if (curr_task->fildes_table[fd]->node == NULL) { if (curr_task->fildes_table[fd]->node == NULL) {
/* tidy up the fildescriptor */ /* tidy up the fildescriptor */
kfree(curr_task->fildes_table[fd]); kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL; curr_task->fildes_table[fd] = NULL;
return -ENOENT; return -ENOENT;
} }
@ -273,7 +273,7 @@ static int sys_close(int fd)
/* close command failed -> return check = errno */ /* close command failed -> return check = errno */
if (BUILTIN_EXPECT(check < 0, 0)) if (BUILTIN_EXPECT(check < 0, 0))
return check; return check;
kfree(curr_task->fildes_table[fd]); kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL; curr_task->fildes_table[fd] = NULL;
} else { } else {
curr_task->fildes_table[fd]->count--; curr_task->fildes_table[fd]->count--;
@ -356,7 +356,7 @@ static int sys_dup(int fd)
* free the memory which was allocated in get_fildes() * free the memory which was allocated in get_fildes()
* cause will link it to another existing memory space * cause will link it to another existing memory space
*/ */
kfree(curr_task->fildes_table[new_fd]); kfree(curr_task->fildes_table[new_fd], sizeof(fildes_t));
/* and link it to another existing memory space */ /* and link it to another existing memory space */
curr_task->fildes_table[new_fd] = curr_task->fildes_table[fd]; curr_task->fildes_table[new_fd] = curr_task->fildes_table[fd];
@ -381,7 +381,7 @@ static int sys_dup2(int fd, int fd2)
/* If fd and fd2 are equal, then dup2() just returns fd2 */ /* If fd and fd2 are equal, then dup2() just returns fd2 */
if (fd == fd2) if (fd == fd2)
return fd2; return fd2;
/* /*
* if descriptor fd2 is already in use, it is first deallocated * if descriptor fd2 is already in use, it is first deallocated
* as if a close(2) call had been done first * as if a close(2) call had been done first
@ -398,32 +398,30 @@ static int sys_dup2(int fd, int fd2)
static int sys_sbrk(int incr) static int sys_sbrk(int incr)
{ {
task_t* task = per_core(current_task); task_t* task = per_core(current_task);
vma_t* heap = task->heap; vma_t* tmp = NULL;
int ret; int ret;
spinlock_lock(&task->vma_lock); spinlock_lock(&task->vma_lock);
if (BUILTIN_EXPECT(!heap,0 )) { tmp = task->vma_list;
kprintf("sys_sbrk: missing heap!\n"); while(tmp && !((task->end_heap >= tmp->start) && (task->end_heap <= tmp->end)))
abort(); tmp = tmp->next;
}
ret = heap->end; ret = (int) task->end_heap;
heap->end += incr; task->end_heap += incr;
if (heap->end < heap->start) if (task->end_heap < task->start_heap)
heap->end = heap->start; task->end_heap = task->start_heap;
// allocation and mapping of new pages for the heap // resize virtual memory area
// is catched by the pagefault handler if (tmp && (tmp->end <= task->end_heap))
tmp->end = task->end_heap;
kprintf("sys_sbrk: task = %d, heap->start = %#lx, heap->end = %#lx, incr = %#4x\n", task->id, heap->start, heap->end, incr); // TOD0: remove
spinlock_unlock(&task->vma_lock); spinlock_unlock(&task->vma_lock);
return ret; return ret;
} }
int syscall_handler(size_t sys_nr, ...) int syscall_handler(uint32_t sys_nr, ...)
{ {
int ret = -EINVAL; int ret = -EINVAL;
va_list vl; va_list vl;
@ -502,7 +500,7 @@ int syscall_handler(size_t sys_nr, ...)
break; break;
case __NR_wait: { case __NR_wait: {
int32_t* status = va_arg(vl, int32_t*); int32_t* status = va_arg(vl, int32_t*);
ret = wait(status); ret = wait(status);
break; break;
} }
@ -551,7 +549,7 @@ int syscall_handler(size_t sys_nr, ...)
ret = -ENOTSOCK; ret = -ENOTSOCK;
break; break;
} }
//kprintf("lwip_connect: %p with lenght %i and Socket %i", name, namelen, per_core(current_task)->fildes_table[fd].offset); // TODO: remove //kprintf("lwip_connect: %p with lenght %i and Socket %i", name, namelen, per_core(current_task)->fildes_table[fd].offset);
ret = lwip_connect(per_core(current_task)->fildes_table[fd]->offset, name, namelen); ret = lwip_connect(per_core(current_task)->fildes_table[fd]->offset, name, namelen);
@ -603,7 +601,7 @@ int syscall_handler(size_t sys_nr, ...)
} }
#endif #endif
default: default:
kprintf("syscall_handler: invalid system call %u\n", sys_nr); kputs("invalid system call\n");
ret = -ENOSYS; ret = -ENOSYS;
break; break;
}; };

View file

@ -30,7 +30,7 @@
#include <metalsvm/stdlib.h> #include <metalsvm/stdlib.h>
#include <metalsvm/string.h> #include <metalsvm/string.h>
#include <metalsvm/errno.h> #include <metalsvm/errno.h>
#include <metalsvm/memory.h> #include <metalsvm/mmu.h>
#include <metalsvm/page.h> #include <metalsvm/page.h>
#include <metalsvm/tasks.h> #include <metalsvm/tasks.h>
#include <metalsvm/processor.h> #include <metalsvm/processor.h>
@ -47,27 +47,26 @@
* A task's id will be its position in this array. * A task's id will be its position in this array.
*/ */
static task_t task_table[MAX_TASKS] = { \ static task_t task_table[MAX_TASKS] = { \
[0] = {0, TASK_IDLE, NULL, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_IRQSAVE_INIT, 0, SPINLOCK_INIT, NULL, NULL, 0, NULL}, \ [0] = {0, TASK_IDLE, NULL, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_IRQSAVE_INIT, NULL, SPINLOCK_INIT, NULL, NULL, 0, 0, 0, 0}, \
[1 ... MAX_TASKS-1] = {0, TASK_INVALID, NULL, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_IRQSAVE_INIT, 0, SPINLOCK_INIT, NULL, NULL, 0, NULL} [1 ... MAX_TASKS-1] = {0, TASK_INVALID, NULL, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_IRQSAVE_INIT, NULL, SPINLOCK_INIT, NULL, NULL, 0, 0, 0, 0}};
};
static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT; static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
#ifndef CONFIG_TICKLESS #ifndef CONFIG_TICKLESS
#if MAX_CORES > 1 #if MAX_CORES > 1
static runqueue_t runqueues[MAX_CORES] = { \ static runqueue_t runqueues[MAX_CORES] = { \
[0] = {task_table+0, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}, \ [0] = {task_table+0, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}, \
[1 ... MAX_CORES-1] = {NULL, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}}; [1 ... MAX_CORES-1] = {NULL, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#else #else
static runqueue_t runqueues[1] = { \ static runqueue_t runqueues[1] = { \
[0] = {task_table+0, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}}; [0] = {task_table+0, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#endif #endif
#else #else
#if MAX_CORES > 1 #if MAX_CORES > 1
static runqueue_t runqueues[MAX_CORES] = { \ static runqueue_t runqueues[MAX_CORES] = { \
[0] = {task_table+0, NULL, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}, \ [0] = {task_table+0, NULL, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}, \
[1 ... MAX_CORES-1] = {NULL, NULL, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}}; [1 ... MAX_CORES-1] = {NULL, NULL, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#else #else
static runqueue_t runqueues[1] = { \ static runqueue_t runqueues[1] = { \
[0] = {task_table+0, NULL, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}}; [0] = {task_table+0, NULL, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#endif #endif
#endif #endif
@ -79,7 +78,6 @@ DEFINE_PER_CORE(task_t*, current_task, task_table+0);
extern const void boot_stack; extern const void boot_stack;
/** @brief helper function for the assembly code to determine the current task /** @brief helper function for the assembly code to determine the current task
*
* @return Pointer to the task_t structure of current task * @return Pointer to the task_t structure of current task
*/ */
task_t* get_current_task(void) { task_t* get_current_task(void) {
@ -98,37 +96,6 @@ uint32_t get_highest_priority(void)
return msb(runqueues[CORE_ID].prio_bitmap); return msb(runqueues[CORE_ID].prio_bitmap);
} }
/** @brief Create a new stack for a new task
*
* @return start address of the new stack
*/
static void* create_stack(void)
{
/*
* TODO: our stack should be non-executable!
* We need this atm because nested functions in page64.c
* are using trampolines on the stack.
*/
return palloc(KERNEL_STACK_SIZE, MAP_CODE);
}
/** @brief Delete stack of a finished task
*
* @param addr Pointer to the stack
* @return
* - 0 on success
* - -EINVAL on failure
*/
static int destroy_stack(task_t* task)
{
if (BUILTIN_EXPECT(!task || !task->stack, 0))
return -EINVAL;
pfree(task->stack, KERNEL_STACK_SIZE);
return 0;
}
int multitasking_init(void) { int multitasking_init(void) {
if (BUILTIN_EXPECT(task_table[0].status != TASK_IDLE, 0)) { if (BUILTIN_EXPECT(task_table[0].status != TASK_IDLE, 0)) {
kputs("Task 0 is not an idle task\n"); kputs("Task 0 is not an idle task\n");
@ -137,7 +104,7 @@ int multitasking_init(void) {
mailbox_wait_msg_init(&task_table[0].inbox); mailbox_wait_msg_init(&task_table[0].inbox);
memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS); memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[0].page_map = get_boot_page_map(); task_table[0].pgd = get_boot_pgd();
task_table[0].flags = TASK_DEFAULT_FLAGS; task_table[0].flags = TASK_DEFAULT_FLAGS;
task_table[0].prio = IDLE_PRIO; task_table[0].prio = IDLE_PRIO;
task_table[0].stack = (void*) &boot_stack; task_table[0].stack = (void*) &boot_stack;
@ -161,7 +128,7 @@ size_t get_idle_task(uint32_t id)
atomic_int32_set(&task_table[id].user_usage, 0); atomic_int32_set(&task_table[id].user_usage, 0);
mailbox_wait_msg_init(&task_table[id].inbox); mailbox_wait_msg_init(&task_table[id].inbox);
memset(task_table[id].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS); memset(task_table[id].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[id].page_map = get_boot_page_map(); task_table[id].pgd = get_boot_pgd();
current_task[id].var = task_table+id; current_task[id].var = task_table+id;
runqueues[id].idle = task_table+id; runqueues[id].idle = task_table+id;
@ -226,8 +193,10 @@ static void wakeup_blocked_tasks(int result)
spinlock_irqsave_unlock(&table_lock); spinlock_irqsave_unlock(&table_lock);
} }
/** @brief A procedure to be called by procedures which are called by exiting tasks. */ /** @brief A procedure to be called by
* procedures which are called by exiting tasks. */
static void NORETURN do_exit(int arg) { static void NORETURN do_exit(int arg) {
vma_t* tmp;
task_t* curr_task = per_core(current_task); task_t* curr_task = per_core(current_task);
uint32_t flags, core_id, fd, status; uint32_t flags, core_id, fd, status;
@ -235,17 +204,17 @@ static void NORETURN do_exit(int arg) {
for (fd = 0; fd < NR_OPEN; fd++) { for (fd = 0; fd < NR_OPEN; fd++) {
if(curr_task->fildes_table[fd] != NULL) { if(curr_task->fildes_table[fd] != NULL) {
/* /*
* Delete a descriptor from the per-process object * delete a descriptor from the per-process object
* reference table. If this is not the last reference to the underlying * reference table. If this is not the last reference to the underlying
* object, the object will be ignored. * object, the object will be ignored.
*/ */
if (curr_task->fildes_table[fd]->count == 1) { if (curr_task->fildes_table[fd]->count == 1) {
// try to close the file /* try to close the file */
status = close_fs(curr_task->fildes_table[fd]); status = close_fs(curr_task->fildes_table[fd]);
// close command failed -> return check = errno /* close command failed -> return check = errno */
if (BUILTIN_EXPECT(status < 0, 0)) if (BUILTIN_EXPECT(status < 0, 0))
kprintf("Task %u was not able to close file descriptor %i. close_fs returned %d", curr_task->id, fd, -status); kprintf("Task %u was not able to close file descriptor %i. close_fs returned %d", curr_task->id, fd, -status);
kfree(curr_task->fildes_table[fd]); kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL; curr_task->fildes_table[fd] = NULL;
} else { } else {
curr_task->fildes_table[fd]->count--; curr_task->fildes_table[fd]->count--;
@ -253,33 +222,37 @@ static void NORETURN do_exit(int arg) {
} }
} }
} }
//finally the table has to be cleared.
kfree(curr_task->fildes_table); // finally the table has to be cleared kfree(curr_task->fildes_table, sizeof(filp_t)*NR_OPEN);
} }
kprintf("Terminate task: %u, return value %d\n", curr_task->id, arg); kprintf("Terminate task: %u, return value %d\n", curr_task->id, arg);
wakeup_blocked_tasks(arg); wakeup_blocked_tasks(arg);
flags = irq_nested_disable();
drop_vma_list(); //vma_dump(curr_task);
spinlock_lock(&curr_task->vma_lock);
/* // remove memory regions
* This marks all userpages as free. Nevertheless they are still existing while((tmp = curr_task->vma_list) != NULL) {
* and used by the MMU until the task finishes. Therefore we need to disable kfree((void*) tmp->start, tmp->end - tmp->start + 1);
* context switching by disabling interrupts (see above)! We may also make use curr_task->vma_list = tmp->next;
* of the TLB and global kernel pages. kfree((void*) tmp, sizeof(vma_t));
*/ }
drop_page_map();
#if 1 spinlock_unlock(&curr_task->vma_lock);
drop_pgd(); // delete page directory and its page tables
#if 0
if (atomic_int32_read(&curr_task->user_usage)) if (atomic_int32_read(&curr_task->user_usage))
kprintf("Memory leak! Task %d did not release %d pages\n", kprintf("Memory leak! Task %d did not release %d pages\n",
curr_task->id, atomic_int32_read(&curr_task->user_usage)); curr_task->id, atomic_int32_read(&curr_task->user_usage));
#endif #endif
curr_task->status = TASK_FINISHED; curr_task->status = TASK_FINISHED;
// decrease the number of active tasks // decrease the number of active tasks
flags = irq_nested_disable();
core_id = CORE_ID; core_id = CORE_ID;
spinlock_irqsave_lock(&runqueues[core_id].lock); spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].nr_tasks--; runqueues[core_id].nr_tasks--;
@ -289,7 +262,9 @@ static void NORETURN do_exit(int arg) {
reschedule(); reschedule();
kprintf("Kernel panic: scheduler on core %d found no valid task\n", CORE_ID); kprintf("Kernel panic: scheduler on core %d found no valid task\n", CORE_ID);
while(1) HALT; while(1) {
HALT;
}
} }
/** @brief A procedure to be called by kernel tasks */ /** @brief A procedure to be called by kernel tasks */
@ -325,7 +300,6 @@ void NORETURN abort(void) {
static int create_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio, uint32_t core_id) static int create_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio, uint32_t core_id)
{ {
task_t* curr_task; task_t* curr_task;
task_t* new_task = NULL;
int ret = -ENOMEM; int ret = -ENOMEM;
uint32_t i; uint32_t i;
@ -345,72 +319,64 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio, uin
#endif #endif
{ {
core_id = CORE_ID; core_id = CORE_ID;
kprintf("create_task: invalid core id! Set id to %u!\n", core_id); kprintf("Inavlid core id! Set id to %u!\n", core_id);
} }
curr_task = per_core(current_task); curr_task = per_core(current_task);
// search free entry in task table
for(i=0; i<MAX_TASKS; i++) { for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) { if (task_table[i].status == TASK_INVALID) {
new_task = &task_table[i]; atomic_int32_set(&task_table[i].user_usage, 0);
ret = create_pgd(task_table+i, 0);
if (ret < 0) {
ret = -ENOMEM;
goto create_task_out;
}
task_table[i].id = i;
task_table[i].status = TASK_READY;
task_table[i].last_stack_pointer = NULL;
task_table[i].stack = create_stack();
task_table[i].flags = TASK_DEFAULT_FLAGS;
task_table[i].prio = prio;
task_table[i].last_core = 0;
spinlock_init(&task_table[i].vma_lock);
task_table[i].vma_list = NULL;
task_table[i].fildes_table = NULL;
mailbox_wait_msg_init(&task_table[i].inbox);
memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[i].outbox[curr_task->id] = &curr_task->inbox;
if (id)
*id = i;
ret = create_default_frame(task_table+i, ep, arg);
task_table[i].start_heap = 0;
task_table[i].end_heap = 0;
task_table[i].lwip_err = 0;
task_table[i].start_tick = get_clock_tick();
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[prio-1].first) {
task_table[i].next = task_table[i].prev = NULL;
runqueues[core_id].queue[prio-1].first = task_table+i;
runqueues[core_id].queue[prio-1].last = task_table+i;
} else {
task_table[i].prev = runqueues[core_id].queue[prio-1].last;
task_table[i].next = NULL;
runqueues[core_id].queue[prio-1].last->next = task_table+i;
runqueues[core_id].queue[prio-1].last = task_table+i;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
break; break;
} }
} }
if (BUILTIN_EXPECT(!new_task, 0)) { create_task_out:
ret = -ENOMEM;
goto out;
}
atomic_int32_set(&new_task->user_usage, 0);
ret = copy_page_map(new_task, 0);
if (ret < 0) {
ret = -ENOMEM;
goto out;
}
new_task->id = i;
new_task->status = TASK_READY;
new_task->last_stack_pointer = NULL;
new_task->stack = create_stack();
new_task->flags = TASK_DEFAULT_FLAGS;
new_task->prio = prio;
new_task->last_core = 0;
spinlock_init(&new_task->vma_lock);
new_task->vma_list = NULL;
new_task->fildes_table = NULL;
mailbox_wait_msg_init(&new_task->inbox);
memset(new_task->outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
new_task->outbox[curr_task->id] = &curr_task->inbox;
if (id)
*id = i;
ret = create_default_frame(new_task, ep, arg);
new_task->lwip_err = 0;
new_task->start_tick = get_clock_tick();
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[prio-1].first) {
new_task->next = new_task->prev = NULL;
runqueues[core_id].queue[prio-1].first = new_task;
runqueues[core_id].queue[prio-1].last = new_task;
}
else {
new_task->prev = runqueues[core_id].queue[prio-1].last;
new_task->next = NULL;
runqueues[core_id].queue[prio-1].last->next = new_task;
runqueues[core_id].queue[prio-1].last = new_task;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
out:
spinlock_irqsave_unlock(&table_lock); spinlock_irqsave_unlock(&table_lock);
return ret; return ret;
@ -421,97 +387,109 @@ int sys_fork(void)
int ret = -ENOMEM; int ret = -ENOMEM;
unsigned int i, core_id, fd_i; unsigned int i, core_id, fd_i;
task_t* parent_task = per_core(current_task); task_t* parent_task = per_core(current_task);
task_t* child_task = NULL; vma_t** child;
vma_t* parent;
vma_t* tmp;
spinlock_lock(&parent_task->vma_lock);
spinlock_irqsave_lock(&table_lock); spinlock_irqsave_lock(&table_lock);
core_id = CORE_ID; core_id = CORE_ID;
// search free entry in task_table
for(i=0; i<MAX_TASKS; i++) { for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) { if (task_table[i].status == TASK_INVALID) {
child_task = &task_table[i]; atomic_int32_set(&task_table[i].user_usage, 0);
ret = create_pgd(task_table+i, 1);
if (ret < 0) {
ret = -ENOMEM;
goto create_task_out;
}
task_table[i].id = i;
task_table[i].last_stack_pointer = NULL;
task_table[i].stack = create_stack();
spinlock_init(&task_table[i].vma_lock);
// copy VMA list
child = &task_table[i].vma_list;
parent = parent_task->vma_list;
tmp = NULL;
while(parent) {
*child = (vma_t*) kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!child, 0))
break;
(*child)->start = parent->start;
(*child)->end = parent->end;
(*child)->type = parent->type;
(*child)->prev = tmp;
(*child)->next = NULL;
parent = parent->next;
tmp = *child;
child = &((*child)->next);
}
/* init fildes_table */
task_table[i].fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
memcpy(task_table[i].fildes_table, parent_task->fildes_table, sizeof(filp_t)*NR_OPEN);
for (fd_i = 0; fd_i < NR_OPEN; fd_i++)
if ((task_table[i].fildes_table[fd_i]) != NULL)
task_table[i].fildes_table[fd_i]->count++;
mailbox_wait_msg_init(&task_table[i].inbox);
memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[i].outbox[parent_task->id] = &parent_task->inbox;
task_table[i].flags = parent_task->flags;
memcpy(&(task_table[i].fpu), &(parent_task->fpu), sizeof(union fpu_state));
task_table[i].start_tick = get_clock_tick();
task_table[i].start_heap = 0;
task_table[i].end_heap = 0;
task_table[i].lwip_err = 0;
task_table[i].prio = parent_task->prio;
task_table[i].last_core = parent_task->last_core;
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[parent_task->prio-1].first) {
task_table[i].next = task_table[i].prev = NULL;
runqueues[core_id].queue[parent_task->prio-1].first = task_table+i;
runqueues[core_id].queue[parent_task->prio-1].last = task_table+i;
} else {
task_table[i].prev = runqueues[core_id].queue[parent_task->prio-1].last;
task_table[i].next = NULL;
runqueues[core_id].queue[parent_task->prio-1].last->next = task_table+i;
runqueues[core_id].queue[parent_task->prio-1].last = task_table+i;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
ret = arch_fork(task_table+i);
if (parent_task != per_core(current_task)) {
// Oh, the current task is the new child task!
// Leave the function without releasing the locks
// because the locks are already released
// by the parent task!
return 0;
}
if (!ret) {
task_table[i].status = TASK_READY;
ret = i;
}
break; break;
} }
} }
if (BUILTIN_EXPECT(!child_task, 0)) { create_task_out:
ret = -ENOMEM;
goto out;
}
atomic_int32_set(&child_task->user_usage, 0);
ret = copy_page_map(child_task, 1);
if (ret < 0) {
ret = -ENOMEM;
goto out;
}
ret = copy_vma_list(child_task);
if (BUILTIN_EXPECT(!ret, 0)) {
ret = -ENOMEM;
goto out;
}
child_task->id = i;
child_task->last_stack_pointer = NULL;
child_task->stack = create_stack();
// init fildes_table
child_task->fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
memcpy(child_task->fildes_table, parent_task->fildes_table, sizeof(filp_t)*NR_OPEN);
for (fd_i=0; fd_i<NR_OPEN; fd_i++) {
if ((child_task->fildes_table[fd_i]) != NULL)
child_task->fildes_table[fd_i]->count++;
}
// init mailbox
mailbox_wait_msg_init(&child_task->inbox);
memset(child_task->outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
child_task->outbox[parent_task->id] = &parent_task->inbox;
child_task->flags = parent_task->flags;
memcpy(&child_task->fpu, &parent_task->fpu, sizeof(union fpu_state));
child_task->start_tick = get_clock_tick();
child_task->lwip_err = 0;
child_task->prio = parent_task->prio;
child_task->last_core = parent_task->last_core;
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[parent_task->prio-1].first) {
child_task->next = child_task->prev = NULL;
runqueues[core_id].queue[parent_task->prio-1].first = child_task;
runqueues[core_id].queue[parent_task->prio-1].last = child_task;
}
else {
child_task->prev = runqueues[core_id].queue[parent_task->prio-1].last;
child_task->next = NULL;
runqueues[core_id].queue[parent_task->prio-1].last->next = child_task;
runqueues[core_id].queue[parent_task->prio-1].last = child_task;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
ret = arch_fork(child_task);
if (parent_task != per_core(current_task))
/*
* Oh, the current task is the new child task!
* Leave the function without releasing the locks
* because the locks are already released by the parent task!
*/
return 0;
if (!ret) {
child_task->status = TASK_READY;
ret = i;
}
out:
spinlock_irqsave_unlock(&table_lock); spinlock_irqsave_unlock(&table_lock);
spinlock_unlock(&parent_task->vma_lock);
return ret; return ret;
} }
@ -537,7 +515,7 @@ static int kernel_entry(void* args)
ret = kernel_args->func(kernel_args->args); ret = kernel_args->func(kernel_args->args);
kfree(kernel_args); kfree(kernel_args, sizeof(kernel_args_t));
return ret; return ret;
} }
@ -583,15 +561,16 @@ static int load_task(load_args_t* largs)
{ {
uint32_t i, offset, idx, fd_i; uint32_t i, offset, idx, fd_i;
uint32_t addr, npages, flags; uint32_t addr, npages, flags;
size_t stack = 0, heap = 0; size_t stack = 0;
elf_header_t header; elf_header_t header;
elf_program_header_t prog_header; elf_program_header_t prog_header;
//elf_section_header_t sec_header; //elf_section_header_t sec_header;
fildes_t *file = kmalloc(sizeof(fildes_t)); // TODO: kfree is missing! ///!!! kfree is missing!
fildes_t *file = kmalloc(sizeof(fildes_t));
file->offset = 0; file->offset = 0;
file->flags = 0; file->flags = 0;
// TODO: init the hole fildes_t struct! //TODO: init the hole fildes_t struct!
task_t* curr_task = per_core(current_task); task_t* curr_task = per_core(current_task);
int err; int err;
@ -602,22 +581,22 @@ static int load_task(load_args_t* largs)
if (!file->node) if (!file->node)
return -EINVAL; return -EINVAL;
// init fildes_table /* init fildes_table */
spinlock_irqsave_lock(&table_lock); spinlock_irqsave_lock(&table_lock);
if (!curr_task->fildes_table) { if (!task_table[curr_task->id].fildes_table) {
curr_task->fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN); task_table[curr_task->id].fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
if (BUILTIN_EXPECT(!curr_task->fildes_table, 0)) { if (BUILTIN_EXPECT(!task_table[curr_task->id].fildes_table, 0)) {
spinlock_irqsave_unlock(&table_lock); spinlock_irqsave_unlock(&table_lock);
return -ENOMEM; return -ENOMEM;
} }
memset(curr_task->fildes_table, 0x00, sizeof(filp_t)*NR_OPEN); memset(task_table[curr_task->id].fildes_table, 0x00, sizeof(filp_t)*NR_OPEN);
for (fd_i = 0; fd_i < 3; fd_i++) { for (fd_i = 0; fd_i < 3; fd_i++) {
curr_task->fildes_table[fd_i] = kmalloc(sizeof(fildes_t)); task_table[curr_task->id].fildes_table[fd_i] = kmalloc(sizeof(fildes_t));
curr_task->fildes_table[fd_i]->count = 1; task_table[curr_task->id].fildes_table[fd_i]->count = 1;
} }
curr_task->fildes_table[0]->node = findnode_fs("/dev/stdin"); task_table[curr_task->id].fildes_table[0]->node = findnode_fs("/dev/stdin");
curr_task->fildes_table[1]->node = findnode_fs("/dev/stdout"); task_table[curr_task->id].fildes_table[1]->node = findnode_fs("/dev/stdout");
curr_task->fildes_table[2]->node = findnode_fs("/dev/stderr"); task_table[curr_task->id].fildes_table[2]->node = findnode_fs("/dev/stderr");
} }
spinlock_irqsave_unlock(&table_lock); spinlock_irqsave_unlock(&table_lock);
@ -638,43 +617,43 @@ static int load_task(load_args_t* largs)
if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_32, 0)) if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_32, 0))
goto invalid; goto invalid;
#elif defined(CONFIG_X86_64) #else
if (BUILTIN_EXPECT(header.machine != ELF_EM_X86_64, 0)) if (BUILTIN_EXPECT(header.machine != ELF_EM_X86_64, 0))
goto invalid; goto invalid;
if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_64, 0)) if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_64, 0))
goto invalid; goto invalid;
#else
#error "unknown arch"
#endif #endif
if (BUILTIN_EXPECT(header.ident.data != ELF_DATA_2LSB, 0)) if (BUILTIN_EXPECT(header.ident.data != ELF_DATA_2LSB, 0))
goto invalid; goto invalid;
if (header.entry < KERNEL_SPACE) if (header.entry <= KERNEL_SPACE)
goto invalid; goto invalid;
// interpret program header table // interpret program header table
for (i=0; i<header.ph_entry_count; i++) { for (i=0; i<header.ph_entry_count; i++) {
file->offset = header.ph_offset+i*header.ph_entry_size; file->offset = header.ph_offset+i*header.ph_entry_size;
if (read_fs(file, (uint8_t*) &prog_header, sizeof(elf_program_header_t)) == 0) { if (read_fs(file, (uint8_t*)&prog_header, sizeof(elf_program_header_t)) == 0) {
kprintf("Could not read programm header!\n"); kprintf("Could not read programm header!\n");
continue; continue;
} }
switch(prog_header.type) { switch(prog_header.type)
{
case ELF_PT_LOAD: // load program segment case ELF_PT_LOAD: // load program segment
if (!prog_header.virt_addr) if (!prog_header.virt_addr)
continue; continue;
npages = PAGE_FLOOR(prog_header.mem_size) >> PAGE_BITS; npages = (prog_header.mem_size >> PAGE_SHIFT);
if (prog_header.mem_size & (PAGE_SIZE-1))
npages++;
addr = get_pages(npages); addr = get_pages(npages);
flags = MAP_USER_SPACE; flags = MAP_USER_SPACE;
if (prog_header.flags & PF_X) if (prog_header.flags & PF_X)
flags |= MAP_CODE; flags |= MAP_CODE;
if (!(prog_header.flags & PF_W))
flags |= MAP_READ_ONLY;
// map page frames in the address space of the current task // map page frames in the address space of the current task
if (!map_region(prog_header.virt_addr, addr, npages, flags)) { if (!map_region(prog_header.virt_addr, addr, npages, flags)) {
@ -683,30 +662,35 @@ static int load_task(load_args_t* largs)
} }
// clear pages // clear pages
memset((void*) prog_header.virt_addr, 0x00, npages * PAGE_SIZE); memset((void*) prog_header.virt_addr, 0x00, npages*PAGE_SIZE);
// update heap location // set starting point of the heap
if (heap < prog_header.virt_addr + prog_header.mem_size) if (curr_task->start_heap < prog_header.virt_addr+prog_header.mem_size)
heap = prog_header.virt_addr+prog_header.mem_size; curr_task->start_heap = curr_task->end_heap = prog_header.virt_addr+prog_header.mem_size;
// load program // load program
file->offset = prog_header.offset; file->offset = prog_header.offset;
read_fs(file, (uint8_t*) prog_header.virt_addr, prog_header.file_size); read_fs(file, (uint8_t*)prog_header.virt_addr, prog_header.file_size);
flags = VMA_CACHEABLE | VMA_USER; flags = VMA_CACHEABLE;
if (prog_header.flags & PF_R) if (prog_header.flags & PF_R)
flags |= VMA_READ; flags |= VMA_READ;
if (prog_header.flags & PF_W) if (prog_header.flags & PF_W)
flags |= VMA_WRITE; flags |= VMA_WRITE;
if (prog_header.flags & PF_X) if (prog_header.flags & PF_X)
flags |= VMA_EXECUTE; flags |= VMA_EXECUTE;
vma_add(curr_task, prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags);
vma_add(prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE, flags); if (!(prog_header.flags & PF_W))
change_page_permissions(prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags);
break; break;
case ELF_PT_GNU_STACK: // Indicates stack executability case ELF_PT_GNU_STACK: // Indicates stack executability
// create user-level stack // create user-level stack
npages = PAGE_FLOOR(DEFAULT_STACK_SIZE) >> PAGE_BITS; npages = DEFAULT_STACK_SIZE >> PAGE_SHIFT;
if (DEFAULT_STACK_SIZE & (PAGE_SIZE-1))
npages++;
addr = get_pages(npages); addr = get_pages(npages);
stack = header.entry*2; // virtual address of the stack stack = header.entry*2; // virtual address of the stack
@ -724,8 +708,7 @@ static int load_task(load_args_t* largs)
flags |= VMA_WRITE; flags |= VMA_WRITE;
if (prog_header.flags & PF_X) if (prog_header.flags & PF_X)
flags |= VMA_EXECUTE; flags |= VMA_EXECUTE;
vma_add(curr_task, stack, stack+npages*PAGE_SIZE-1, flags);
vma_add(stack, stack+npages*PAGE_SIZE, flags);
break; break;
} }
} }
@ -743,23 +726,8 @@ static int load_task(load_args_t* largs)
} }
#endif #endif
// setup heap
if (!curr_task->heap)
curr_task->heap = (vma_t*) kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!curr_task->heap || !heap, 0)) {
kprintf("load_task: heap is missing!\n");
return -ENOMEM;
}
curr_task->heap->flags = VMA_HEAP|VMA_USER;
curr_task->heap->start = heap;
curr_task->heap->end = heap;
// TODO: insert into list
if (BUILTIN_EXPECT(!stack, 0)) { if (BUILTIN_EXPECT(!stack, 0)) {
kprintf("load_task: stack is missing!\n"); kprintf("Stack is missing!\n");
return -ENOMEM; return -ENOMEM;
} }
@ -772,9 +740,9 @@ static int load_task(load_args_t* largs)
// push argv on the stack // push argv on the stack
offset -= largs->argc * sizeof(char*); offset -= largs->argc * sizeof(char*);
for (i=0; i<largs->argc; i++) { for(i=0; i<largs->argc; i++) {
((char**) (stack+offset))[i] = (char*) (stack+idx); ((char**) (stack+offset))[i] = (char*) (stack+idx);
while(((char*) stack)[idx] != '\0') while(((char*) stack)[idx] != '\0')
idx++; idx++;
idx++; idx++;
@ -782,7 +750,7 @@ static int load_task(load_args_t* largs)
// push env on the stack // push env on the stack
offset -= (largs->envc+1) * sizeof(char*); offset -= (largs->envc+1) * sizeof(char*);
for (i=0; i<largs->envc; i++) { for(i=0; i<largs->envc; i++) {
((char**) (stack+offset))[i] = (char*) (stack+idx); ((char**) (stack+offset))[i] = (char*) (stack+idx);
while(((char*) stack)[idx] != '\0') while(((char*) stack)[idx] != '\0')
@ -803,10 +771,10 @@ static int load_task(load_args_t* largs)
*((char***) (stack+offset)) = (char**) (stack + offset + 2*sizeof(char**) + (largs->envc+1) * sizeof(char*)); *((char***) (stack+offset)) = (char**) (stack + offset + 2*sizeof(char**) + (largs->envc+1) * sizeof(char*));
// push argc on the stack // push argc on the stack
offset -= sizeof(size_t); offset -= sizeof(int);
*((int*) (stack+offset)) = largs->argc; *((int*) (stack+offset)) = largs->argc;
kfree(largs); kfree(largs, sizeof(load_args_t));
// clear fpu state // clear fpu state
curr_task->flags &= ~(TASK_FPU_USED|TASK_FPU_INIT); curr_task->flags &= ~(TASK_FPU_USED|TASK_FPU_INIT);
@ -817,12 +785,12 @@ static int load_task(load_args_t* largs)
invalid: invalid:
kprintf("Invalid executable!\n"); kprintf("Invalid executable!\n");
kprintf("Magic number: 0x%x\n", (uint32_t) header.ident.magic); kprintf("magic number 0x%x\n", (uint32_t) header.ident.magic);
kprintf("Header type: 0x%x\n", (uint32_t) header.type); kprintf("header type 0x%x\n", (uint32_t) header.type);
kprintf("Machine type: 0x%x\n", (uint32_t) header.machine); kprintf("machine type 0x%x\n", (uint32_t) header.machine);
kprintf("ELF ident class: 0x%x\n", (uint32_t) header.ident._class); kprintf("elf ident class 0x%x\n", (uint32_t) header.ident._class);
kprintf("ELF ident data: 0x%x\n", header.ident.data); kprintf("elf identdata !0x%x\n", header.ident.data);
kprintf("Program entry point: 0x%x\n", (size_t) header.entry); kprintf("program entry point 0x%x\n", (size_t) header.entry);
return -EINVAL; return -EINVAL;
} }
@ -838,7 +806,7 @@ static int user_entry(void* arg)
ret = load_task((load_args_t*) arg); ret = load_task((load_args_t*) arg);
kfree(arg); kfree(arg, sizeof(load_args_t));
return ret; return ret;
} }
@ -856,6 +824,7 @@ static int user_entry(void* arg)
*/ */
int create_user_task_on_core(tid_t* id, const char* fname, char** argv, uint32_t core_id) int create_user_task_on_core(tid_t* id, const char* fname, char** argv, uint32_t core_id)
{ {
#ifdef CONFIG_X86_32
vfs_node_t* node; vfs_node_t* node;
int argc = 0; int argc = 0;
size_t i, buffer_size = 0; size_t i, buffer_size = 0;
@ -891,19 +860,24 @@ int create_user_task_on_core(tid_t* id, const char* fname, char** argv, uint32_t
while ((*dest++ = *src++) != 0); while ((*dest++ = *src++) != 0);
} }
// create new task /* create new task */
return create_task(id, user_entry, load_args, NORMAL_PRIO, core_id); return create_task(id, user_entry, load_args, NORMAL_PRIO, core_id);
#else
return -EINVAL;
#endif
} }
/** @brief Used by the execve-Systemcall */ /** @brief Used by the execve-Systemcall */
int sys_execve(const char* fname, char** argv, char** env) int sys_execve(const char* fname, char** argv, char** env)
{ {
vfs_node_t* node; vfs_node_t* node;
vma_t* tmp;
size_t i, buffer_size = 0; size_t i, buffer_size = 0;
load_args_t* load_args = NULL; load_args_t* load_args = NULL;
char *dest, *src; char *dest, *src;
int ret, argc = 0; int ret, argc = 0;
int envc = 0; int envc = 0;
task_t* curr_task = per_core(current_task);
node = findnode_fs((char*) fname); node = findnode_fs((char*) fname);
if (!node || !(node->type == FS_FILE)) if (!node || !(node->type == FS_FILE))
@ -946,8 +920,16 @@ int sys_execve(const char* fname, char** argv, char** env)
while ((*dest++ = *src++) != 0); while ((*dest++ = *src++) != 0);
} }
spinlock_lock(&curr_task->vma_lock);
// remove old program // remove old program
drop_vma_list(); while((tmp = curr_task->vma_list) != NULL) {
kfree((void*) tmp->start, tmp->end - tmp->start + 1);
curr_task->vma_list = tmp->next;
kfree((void*) tmp, sizeof(vma_t));
}
spinlock_unlock(&curr_task->vma_lock);
/* /*
* we use a trap gate to enter the kernel * we use a trap gate to enter the kernel
@ -958,7 +940,7 @@ int sys_execve(const char* fname, char** argv, char** env)
ret = load_task(load_args); ret = load_task(load_args);
kfree(load_args); kfree(load_args, sizeof(load_args_t));
return ret; return ret;
} }

View file

@ -34,7 +34,13 @@
#define VGA_EARLY_PRINT 1 #define VGA_EARLY_PRINT 1
#define UART_EARLY_PRINT 2 #define UART_EARLY_PRINT 2
#ifdef CONFIG_VGA
static uint32_t early_print = VGA_EARLY_PRINT;
#elif defined(CONFIG_UART)
static uint32_t early_print = UART_EARLY_PRINT;
#else
static uint32_t early_print = NO_EARLY_PRINT; static uint32_t early_print = NO_EARLY_PRINT;
#endif
static spinlock_irqsave_t olock = SPINLOCK_IRQSAVE_INIT; static spinlock_irqsave_t olock = SPINLOCK_IRQSAVE_INIT;
static atomic_int32_t kmsg_counter = ATOMIC_INIT(0); static atomic_int32_t kmsg_counter = ATOMIC_INIT(0);
static unsigned char kmessages[KMSG_SIZE] __attribute__ ((section(".kmsg"))) = {[0 ... KMSG_SIZE-1] = 0x00}; static unsigned char kmessages[KMSG_SIZE] __attribute__ ((section(".kmsg"))) = {[0 ... KMSG_SIZE-1] = 0x00};
@ -130,7 +136,7 @@ int kmsg_init(vfs_node_t * node, const char *name)
} }
} while (blist); } while (blist);
kfree(new_node); kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM; return -ENOMEM;
} }
@ -139,10 +145,6 @@ int koutput_init(void)
{ {
#ifdef CONFIG_VGA #ifdef CONFIG_VGA
vga_init(); vga_init();
early_print |= VGA_EARLY_PRINT;
#endif
#ifdef CONFIG_UART
early_print |= UART_EARLY_PRINT;
#endif #endif
return 0; return 0;
@ -159,16 +161,12 @@ int kputchar(int c)
kmessages[pos % KMSG_SIZE] = (unsigned char) c; kmessages[pos % KMSG_SIZE] = (unsigned char) c;
#ifdef CONFIG_VGA #ifdef CONFIG_VGA
if (early_print & VGA_EARLY_PRINT) if (early_print == VGA_EARLY_PRINT)
vga_putchar(c); vga_putchar(c);
#endif #endif
#ifdef CONFIG_UART #ifdef CONFIG_UART
if (early_print & UART_EARLY_PRINT) { if (early_print == UART_EARLY_PRINT)
if (c == '\n')
uart_putchar('\r');
uart_putchar(c); uart_putchar(c);
}
#endif #endif
if (early_print != NO_EARLY_PRINT) if (early_print != NO_EARLY_PRINT)
@ -188,14 +186,11 @@ int kputs(const char *str)
pos = atomic_int32_inc(&kmsg_counter); pos = atomic_int32_inc(&kmsg_counter);
kmessages[pos % KMSG_SIZE] = str[i]; kmessages[pos % KMSG_SIZE] = str[i];
#ifdef CONFIG_VGA #ifdef CONFIG_VGA
if (early_print & VGA_EARLY_PRINT) if (early_print == VGA_EARLY_PRINT)
vga_putchar(str[i]); vga_putchar(str[i]);
#endif #endif
#ifdef CONFIG_UART #ifdef CONFIG_UART
if (early_print & UART_EARLY_PRINT) if (early_print == UART_EARLY_PRINT)
if (str[i] == '\n')
uart_putchar('\r');
uart_putchar(str[i]); uart_putchar(str[i]);
#endif #endif
} }

View file

@ -1,4 +1,4 @@
C_source := memory.c vma.c malloc.c C_source := memory.c vma.c
MODULE := mm MODULE := mm
include $(TOPDIR)/Makefile.inc include $(TOPDIR)/Makefile.inc

View file

@ -1,207 +0,0 @@
/*
* Copyright 2010 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <metalsvm/malloc.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/stdio.h>
#include <metalsvm/memory.h>
/// A linked list for each binary size exponent
static buddy_t* buddy_lists[BUDDY_LISTS] = { NULL };
/// Lock for the buddy lists
static spinlock_t buddy_lock = SPINLOCK_INIT;
/** @brief Check if larger free buddies are available */
static inline int buddy_large_avail(uint8_t exp)
{
while (exp<BUDDY_MAX && !buddy_lists[exp-BUDDY_MIN])
exp++;
return exp != BUDDY_MAX;
}
/** @brief Calculate the required buddy size */
static inline int buddy_exp(size_t sz)
{
int exp;
for (exp=0; sz>(1<<exp); exp++);
if (exp > BUDDY_MAX)
exp = 0;
if (exp < BUDDY_MIN)
exp = BUDDY_MIN;
return exp;
}
/** @brief Get a free buddy by potentially splitting a larger one */
static buddy_t* buddy_get(int exp)
{
spinlock_lock(&buddy_lock);
buddy_t** list = &buddy_lists[exp-BUDDY_MIN];
buddy_t* buddy = *list;
buddy_t* split;
if (buddy)
// there is already a free buddy =>
// we remove it from the list
*list = buddy->next;
else if (exp >= BUDDY_ALLOC && !buddy_large_avail(exp))
// theres no free buddy larger than exp =>
// we can allocate new memory
buddy = (buddy_t*) palloc(1<<exp, 0);
else {
// we recursivly request a larger buddy...
buddy = buddy_get(exp+1);
if (BUILTIN_EXPECT(!buddy, 0))
goto out;
// ... and split it, by putting the second half back to the list
split = (buddy_t*) ((size_t) buddy + (1<<exp));
split->next = *list;
*list = split;
}
out:
spinlock_unlock(&buddy_lock);
return buddy;
}
/** @brief Put a buddy back to its free list
*
* TODO: merge adjacent buddies (memory compaction)
*/
static void buddy_put(buddy_t* buddy)
{
spinlock_lock(&buddy_lock);
buddy_t** list = &buddy_lists[buddy->prefix.exponent-BUDDY_MIN];
buddy->next = *list;
*list = buddy;
spinlock_unlock(&buddy_lock);
}
void buddy_dump()
{
size_t free = 0;
int i;
for (i=0; i<BUDDY_LISTS; i++) {
buddy_t* buddy;
int exp = i+BUDDY_MIN;
if (buddy_lists[i])
kprintf("buddy_list[%u] (exp=%u, size=%lu bytes):\n", i, exp, 1<<exp);
for (buddy=buddy_lists[i]; buddy; buddy=buddy->next) {
kprintf(" %p -> %p \n", buddy, buddy->next);
free += 1<<exp;
}
}
kprintf("free buddies: %lu bytes\n", free);
}
void* palloc(size_t sz, uint32_t flags)
{
size_t phyaddr, viraddr;
uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
kprintf("palloc(%lu) (%lu pages)\n", sz, npages); // TODO: remove
// get free virtual address space
viraddr = vma_alloc(npages*PAGE_SIZE, VMA_HEAP);
if (BUILTIN_EXPECT(!viraddr, 0))
return NULL;
// get continous physical pages
phyaddr = get_pages(npages);
if (BUILTIN_EXPECT(!phyaddr, 0)) {
vma_free(viraddr, viraddr+npages*PAGE_SIZE);
return NULL;
}
// map physical pages to VMA
viraddr = map_region(viraddr, phyaddr, npages, flags);
if (BUILTIN_EXPECT(!viraddr, 0)) {
vma_free(viraddr, viraddr+npages*PAGE_SIZE);
put_pages(phyaddr, npages);
return NULL;
}
return (void*) viraddr;
}
void pfree(void* addr, size_t sz)
{
if (BUILTIN_EXPECT(!addr || !sz, 0))
return;
size_t i;
size_t phyaddr;
size_t viraddr = (size_t) addr & PAGE_MASK;
uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
// memory is propably not continously mapped!
for (i=0; i<npages; i++) {
phyaddr = virt_to_phys(viraddr+i*PAGE_SIZE);
put_page(phyaddr);
}
unmap_region(viraddr, npages);
vma_free(viraddr, viraddr+npages*PAGE_SIZE);
}
void* kmalloc(size_t sz)
{
if (BUILTIN_EXPECT(!sz, 0))
return NULL;
// add space for the prefix
sz += sizeof(buddy_t);
kprintf("kmalloc(%lu)\n", sz); // TODO: remove
int exp = buddy_exp(sz);
if (BUILTIN_EXPECT(!exp, 0))
return NULL;
buddy_t* buddy = buddy_get(exp);
if (BUILTIN_EXPECT(!buddy, 0))
return NULL;
// setup buddy prefix
buddy->prefix.magic = BUDDY_MAGIC;
buddy->prefix.exponent = exp;
// pointer arithmetic: we hide the prefix
return buddy+1;
}
void kfree(void *addr)
{
if (BUILTIN_EXPECT(!addr, 0))
return;
buddy_t* buddy = (buddy_t*) addr - 1; // get prefix
// check magic
if (BUILTIN_EXPECT(buddy->prefix.magic != BUDDY_MAGIC, 0))
return;
buddy_put(buddy);
}

View file

@ -20,7 +20,7 @@
#include <metalsvm/stdio.h> #include <metalsvm/stdio.h>
#include <metalsvm/string.h> #include <metalsvm/string.h>
#include <metalsvm/stdlib.h> #include <metalsvm/stdlib.h>
#include <metalsvm/memory.h> #include <metalsvm/mmu.h>
#include <metalsvm/spinlock.h> #include <metalsvm/spinlock.h>
#include <metalsvm/time.h> #include <metalsvm/time.h>
#include <metalsvm/processor.h> #include <metalsvm/processor.h>
@ -37,15 +37,17 @@
#endif #endif
/* /*
* Set whole address space as occupied: * 0 => free
* 0 => free, 1 => occupied * 1 => occupied
*
* Set whole address space as occupied
*/ */
static uint8_t bitmap[BITMAP_SIZE] = {[0 ... BITMAP_SIZE-1] = 0xFF}; static uint8_t bitmap[BITMAP_SIZE]; // = {[0 ... BITMAP_SIZE-1] = 0xFF};
static spinlock_t bitmap_lock = SPINLOCK_INIT; static spinlock_t bitmap_lock = SPINLOCK_INIT;
static size_t alloc_start;
atomic_int32_t total_pages = ATOMIC_INIT(0); atomic_int32_t total_pages = ATOMIC_INIT(0);
atomic_int32_t total_allocated_pages = ATOMIC_INIT(0); atomic_int32_t total_allocated_pages = ATOMIC_INIT(0);
atomic_int32_t total_available_pages = ATOMIC_INIT(0); atomic_int32_t total_available_pages = ATOMIC_INIT(0);
/* /*
* Note that linker symbols are not variables, they have no memory allocated for * Note that linker symbols are not variables, they have no memory allocated for
@ -62,12 +64,20 @@ inline static int page_marked(size_t i)
return (bitmap[index] & (1 << mod)); return (bitmap[index] & (1 << mod));
} }
inline static int page_unmarked(size_t i)
{
return !page_marked(i);
}
inline static void page_set_mark(size_t i) inline static void page_set_mark(size_t i)
{ {
size_t index = i >> 3; size_t index = i >> 3;
size_t mod = i & 0x7; size_t mod = i & 0x7;
bitmap[index] = bitmap[index] | (1 << mod); //if (page_marked(i))
// kprintf("page %u is alread marked\n", i);
bitmap[index] = bitmap[index] | (1 << mod);
} }
inline static void page_clear_mark(size_t i) inline static void page_clear_mark(size_t i)
@ -75,16 +85,178 @@ inline static void page_clear_mark(size_t i)
size_t index = i / 8; size_t index = i / 8;
size_t mod = i % 8; size_t mod = i % 8;
if (page_unmarked(i))
kprintf("page %u is already unmarked\n", i);
bitmap[index] = bitmap[index] & ~(1 << mod); bitmap[index] = bitmap[index] & ~(1 << mod);
} }
int mmu_init(void)
{
size_t kernel_size;
unsigned int i;
size_t addr;
int ret = 0;
// at first, set default value of the bitmap
memset(bitmap, 0xFF, sizeof(uint8_t)*BITMAP_SIZE);
#ifdef CONFIG_MULTIBOOT
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
size_t end_addr;
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) ((size_t) mb_info->mmap_addr);
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
while (mmap < mmap_end) {
if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) {
/* set the available memory as "unused" */
addr = mmap->addr;
end_addr = addr + mmap->len;
while (addr < end_addr) {
page_clear_mark(addr >> PAGE_SHIFT);
addr += PAGE_SIZE;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
}
mmap++;
}
} else {
kputs("Unable to initialize the memory management subsystem\n");
while(1) {
HALT;
}
}
#elif defined(CONFIG_ROCKCREEK)
/* of course, the first slots belong to the private memory */
for(addr=0x00; addr<1*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_SHIFT);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
// Note: The last slot belongs always to the private memory.
for(addr=0xFF000000; addr<0xFFFFFFFF; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_SHIFT);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
/*
* Mark the bootinfo as used.
*/
page_set_mark((size_t)bootinfo >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
#else
#error Currently, MetalSVM supports only the Multiboot specification or the RockCreek processor!
#endif
kernel_size = (size_t) &kernel_end - (size_t) &kernel_start;
if (kernel_size & (PAGE_SIZE-1))
kernel_size += PAGE_SIZE - (kernel_size & (PAGE_SIZE-1));
atomic_int32_add(&total_allocated_pages, kernel_size >> PAGE_SHIFT);
atomic_int32_sub(&total_available_pages, kernel_size >> PAGE_SHIFT);
/* set kernel space as used */
for(i=(size_t) &kernel_start >> PAGE_SHIFT; i < (size_t) &kernel_end >> PAGE_SHIFT; i++)
page_set_mark(i);
if ((size_t) &kernel_end & (PAGE_SIZE-1))
page_set_mark(i);
alloc_start = (size_t) &kernel_end >> PAGE_SHIFT;
if ((size_t) &kernel_end & (PAGE_SIZE-1))
alloc_start++;
#if MAX_CORES > 1
// reserve physical page for SMP boot code
page_set_mark(SMP_SETUP_ADDR >> PAGE_SHIFT);
atomic_int32_add(&total_allocated_pages, 1);
atomic_int32_sub(&total_available_pages, 1);
#endif
ret = paging_init();
if (ret) {
kprintf("Failed to initialize paging: %d\n", ret);
return ret;
}
#ifdef CONFIG_MULTIBOOT
/*
* Modules like the init ram disk are already loaded.
* Therefore, we set these pages as used.
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
/*
* Mark the mb_info as used.
*/
page_set_mark((size_t)mb_info >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
for(addr = mb_info->mods_addr; addr < mb_info->mods_addr + mb_info->mods_count * sizeof(multiboot_module_t); addr += PAGE_SIZE) {
page_set_mark(addr >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
for(i=0; i<mb_info->mods_count; i++, mmodule++) {
for(addr=mmodule->mod_start; addr<mmodule->mod_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
}
}
#elif defined(CONFIG_ROCKCREEK)
/*
* Now, we are able to read the FPGA registers and to
* determine the number of slots for private memory.
*/
uint32_t slots = *((volatile uint8_t*) (FPGA_BASE + 0x8244));
if (slots == 0)
slots = 1;
kprintf("MetalSVM use %d slots for private memory\n", slots);
// define the residual private slots as free
for(addr=1*0x1000000; addr<slots*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_SHIFT);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
/*
* The init ram disk are already loaded.
* Therefore, we set these pages as used.
*/
for(addr=bootinfo->addr; addr < bootinfo->addr+bootinfo->size; addr+=PAGE_SIZE) {
// This area is already mapped, so we need to virt_to_phys() these addresses.
page_set_mark(virt_to_phys(addr) >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
#endif
return ret;
}
/*
* Use first fit algorithm to find a suitable physical memory region
*/
size_t get_pages(uint32_t npages) size_t get_pages(uint32_t npages)
{ {
// first page is reserved uint32_t i, j, l;
static size_t start = 1; uint32_t k = 0;
size_t i, j, l;
size_t k = 0;
size_t ret = 0; size_t ret = 0;
if (BUILTIN_EXPECT(!npages, 0)) if (BUILTIN_EXPECT(!npages, 0))
@ -94,8 +266,7 @@ size_t get_pages(uint32_t npages)
return ret; return ret;
spinlock_lock(&bitmap_lock); spinlock_lock(&bitmap_lock);
i = alloc_start;
i = start;
next_try: next_try:
while((k < BITMAP_SIZE) && page_marked(i)) { while((k < BITMAP_SIZE) && page_marked(i)) {
k++; k++;
@ -113,7 +284,7 @@ next_try:
} }
if (i+j >= BITMAP_SIZE) { if (i+j >= BITMAP_SIZE) {
i = 1; i = 0;
goto next_try; goto next_try;
} }
@ -121,12 +292,11 @@ next_try:
goto oom; goto oom;
ret = i*PAGE_SIZE; ret = i*PAGE_SIZE;
kprintf("get_pages: ret 0x%x, i = %d, j = %d, npages = %d\n", ret, i, j, npages); // TODO: remove //kprintf("alloc: ret 0x%x, i = %d, j = %d, npages = %d\n", ret, i, j, npages);
for(l=i; l<i+j; l++) for(l=i; l<i+j; l++)
page_set_mark(l); page_set_mark(l);
start = i+j; alloc_start = i+j;
spinlock_unlock(&bitmap_lock); spinlock_unlock(&bitmap_lock);
atomic_int32_add(&total_allocated_pages, npages); atomic_int32_add(&total_allocated_pages, npages);
@ -140,253 +310,88 @@ oom:
return ret; return ret;
} }
int put_pages(size_t phyaddr, size_t npages) int put_page(size_t phyaddr)
{ {
if (BUILTIN_EXPECT(!phyaddr || !npages, 0)) uint32_t index = phyaddr >> PAGE_SHIFT;
if (BUILTIN_EXPECT(!phyaddr, 0))
return -EINVAL; return -EINVAL;
size_t i, ret = 0;
size_t base = phyaddr >> PAGE_BITS;
spinlock_lock(&bitmap_lock); spinlock_lock(&bitmap_lock);
page_clear_mark(index);
for (i=0; i<npages; i++) {
if (page_marked(base+i)) {
page_clear_mark(base+i);
ret++;
}
}
spinlock_unlock(&bitmap_lock); spinlock_unlock(&bitmap_lock);
atomic_int32_sub(&total_allocated_pages, ret); atomic_int32_sub(&total_allocated_pages, 1);
atomic_int32_add(&total_available_pages, ret); atomic_int32_add(&total_available_pages, 1);
kprintf("put_pages: phyaddr=%#lx, npages = %d, ret = %d\n", phyaddr, npages, ret); // TODO: remove return 0;
return ret;
} }
int copy_pages(size_t pdest, size_t psrc, size_t npages) void* mem_allocation(size_t sz, uint32_t flags)
{ {
size_t viraddr; size_t phyaddr, viraddr;
size_t vdest, vsrc; uint32_t npages = sz >> PAGE_SHIFT;
// allocate virtual memory areas if (sz & (PAGE_SIZE-1))
viraddr = vma_alloc(2*npages*PAGE_SIZE, VMA_HEAP); npages++;
if (BUILTIN_EXPECT(!viraddr, 0))
return -ENOMEM;
// map pages phyaddr = get_pages(npages);
vsrc = map_region(viraddr, psrc, npages, MAP_KERNEL_SPACE); if (BUILTIN_EXPECT(!phyaddr, 0))
vdest = map_region(viraddr+npages*PAGE_SIZE, pdest, npages, MAP_KERNEL_SPACE); return 0;
if (BUILTIN_EXPECT(!vsrc || !vdest, 0)) {
unmap_region(viraddr, 2*npages);
return -ENOMEM;
}
kprintf("copy_pages: copy %u pages from: %#lx (%#lx) to %#lx (%#lx)\n", npages, vsrc, psrc, vdest, pdest); // TODO remove viraddr = map_region(0, phyaddr, npages, flags);
// copy the whole page return (void*) viraddr;
memcpy((void*) vdest, (void*) vsrc, npages*PAGE_SIZE);
// householding
unmap_region(viraddr, 2*npages);
vma_free(viraddr, viraddr+2*npages*PAGE_SIZE);
return pdest;
} }
int mmu_init(void) void* kmalloc(size_t sz)
{ {
unsigned int i; return mem_allocation(sz, MAP_KERNEL_SPACE);
size_t addr;
int ret = 0;
#ifdef CONFIG_MULTIBOOT
if (mb_info) {
if (mb_info->flags & MULTIBOOT_INFO_MEM_MAP) {
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) ((size_t) mb_info->mmap_addr);
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
// mark available memory as free
while (mmap < mmap_end) {
if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) {
for (addr=mmap->addr; addr < mmap->addr + mmap->len; addr += PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
}
mmap++;
}
}
else if (mb_info->flags & MULTIBOOT_INFO_MEM) {
size_t page;
size_t pages_lower = mb_info->mem_lower >> 2;
size_t pages_upper = mb_info->mem_upper >> 2;
for (page=0; page<pages_lower; page++)
page_clear_mark(page);
for (page=0x100000; page<pages_upper+0x100000; page++)
page_clear_mark(page);
atomic_int32_add(&total_pages, pages_lower + pages_upper);
atomic_int32_add(&total_available_pages, pages_lower + pages_upper);
}
else {
kputs("Unable to initialize the memory management subsystem\n");
while (1) HALT;
}
// mark mb_info as used
page_set_mark((size_t) mb_info >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
// mark modules list as used
if (mb_info->flags & MULTIBOOT_INFO_MODS) {
for(addr=mb_info->mods_addr; addr<mb_info->mods_addr+mb_info->mods_count*sizeof(multiboot_module_t); addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
}
}
#elif defined(CONFIG_ROCKCREEK)
// of course, the first slots belong to the private memory
for(addr=0x00; addr<1*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
// Note: The last slot belongs always to the private memory.
for(addr=0xFF000000; addr<0xFFFFFFFF; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
// mark the bootinfo as used.
page_set_mark((size_t)bootinfo >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
#else
#error Currently, MetalSVM supports only the Multiboot specification or the RockCreek processor!
#endif
// mark kernel as used
for(addr=(size_t) &kernel_start; addr<(size_t) &kernel_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
#if MAX_CORES > 1
page_set_mark(SMP_SETUP_ADDR >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
#endif
// enable paging and map SMP, VGA, Multiboot modules etc.
ret = paging_init();
if (ret) {
kprintf("Failed to initialize paging: %d\n", ret);
return ret;
}
// add kernel to VMA list
vma_add(PAGE_CEIL((size_t) &kernel_start),
PAGE_FLOOR((size_t) &kernel_end),
VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
// add LAPIC tp VMA list
vma_add((size_t) &kernel_start - PAGE_SIZE,
(size_t) &kernel_start,
VMA_READ|VMA_WRITE);
#ifdef CONFIG_VGA
// add VGA to VMA list
vma_add(PAGE_CEIL(VIDEO_MEM_ADDR),
PAGE_FLOOR(VIDEO_MEM_ADDR) + PAGE_SIZE,
VMA_READ|VMA_WRITE);
#endif
#if MAX_CORES > 1
// reserve page for SMP boot code
vma_add(PAGE_CEIL(SMP_SETUP_ADDR),
PAGE_FLOOR(SMP_SETUP_ADDR) + PAGE_SIZE,
VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
#endif
#ifdef CONFIG_MULTIBOOT
/*
* Modules like the init ram disk are already loaded.
* Therefore, we set these pages as used.
*/
if (mb_info) {
vma_add(PAGE_CEIL((size_t) mb_info),
PAGE_FLOOR((size_t) mb_info + sizeof(multiboot_info_t)),
VMA_READ|VMA_CACHEABLE);
if (mb_info->flags & MULTIBOOT_INFO_MODS) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
vma_add(PAGE_CEIL((size_t) mb_info->mods_addr),
PAGE_FLOOR((size_t) mb_info->mods_addr + mb_info->mods_count*sizeof(multiboot_module_t)),
VMA_READ|VMA_CACHEABLE);
for(i=0; i<mb_info->mods_count; i++) {
vma_add(PAGE_CEIL(mmodule[i].mod_start),
PAGE_FLOOR(mmodule[i].mod_end),
VMA_READ|VMA_WRITE|VMA_CACHEABLE);
for(addr=mmodule[i].mod_start; addr<mmodule[i].mod_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
}
}
}
#elif defined(CONFIG_ROCKCREEK)
/*
* Now, we are able to read the FPGA registers and to
* determine the number of slots for private memory.
*/
uint32_t slots = *((volatile uint8_t*) (FPGA_BASE + 0x8244));
if (slots == 0)
slots = 1;
kprintf("MetalSVM use %d slots for private memory\n", slots);
// define the residual private slots as free
for(addr=1*0x1000000; addr<slots*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
/*
* The init ram disk are already loaded.
* Therefore, we set these pages as used.
*/
for(addr=bootinfo->addr; addr<bootinfo->addr+bootinfo->size; addr+=PAGE_SIZE) {
// this area is already mapped, so we need to virt_to_phys() these addresses.
page_set_mark(virt_to_phys(addr) >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
#endif
return ret;
} }
/*
 * Release a kernel memory region previously obtained from kmalloc().
 *
 * For every page of the region the backing physical frame is returned
 * to the page bitmap, the virtual mapping is removed and the virtual
 * range is handed back to the vm allocator. The global page-accounting
 * counters are updated accordingly.
 *
 * @param addr start address of the region (page aligned)
 * @param sz   size of the region in bytes (rounded up to full pages)
 */
void kfree(void* addr, size_t sz)
{
	uint32_t index, npages, i;
	size_t phyaddr;

	// a NULL pointer or a zero-sized region is a no-op
	// (was "!addr && !sz": kfree(NULL, sz) would have unmapped pages at 0)
	if (BUILTIN_EXPECT(!addr || !sz, 0))
		return;

	// round the byte count up to whole pages
	npages = sz >> PAGE_SHIFT;
	if (sz & (PAGE_SIZE-1))
		npages++;

	spinlock_lock(&bitmap_lock);
	for(i=0; i<npages; i++) {
		// resolve the physical frame BEFORE destroying the mapping;
		// after unmap_region() the translation may no longer exist
		phyaddr = virt_to_phys((size_t) addr+i*PAGE_SIZE);

		unmap_region((size_t) addr+i*PAGE_SIZE, 1);

		// pages without a physical backing have no frame to release
		if (!phyaddr)
			continue;

		index = phyaddr >> PAGE_SHIFT;
		page_clear_mark(index);
	}
	spinlock_unlock(&bitmap_lock);

	vm_free((size_t) addr, npages);

	atomic_int32_sub(&total_allocated_pages, npages);
	atomic_int32_add(&total_available_pages, npages);
}
/*
 * Allocate a new kernel stack of KERNEL_STACK_SIZE bytes.
 *
 * @return the value produced by kmalloc() for a KERNEL_STACK_SIZE
 *         request (i.e. the base address of the new stack region)
 */
void* create_stack(void)
{
	void* stack;

	stack = kmalloc(KERNEL_STACK_SIZE);

	return stack;
}
/*
 * Release the kernel stack owned by the given task.
 *
 * @param task task whose stack (task->stack) is freed
 * @return 0 on success, -EINVAL if task is NULL or has no stack
 */
int destroy_stack(task_t* task)
{
	// reject a missing task descriptor
	if (BUILTIN_EXPECT(!task, 0))
		return -EINVAL;

	// reject a task that never had a stack allocated
	if (BUILTIN_EXPECT(!task->stack, 0))
		return -EINVAL;

	kfree(task->stack, KERNEL_STACK_SIZE);

	return 0;
}

333
mm/vma.c
View file

@ -1,5 +1,5 @@
/* /*
* Copyright 2011 Steffen Vogel, Chair for Operating Systems, * Copyright 2011 Stefan Lankes, Chair for Operating Systems,
* RWTH Aachen University * RWTH Aachen University
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
@ -17,318 +17,85 @@
* This file is part of MetalSVM. * This file is part of MetalSVM.
*/ */
#include <metalsvm/vma.h>
#include <metalsvm/stdlib.h> #include <metalsvm/stdlib.h>
#include <metalsvm/stdio.h> #include <metalsvm/stdio.h>
#include <metalsvm/tasks_types.h> #include <metalsvm/tasks_types.h>
#include <metalsvm/spinlock.h> #include <metalsvm/spinlock.h>
#include <metalsvm/vma.h>
#include <metalsvm/errno.h> #include <metalsvm/errno.h>
/* /*
* Kernel space VMA list and lock * add a new virtual memory region to the list of VMAs
*
* For bootstrapping we initialize the VMA list with one empty VMA
* (start == end) and expand this VMA by calls to vma_alloc()
*/ */
static vma_t vma_boot = { VMA_KERN_MIN, VMA_KERN_MIN, VMA_HEAP }; int vma_add(task_t* task, size_t start, size_t end, uint32_t type)
static vma_t* vma_list = &vma_boot;
static spinlock_t vma_lock = SPINLOCK_INIT;
size_t vma_alloc(size_t size, uint32_t flags)
{ {
task_t* task = per_core(current_task); vma_t* new_vma;
spinlock_t* lock;
vma_t** list; if (BUILTIN_EXPECT(!task || start > end, 0))
kprintf("vma_alloc(0x%lx, 0x%x)\n", size, flags);
size_t base, limit; // boundaries for search
size_t start, end;
if (BUILTIN_EXPECT(!size, 0))
return 0;
if (flags & VMA_USER) {
base = VMA_KERN_MAX;
limit = VMA_USER_MAX;
list = &task->vma_list;
lock = &task->vma_lock;
}
else {
base = VMA_KERN_MIN;
limit = VMA_KERN_MAX;
list = &vma_list;
lock = &vma_lock;
}
spinlock_lock(lock);
// first fit search for free memory area
vma_t* pred = NULL; // vma before current gap
vma_t* succ = *list; // vma after current gap
do {
start = (pred) ? pred->end : base;
end = (succ) ? succ->start : limit;
if (end > start && end - start > size)
break; // we found a gap
pred = succ;
succ = (succ) ? succ->next : NULL;
} while (pred || succ);
if (BUILTIN_EXPECT(end > limit || end < start || end - start < size, 0)) {
spinlock_unlock(lock);
return 0;
}
if (pred && pred->flags == flags) {
pred->end = start+size;
}
else {
// insert new VMA
vma_t* new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(lock);
return 0;
}
new->start = start;
new->end = start+size;
new->flags = flags;
new->next = succ;
new->prev = pred;
if (succ)
succ->prev = new;
if (pred)
pred->next = new;
else
*list = new;
}
spinlock_unlock(lock);
return start;
}
int vma_free(size_t start, size_t end)
{
task_t* task = per_core(current_task);
spinlock_t* lock;
vma_t* vma;
vma_t** list;
if (BUILTIN_EXPECT(start >= end, 0))
return -EINVAL; return -EINVAL;
if (end < VMA_KERN_MAX) { new_vma = kmalloc(sizeof(new_vma));
lock = &vma_lock; if (!new_vma)
list = &vma_list;
}
else if (start >= VMA_KERN_MAX) {
lock = &task->vma_lock;
list = &task->vma_list;
}
if (BUILTIN_EXPECT(!*list, 0))
return -EINVAL;
spinlock_lock(lock);
// search vma
vma = *list;
while (vma) {
if (start >= vma->start && end <= vma->end) break;
vma = vma->next;
}
if (BUILTIN_EXPECT(!vma, 0)) {
spinlock_unlock(lock);
return -EINVAL;
}
// free/resize vma
if (start == vma->start && end == vma->end) {
if (vma == *list)
*list = vma->next; // update list head
if (vma->prev)
vma->prev->next = vma->next;
if (vma->next)
vma->next->prev = vma->prev;
kfree(vma);
}
else if (start == vma->start)
vma->start = end;
else if (end == vma->end)
vma->end = start;
else {
vma_t* new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(lock);
return -ENOMEM;
}
new->end = vma->end;
vma->end = start;
new->start = end;
new->next = vma->next;
vma->next = new;
new->prev = vma;
}
spinlock_unlock(lock);
return 0;
}
int vma_add(size_t start, size_t end, uint32_t flags)
{
task_t* task = per_core(current_task);
spinlock_t* lock;
vma_t** list;
kprintf("vma_add(0x%lx, 0x%lx, 0x%x)\n", start, end, flags);
if (BUILTIN_EXPECT(start >= end, 0))
return -EINVAL;
if (flags & VMA_USER) {
list = &task->vma_list;
lock = &task->vma_lock;
// check if address is in userspace
if (BUILTIN_EXPECT(start < VMA_KERN_MAX, 0))
return -EINVAL;
}
else {
list = &vma_list;
lock = &vma_lock;
// check if address is in kernelspace
if (BUILTIN_EXPECT(end >= VMA_KERN_MAX, 0))
return -EINVAL;
}
spinlock_lock(lock);
// search gap
vma_t* pred = NULL;
vma_t* succ = *list;
do {
if ((!pred || pred->end <= start) &&
(!succ || succ->start >= end))
break;
pred = succ;
succ = succ->next;
} while (pred || succ);
// TODO: check bounds
// insert new VMA
vma_t* new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(lock);
return -ENOMEM; return -ENOMEM;
spinlock_lock(&task->vma_lock);
new_vma->start = start;
new_vma->end = end;
new_vma->type = type;
if (!(task->vma_list)) {
new_vma->next = new_vma->prev = NULL;
task->vma_list = new_vma;
} else {
vma_t* tmp = task->vma_list;
while (tmp->next && tmp->start < start)
tmp = tmp->next;
new_vma->next = tmp->next;
new_vma->prev = tmp;
tmp->next = new_vma;
} }
new->start = start; spinlock_unlock(&task->vma_lock);
new->end = end;
new->flags = flags;
new->next = succ;
new->prev = pred;
if (succ)
succ->prev = new;
if (pred)
pred->next = new;
else
*list = new;
spinlock_unlock(lock);
return 0; return 0;
} }
int copy_vma_list(task_t* task) int vma_dump(task_t* task)
{ {
task_t* parent_task = per_core(current_task); vma_t* tmp;
if (BUILTIN_EXPECT(!task, 0))
return -EINVAL;
spinlock_init(&task->vma_lock);
spinlock_lock(&parent_task->vma_lock);
spinlock_lock(&task->vma_lock); spinlock_lock(&task->vma_lock);
vma_t* last = NULL; tmp = task->vma_list;
vma_t* parent = parent_task->vma_list; while (tmp) {
kprintf("%8x - %8x: ", tmp->start, tmp->end);
while (parent) { if (tmp->type & VMA_READ)
vma_t *new = kmalloc(sizeof(vma_t)); kputs("r");
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(&task->vma_lock);
spinlock_unlock(&parent_task->vma_lock);
return -ENOMEM;
}
new->start = parent->start;
new->end = parent->end;
new->flags = parent->flags;
new->prev = last;
if (last)
last->next = new;
else else
task->vma_list = new; kputs("-");
last = new; if (tmp->type & VMA_WRITE)
parent = parent->next; kputs("w");
} else
kputs("-");
return 0; if (tmp->type & VMA_EXECUTE)
} kputs("x");
else
kputs("-");
kputs("\n");
int drop_vma_list() tmp = tmp->next;
{
task_t* task = per_core(current_task);
vma_t* vma;
kprintf("drop_vma_list: task = %u\n", task->id); // TODO: remove
spinlock_lock(&task->vma_lock);
while ((vma = task->vma_list)) {
task->vma_list = vma->next;
kfree(vma);
} }
spinlock_unlock(&task->vma_lock); spinlock_unlock(&task->vma_lock);
return 0; return 0;
} }
void vma_dump()
{
void print_vma(vma_t *vma) {
while (vma) {
kprintf("0x%lx - 0x%lx: size=%x, flags=%c%c%c\n", vma->start, vma->end, vma->end - vma->start,
(vma->flags & VMA_READ) ? 'r' : '-',
(vma->flags & VMA_WRITE) ? 'w' : '-',
(vma->flags & VMA_EXECUTE) ? 'x' : '-');
vma = vma->next;
}
}
task_t* task = per_core(current_task);
kputs("Kernelspace VMAs:\n");
spinlock_lock(&vma_lock);
print_vma(vma_list);
spinlock_unlock(&vma_lock);
kputs("Userspace VMAs:\n");
spinlock_lock(&task->vma_lock);
print_vma(task->vma_list);
spinlock_unlock(&task->vma_lock);
}

View file

@ -1,80 +1,68 @@
ARCH = x86 ARCH = x86
BIT = 32
NEWLIB = ../x86/i586-metalsvm-elf32 NEWLIB = ../x86/i586-metalsvm-elf32
MAKE = make MAKE = make
STRIP_DEBUG = --strip-debug STRIP_DEBUG = --strip-debug
KEEP_DEBUG = --only-keep-debug KEEP_DEBUG = --only-keep-debug
LDFLAGS =
# Default section offsets in x86-64 ELF files are aligned to the page-size.
# For x86-64 the pagesize is huge (2 MB) with the consquence of large sparse
# ELF files (which lead to a huge initrd). To solve this, we manually set the page-size to 4 KB.
override LDFLAGS += -T link.ld -Wl,-n,-z,max-page-size=0x1000
# other implicit rules # other implicit rules
%.o : %.c %.o : %.c
$(CC_FOR_TARGET) -c $(CFLAGS) -o $@ $< $(CC_FOR_TARGET) -c $(CFLAGS) -o $@ $<
default: all default: all
all: memtest hello tests jacobi mshell server client rlogind fork all: memtest hello tests jacobi mshell server client rlogind
jacobi: jacobi.o jacobi: jacobi.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< -lm $(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $< -lm
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym $(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@ $(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym chmod a-x $@.sym
memtest: memtest.o memtest: memtest.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< $(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $<
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym $(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@ $(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym chmod a-x $@.sym
fork: fork.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $<
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
tests: tests.o tests: tests.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< $(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $<
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym $(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@ $(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym chmod a-x $@.sym
hello: hello.o hello: hello.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< $(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $<
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym $(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@ $(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym chmod a-x $@.sym
mshell: mshell.o mshell: mshell.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< $(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $<
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym $(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@ $(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym chmod a-x $@.sym
rlogind: rlogind.o rlogind: rlogind.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< -lsocket $(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $< -lsocket
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym $(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@ $(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym chmod a-x $@.sym
server: server.o server: server.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< -lsocket $(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $< -lsocket
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym $(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@ $(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym chmod a-x $@.sym
client: client.o client: client.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< -lsocket $(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $< -lsocket
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym $(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@ $(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym chmod a-x $@.sym
clean: clean:
$(RM) hello tests server client rlogind memtest fork mshell jacobi hello *.sym *.o *~ $(RM) hello tests server client rlogind memtest mshell jacobi hello *.sym *.o *~
depend: depend:
$(CC_FOR_TARGET) -MM $(CFLAGS) *.c > Makefile.dep $(CC_FOR_TARGET) -MM $(CFLAGS) *.c > Makefile.dep

View file

@ -1,55 +0,0 @@
/*
* Copyright 2013 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <errno.h>
/*
 * fork/exec/wait smoke test for the MetalSVM userland.
 *
 * The parent forks; the child replaces itself with /bin/memtest via
 * execve(), while the parent blocks in wait() and reports the child's
 * decoded exit code.
 *
 * Returns the child's exit code (0 on success), or -1 on failure.
 */
int main(int argc, char** argv)
{
	printf("======== USER: test forking...\n");
	int id = fork();
	int ret = 0;

	if (id < 0) {
		perror(" PARENT: fork");
		exit(-1);
	}
	else if (id == 0) {
		printf(" CHILD: This is the child. My pid is %u\n", (unsigned) getpid());
		printf(" CHILD: Running memtest...\n");

		// execve() takes char *const argv[]; also avoid shadowing main's argv
		char* child_argv[] = {"/bin/memtest", "512", "kb", "10", NULL};
		execve(child_argv[0], child_argv, NULL);

		// only reached when the exec failed
		perror(" CHILD: exec() returned: ");
		ret = -1;
	}
	else {
		int status = 0;

		printf(" PARENT: Here is the parent. My pid is %u\n", (unsigned) getpid());
		wait(&status);

		// wait() yields the raw status word; decode the real exit code
		if (WIFEXITED(status))
			ret = WEXITSTATUS(status);
		else
			ret = -1;
		printf(" PARENT: My child returned with code %i...\n", ret);
	}

	return ret;
}

View file

@ -68,7 +68,7 @@ int main(int argc, char** argv)
exit(1); exit(1);
} }
testdirent = readdir(testdir); testdirent = readdir(testdir);
printf("1. Dirent: %s\n", testdirent->d_name); printf("1. Dirent: %s", testdirent->d_name);
closedir(testdir); closedir(testdir);
return errno; return errno;

View file

@ -20,83 +20,41 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <time.h> #include <unistd.h>
#include <sys/times.h> #include <fcntl.h>
#include <errno.h>
void sleep(int sec) { #include <dirent.h>
struct tms tms;
clock_t t, s = times(&tms);
do {
t = times(&tms);
}
while (t - s <= 1000 * sec);
}
int print_usage() { int print_usage() {
printf("usage: size mb/kb/b [chunks]\n"); printf("usage: [size mb/kb/b]");
exit(-1); exit(0);
} }
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
int multp = 0; int m = 0;
int size = 0; uint32_t size = 0;
int chunks = 1; if(argc <= 2)
void **test;
if (argc <= 2 || argc > 4)
print_usage(); print_usage();
if(argc == 3) {
size = atoi(argv[1]); if(!strcmp(argv[2], "mb"))
if (size <= 0) m = 1024*1024;
print_usage(); else if(!strcmp(argv[2], "kb"))
m = 1024;
if (!strcasecmp(argv[2], "mb")) else if(!strcmp(argv[2], "b"))
multp = (1 << 20); m = 0;
else if (!strcasecmp(argv[2], "kb"))
multp = (1 << 10);
else if (!strcasecmp(argv[2], "b"))
multp = (1 << 0);
else
print_usage();
size *= multp;
if (argc == 4)
chunks = atoi(argv[3]);
test = malloc(chunks * sizeof(void *));
printf("malloc(%lu)\n", chunks * sizeof(void *));
if (!test) {
printf("malloc(%lu) - FAILED!\n", chunks * sizeof(void *));
exit(-1);
}
// allocate...
int i;
for (i = 0; i < chunks; i++) {
test[i] = malloc(size);
if (test[i])
printf("malloc(%d)\tCHUNK: %d START: %p END: %p\n", size, i, test[i], test[i] + size);
else else
printf("malloc(%d)\tFAILED! Abort allocation, start with freeing memory\n", size); print_usage();
sleep(1);
} }
if(argc > 3)
print_usage();
size = atoi(argv[1]);
if(size <= 0)
print_usage();
// and release again size *= m;
for (i = 0; i < chunks; i++) { uint8_t* test = malloc(size);
if (test[i]) { printf("malloc(%d) - START: %p END: %p \n", size, test, test + size);
free(test[i]);
printf("free(%p)\tCHUNK: %d\n", test[i], i);
}
sleep(1);
}
free(test);
printf("free(%p)\n", test);
return 0; return 0;
} }

View file

@ -56,8 +56,9 @@ L1:
call rax call rax
L2: L2:
; register a function to be called at normal process termination ; register a function to be called at normal process termination
mov rdi, __do_global_dtors push __do_global_dtors
call atexit call atexit
pop rax
; call init function ; call init function
call __do_global_ctors call __do_global_ctors
@ -75,17 +76,13 @@ L4:
; arguments are already on the stack ; arguments are already on the stack
; call the user's function ; call the user's function
pop rdi ; argc
pop rsi ; argv pointer
pop rdx ; env pointer
call main call main
; call exit from the C library so atexit gets called, and the ; call exit from the C library so atexit gets called, and the
; C++ destructors get run. This calls our exit routine below ; C++ destructors get run. This calls our exit routine below
; when it's done. ; when it's done.
; call "exit" ; call "exit"
mov rdi, rax push rax
call exit call exit
; endless loop ; endless loop

View file

@ -85,7 +85,7 @@ syscall(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2,
asm volatile (_SYSCALLSTR(INT_SYSCALL) asm volatile (_SYSCALLSTR(INT_SYSCALL)
: "=a" (res) : "=a" (res)
: "D" (nr), "S" (arg0), "d" (arg1), "c" (arg2), "b" (arg3), "a" (arg4) : "0" (nr), "b" (arg0), "c" (arg1), "d" (arg2), "S" (arg3), "D" (arg4)
: "memory", "cc"); : "memory", "cc");
return res; return res;

View file

@ -2,18 +2,6 @@
symbol-file metalsvm.sym symbol-file metalsvm.sym
target remote localhost:1234 target remote localhost:1234
# Debugging 32bit code # Configure breakpoints and everything as you wish here.
#set architecture i386 break main
#break stublet
#continue
# Debugging 64bit code
set architecture i386:x86-64
#break main
# Debugging userspace
#add-symbol-file newlib/examples/memtest.sym 0x40200000
#break main
#continue # skip kernel main()
continue continue

View file

@ -1,15 +0,0 @@
#!ipxe
# iPXE is an open-source network boot firmware.
# It provides a full PXE implementation enhanced with additional features such as
# booting from HTTP, FTP, iSCSI SAN, Fibre Channel SAN, Wireless, WAN or Infiniband
#
# http://ipxe.org/
#
# We use it to rapidly compile & debug metalsvm on real hardware.
# This script is fetched and executed by iPXE. This enables easy changes in the boot
# procedure without recompiling iPXE and reflashing your USB thumbdrive or network boot ROM.
kernel http://134.130.62.174:8080/metalsvm.elf
module http://134.130.62.174:8080/tools/initrd.img
boot