Compare commits


No commits in common. "x64_paging" and "master" have entirely different histories.

69 changed files with 16731 additions and 4413 deletions

.gitignore

@@ -17,7 +17,6 @@ tools/make_initrd
tools/scc_bootinfo.asm
newlib/examples/hello
newlib/examples/memtest
newlib/examples/fork
newlib/examples/jacobi
newlib/examples/echo
newlib/examples/tests

Makefile

@@ -1,13 +1,8 @@
NAME = metalsvm
# For 64bit support, you have to define BIT as 64
# Note: do not forget to 'make veryclean' after changing BIT!!!
BIT = 64
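# Example (hypothetical workflow, per the note above): switching an existing
# 32bit tree to a 64bit build
#   make veryclean && make BIT=64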
ARCH = x86
SMP = 2
DEBUG = 1
TOPDIR = $(shell pwd)
ARCH = x86
# For 64bit support, you have to define BIT as 64
BIT=32
NAME = metalsvm
LWIPDIRS = lwip/src/arch lwip/src/api lwip/src/core lwip/src/core/ipv4 lwip/src/netif
DRIVERDIRS = drivers/net drivers/char
KERNDIRS = libkern kernel mm fs apps arch/$(ARCH)/kernel arch/$(ARCH)/mm arch/$(ARCH)/scc $(LWIPDIRS) $(DRIVERDIRS)
@@ -35,64 +30,35 @@ RANLIB_FOR_TARGET = $(CROSSCOMPREFIX)ranlib
STRIP_FOR_TARGET = $(CROSSCOMPREFIX)strip
READELF_FOR_TARGET = $(CROSSCOMPREFIX)readelf
# Tools
MAKE = make
RM = rm -rf
NASM = nasm
# For 64bit code, you have to use qemu-system-x86_64
QEMU = qemu-system-i386
GDB = gdb
ifeq ($(BIT), 32)
QEMU = qemu-system-i386
else ifeq ($(BIT), 64)
QEMU = qemu-system-x86_64
endif
INCLUDE = -I$(TOPDIR)/include \
-I$(TOPDIR)/arch/$(ARCH)/include \
-I$(TOPDIR)/lwip/src/include \
-I$(TOPDIR)/lwip/src/include/ipv4 \
-I$(TOPDIR)/drivers
GDBFLAGS = -x script.gdb
QEMUFLAGS = -smp $(SMP) -serial tcp::12346,server \
-nographic -monitor stdio \
-net nic,model=rtl8139 \
-net user,hostfwd=tcp::12345-:4711
NASMFLAGS = -felf$(BIT) -g -i$(TOPDIR)/include/metalsvm/
CFLAGS = -m$(BIT) -Wall -ffreestanding $(INCLUDE) $(STACKPROT)
# For 64bit support, you have to define -felf64 instead of -felf32
NASMFLAGS = -felf32 -g -i$(TOPDIR)/include/metalsvm/
INCLUDE = -I$(TOPDIR)/include -I$(TOPDIR)/arch/$(ARCH)/include -I$(TOPDIR)/lwip/src/include -I$(TOPDIR)/lwip/src/include/ipv4 -I$(TOPDIR)/drivers
# For 64bit support, you have to define "-m64 -mno-red-zone" instead of "-m32 -march=i586"
# Compiler options for final code
CFLAGS = -g -m32 -march=i586 -Wall -O2 -fstrength-reduce -fomit-frame-pointer -finline-functions -ffreestanding $(INCLUDE) $(STACKPROT)
# Compiler options for debugging
#CFLAGS = -g -O -m32 -march=i586 -Wall -fomit-frame-pointer -ffreestanding $(INCLUDE) $(STACKPROT)
ARFLAGS = rsv
LDFLAGS = -T link$(BIT).ld -z max-page-size=0x1000 --defsym __BUILD_DATE=$(shell date +'%Y%m%d') --defsym __BUILD_TIME=$(shell date +'%H%M%S')
LDFLAGS = -T link$(BIT).ld -z max-page-size=4096 --defsym __BUILD_DATE=$(shell date +'%Y%m%d') --defsym __BUILD_TIME=$(shell date +'%H%M%S')
STRIP_DEBUG = --strip-debug
KEEP_DEBUG = --only-keep-debug
# Do not change to elf64!
# The Multiboot spec can only boot elf32 binaries
OUTPUT_FORMAT = -O elf32-i386
CFLAGS_FOR_NEWLIB = -m$(BIT) $(STACKPROT)
LDFLAGS_FOR_NEWLIB = -m$(BIT)
CFLAGS_FOR_TOOLS = -m$(BIT) -O2 -Wall
# For 64bit support, you have to define -m64 instead of "-m32 -march=i586"
CFLAGS_FOR_NEWLIB = -m32 -march=i586 -O2 $(STACKPROT)
# For 64bit support, you have to define -m64 instead of "-m32 -march=i586"
LDFLAGS_FOR_NEWLIB = -m32 -march=i586
# For 64bit support, you have to define -m64 instead of "-m32"
CFLAGS_FOR_TOOLS = -m32 -O2 -Wall
LDFLAGS_FOR_TOOLS =
NASMFLAGS_FOR_NEWLIB = -felf$(BIT)
# Setup debugging flags here
ifndef DEBUG
CFLAGS += -O2 -fomit-frame-pointer -fstrength-reduce -finline-functions
CFLAGS_FOR_NEWLIB += -O2
else
CFLAGS += -O0 -gdwarf-2
CFLAGS_FOR_NEWLIB += -O0 -gdwarf-2
endif
ifeq ($(BIT), 32)
CFLAGS += -march=i586
CFLAGS_FOR_NEWLIB += -march=i586
LDFLAGS_FOR_NEWLIB += -march=i586
else ifeq ($(BIT), 64)
CFLAGS += -mno-red-zone
endif
# For 64bit support, you have to define -felf64 instead of -felf32
NASMFLAGS_FOR_NEWLIB = -felf32
# Prettify output
V = 0
@@ -102,15 +68,11 @@ ifeq ($V,0)
endif
default: all
all: newlib tools $(NAME).elf
newlib:
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) \
LDFLAGS="$(LDFLAGS_FOR_NEWLIB)" \
CFLAGS="$(CFLAGS_FOR_NEWLIB)" \
NASMFLAGS="$(NASMFLAGS_FOR_NEWLIB)" \
CC_FOR_TARGET=$(CC_FOR_TARGET) \
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) LDFLAGS="$(LDFLAGS_FOR_NEWLIB)" CFLAGS="$(CFLAGS_FOR_NEWLIB)" NASMFLAGS="$(NASMFLAGS_FOR_NEWLIB)" CC_FOR_TARGET=$(CC_FOR_TARGET) \
CXX_FOR_TARGET=$(CXX_FOR_TARGET) \
GCC_FOR_TARGET=$(GCC_FOR_TARGET) \
AR_FOR_TARGET=$(AR_FOR_TARGET) \
@@ -124,7 +86,7 @@ newlib:
READELF_FOR_TARGET=$(READELF_FOR_TARGET) -C newlib
tools:
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) CFLAGS="$(CFLAGS_FOR_TOOLS)" LDFLAGS="$(LDFLAGS_FOR_TOOLS)" -C tools
$(MAKE) CFLAGS="$(CFLAGS_FOR_TOOLS)" LDFLAGS="$(LDFLAGS_FOR_TOOLS)" -C tools
$(NAME).elf:
$Q$(LD_FOR_TARGET) $(LDFLAGS) -o $(NAME).elf $^
@@ -134,28 +96,23 @@ $(NAME).elf:
$Q$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $(OUTPUT_FORMAT) $(NAME).elf
qemu: newlib tools $(NAME).elf
$(QEMU) $(QEMUFLAGS) -kernel metalsvm.elf -initrd tools/initrd.img
$(QEMU) -monitor stdio -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img
debug: newlib tools $(NAME).elf
$(TERM) -e $(GDB) $(GDBFLAGS) &
$(TERM) -e telnet localhost 12346 &
$(TERM) -e telnet localhost 12345 &
$(QEMU) $(QEMUFLAGS) -s -S -kernel metalsvm.elf -initrd tools/initrd.img
qemudbg: newlib tools $(NAME).elf
$(QEMU) -s -S -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img
hwdebug: newlib tools $(NAME).elf
killall -qw mongoose cu || true
mongoose -p 8080 &
hwreset push reset 300
cu -s 115200 -l /dev/ttyUSB0
gdb: $(NAME).elf
make qemudbg > /dev/null &
$(GDB) -x script.gdb
clean:
$Q$(RM) $(NAME).elf $(NAME).sym *~ core core.*
$Q$(RM) $(NAME).elf $(NAME).sym *~
$Q$(MAKE) -C tools clean
@echo Cleaned.
veryclean: clean
$Q$(MAKE) -C newlib veryclean
@echo Very cleaned.
@echo Very cleaned
#depend:
# for i in $(SUBDIRS); do $(MAKE) -k -C $$i depend; done
@@ -167,15 +124,16 @@ veryclean: clean
$Q$(CPP_FOR_TARGET) -MF $*.dep -MT $*.o -MM -D__KERNEL__ $(CFLAGS) $<
include/metalsvm/config.inc: include/metalsvm/config.h
@echo "; This file is generated automatically from the config.h file." > $@
@echo "; Before editing this, you should consider editing config.h." >> $@
@sed -nre 's/^[\t ]*#define[\t ]+([a-z_0-9]+)([\t ]+.*)*/%define \1/ip' $< >> $@
@sed -nre 's/^[\t ]*#define[\t ]+([a-z_0-9]+)[\t ]+([a-z_0-9.]+)([\t ]+.*)*/%define \1 \2/ip' $< >> $@
@echo "; This file is generated automatically from the config.h file." > include/metalsvm/config.inc
@echo "; Before editing this, you should consider editing config.h." >> include/metalsvm/config.inc
@awk '/^#define MAX_CORES/{ print "%define MAX_CORES", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc
@awk '/^#define KERNEL_STACK_SIZE/{ print "%define KERNEL_STACK_SIZE", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc
@awk '/^#define CONFIG_VGA/{ print "%define CONFIG_VGA", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc
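# E.g. a "#define MAX_CORES 2" line in config.h becomes "%define MAX_CORES 2" in config.inc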
%.o : %.asm include/metalsvm/config.inc
@echo [ASM] $@
$Q$(NASM) $(NASMFLAGS) -o $@ $<
.PHONY: default all clean qemu qemudbg gdb debug newlib tools
.PHONY: default all clean emu gdb newlib tools
include $(addsuffix /Makefile,$(SUBDIRS))

apps/Makefile

@@ -1,4 +1,4 @@
C_source := tests.c echo.c netio.c jacobi.c laplace.c gfx_client.c gfx_generic.c memory.c membench.c
C_source := tests.c echo.c netio.c jacobi.c laplace.c gfx_client.c gfx_generic.c
MODULE := apps
include $(TOPDIR)/Makefile.inc

apps/membench.c

@@ -1,175 +0,0 @@
/*
* Copyright 2014 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*
* Based on:
* www.cs.berkeley.edu/~richie/bebop/notes/matmul/files/membench/
*/
#include <metalsvm/stdio.h>
#include <metalsvm/vma.h>
#include <metalsvm/malloc.h>
#include <asm/processor.h>
#include <asm/pmc.h>
#define SAMPLES 1000
#define CLKS 1000000000L
#define STRIDE_MIN 1
#define RANGE_MIN (32) /* 32 Byte */
#define RANGE_MAX (32*1024*1024) /* 32 MiB */
#define FLUSH 1
static inline void warmup(char* memory)
{
#ifdef FLUSH
tlb_flush();
flush_cache();
#else
int index;
for (index = 0; index < RANGE_MAX; index++) {
memory[index] = 0;
}
#endif
}
int membench(void* arg)
{
kprintf("======= Starting membench\n");
/* Init PMCs */
struct pmc_caps* cap = pmc_init();
kprintf("PMC architecural version: %u\n", cap->version);
kprintf("There are %u general purpose PMCs (%u bit wide) available\n", cap->gp_count, cap->gp_width);
kprintf("There are %u fixed function PMCs (%u bit wide) available\n", cap->ff_count, cap->ff_width);
int i;
uint16_t tests[][2] = {
#if 0
{PMC_EVT_MEM_LOAD_RETIRED_DTLB_MISS, 0}
{PMC_EVT_MEM_LOAD_RETIRED_L1D_MISS, 0}
{PMC_EVT_MEM_LOAD_RETIRED_L2_MISS, 0}
#elif 0
{PMC_EVT_PAGE_WALK_CLKS, PMC_EVT_PAGE_WALK_COUNT}
#else
{PMC_EVT_DTLB_MISS_ANY, PMC_EVT_DTLB_MISS_LD},
{PMC_EVT_DTLB_MISS_ANY, PMC_EVT_DTLB_MISS_L0_LD},
{PMC_EVT_DTLB_MISS_ANY, PMC_EVT_DTLB_MISS_ST},
#endif
};
char *memory = palloc(RANGE_MAX, 0);
kprintf("Allocated test memory: %#lx bytes at %p\n", RANGE_MAX, memory);
kprintf("#%12s%12s%12s%12s%12s%12s\n", "range", "stride", "steps", "runs", "reads", "results");
irq_disable();
/* Setup PMCs */
pmc_stop_all();
pmc_ff_config(1, PMC_FIXED_OS); // CPU_CLK_UNHALTED.CORE
/* Variables for PMC values */
uint64_t gp[2], ff[3];
uint64_t gp_ovh[2], ff_ovh[3];
int64_t gp_real[2], ff_real[3];
uint64_t run, steps, tsteps, index, runs, reads;
uint64_t range, stride;
volatile char temp;
/* Run the timing experiments */
for (range = RANGE_MIN; range <= RANGE_MAX; range *= 2) {
for (stride = STRIDE_MIN; stride < range; stride *= 2) {
runs = SAMPLES * stride / range + 1;
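// runs is chosen such that runs * (range / stride) stays close to SAMPLES reads per measurement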
for (i = 0; i < sizeof(tests) / 4; i++) {
pmc_gp_config(0, tests[i][0], PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0);
pmc_gp_config(1, tests[i][1], PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0);
steps = 0;
tsteps = 0;
/* Time the loop with strided access + loop overhead */
warmup(memory);
pmc_reset_all();
pmc_start_all();
do {
for (run = 0; run < runs; run++) {
for (index = 0; index < range; index += stride) {
temp = memory[index];
}
}
steps++;
} while (pmc_ff_read(1) < CLKS);
pmc_stop_all();
gp[0] = pmc_gp_read(0);
gp[1] = pmc_gp_read(1);
ff[1] = pmc_ff_read(1);
/* Try to time just the overheads */
warmup(memory);
pmc_reset_all();
pmc_start_all();
do {
for (run = 0; run < runs; run++) {
for (index = 0; index < range; index += stride) {
temp++;
}
}
tsteps++;
} while (tsteps < steps);
pmc_stop_all();
gp_ovh[0] = pmc_gp_read(0);
gp_ovh[1] = pmc_gp_read(1);
ff_ovh[1] = pmc_ff_read(1);
gp_real[0] = gp[0] - gp_ovh[0];
gp_real[1] = gp[1] - gp_ovh[1];
ff_real[1] = ff[1] - ff_ovh[1];
reads = runs * steps * range / stride;
if (i == 0)
kprintf("%12llu%12llu%12llu%12llu%12llu%12llu", range, stride, steps, runs, reads, ff_real[1]);
kprintf("%12llu", gp_real[0]);
kprintf("%12llu", gp_real[1]);
kprintf("\t");
}
kprintf("\n");
}
kprintf("\n");
}
irq_enable();
return 0;
}

apps/memory.c

@@ -1,445 +0,0 @@
/*
* Copyright 2014 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
#include <metalsvm/stdlib.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdarg.h>
#include <metalsvm/memory.h>
#include <metalsvm/time.h>
#include <metalsvm/tasks.h>
#include <metalsvm/vma.h>
#include <metalsvm/malloc.h>
#include <asm/page.h>
#include <asm/irqflags.h>
#include <asm/processor.h>
#include <asm/pmc.h>
#define ITERATIONS 1000
#define PAGE_COUNT 40
#define SIZE (PAGE_COUNT*PAGE_SIZE)
#define VIRT_FROM_ADDR 0x50000000 // Userspace
#define VIRT_TO_ADDR 0x30000000 // Kernelspace
extern atomic_int32_t total_pages;
extern atomic_int32_t total_allocated_pages;
extern atomic_int32_t total_available_pages;
/** @brief Simple helper to format our test results */
static void test(size_t expr, char *fmt, ...)
{
void _putchar(int c, void *arg) { kputchar(c); } // for kvprintf
static int c = 1;
va_list ap;
va_start(ap, fmt);
kprintf("%s #%u:\t", (expr) ? "PASSED" : "FAILED", c++);
kvprintf(fmt, _putchar, NULL, 10, ap);
kputs("\n");
va_end(ap);
if (!expr)
abort();
}
/** @brief Linear feedback shift register PRNG */
static uint16_t rand()
{
static uint16_t lfsr = 0xACE1u;
static uint16_t bit;
bit = ((lfsr >> 0) ^ (lfsr >> 2) ^ (lfsr >> 3) ^ (lfsr >> 5) ) & 1;
return lfsr = (lfsr >> 1) | (bit << 15);
}
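// (Taps 16,14,13,11, i.e. x^16 + x^14 + x^13 + x^11 + 1: a maximal-length 16 bit LFSR, period 65535.)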
/** @brief BSD sum algorithm (the Unix 'sum' command), also used by QEMU */
uint16_t checksum(size_t start, size_t end)
{
size_t addr;
uint16_t sum;
for(addr = start, sum = 0; addr < end; addr++) {
uint8_t val = *((uint8_t *) addr);
sum = (sum >> 1) | (sum << 15);
sum += val;
}
return sum;
}
static int paging_stage2(void *arg);
/** @brief Test of the paging subsystem
*
* We will map a single physical memory region to two virtual regions.
* When writing to the first one, we should be able to read the same contents
* from the second one.
*/
static void paging(void)
{
size_t c, sum;
size_t *p1, *p2;
size_t virt_from, virt_to;
size_t phys;
size_t t;
int ret;
int flags;
// disable irqs to prevent context switches for rdtsc measurement
flags = irq_nested_disable();
// show original page maps
t = rdtsc();
page_dump(PG_XD | PG_GLOBAL | PG_USER | PG_RW);
kprintf("delta_t = %lu\n", rdtsc() - t);
t = rdtsc();
page_stats(1); // reset accessed and dirty bits
kprintf("delta_t = %lu\n", rdtsc() - t);
irq_nested_enable(flags);
kprintf("bookkeeping pages:\n");
kprintf(" - total:\t%lu\n", atomic_int32_read(&total_pages));
kprintf(" - alloc:\t%lu\n", atomic_int32_read(&total_allocated_pages));
kprintf(" - avail:\t%lu\n", atomic_int32_read(&total_available_pages));
// allocate physical page frames
phys = get_pages(PAGE_COUNT);
test(phys, "get_pages(%lu) = %#lx", PAGE_COUNT, phys);
// create first mapping
virt_from = map_region(VIRT_FROM_ADDR, phys, PAGE_COUNT, MAP_USER_SPACE);
test(virt_from, "map_region(%#lx, %#lx, %lu, %#x) = %#lx", VIRT_FROM_ADDR, phys, PAGE_COUNT, MAP_USER_SPACE, virt_from);
// check address translation
phys = virt_to_phys(virt_from);
test(phys, "virt_to_phys(%#lx) = %#lx", virt_from, phys);
page_dump(PG_XD | PG_GLOBAL | PG_USER | PG_RW);
// test set_page_flags()
ret = set_page_flags(virt_from, PAGE_COUNT, MAP_CODE);
test(!ret, "set_page_flags(%#lx, %u, %x)", virt_from, PAGE_COUNT, MAP_USER_SPACE|MAP_CODE); // now executable
page_dump(PG_XD | PG_GLOBAL | PG_USER | PG_RW);
// write test data
p1 = (size_t *) virt_from;
for (c = 0; c < SIZE/sizeof(size_t); c++) {
p1[c] = c;
}
// create second mapping pointing to the same page frames
virt_to = map_region(VIRT_TO_ADDR, phys, PAGE_COUNT, 0);
test(virt_to, "map_region(%#lx, %#lx, %lu, %#x) = %#lx", VIRT_TO_ADDR, phys, PAGE_COUNT, 0, virt_to);
// check address translation
phys = virt_to_phys(virt_to);
test(phys, "virt_to_phys(%#lx) = %#lx", virt_to, phys);
page_dump(PG_XD | PG_GLOBAL | PG_USER | PG_RW);
// check if both mapped areas are equal
p2 = (size_t *) virt_to;
for (c = 0; c < SIZE/sizeof(size_t); c++) {
if (p1[c] != p2[c])
test(0, "data mismatch: *(%p) != *(%p)", &p1[c], &p2[c]);
}
test(1, "data is equal");
// try to remap without MAP_REMAP
virt_to = map_region(VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, 0);
test(!virt_to, "map_region(%#lx, %#lx, %lu, %#x) = %#lx (without MAP_REMAP flag)", VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, 0, virt_to);
// try to remap with MAP_REMAP
virt_to = map_region(VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, MAP_REMAP);
test(virt_to, "map_region(%#lx, %#lx, %lu, %#x) = %#lx (with MAP_REMAP flag)", VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, MAP_REMAP, virt_to);
// check if data is not equal anymore (we remapped with +PAGE_SIZE offset)
p1 = (size_t *) (virt_from + PAGE_SIZE);
for (c = 0; c < (SIZE-PAGE_SIZE)/sizeof(size_t); c++) {
if (p1[c] != p2[c])
test(0, "data mismatch at *(%p) != *(%p)", &p1[c], &p2[c]);
}
test(1, "data is equal");
// try to remap with MAP_REMAP
virt_to = map_region(VIRT_TO_ADDR, phys, PAGE_COUNT, MAP_REMAP);
test(virt_to, "map_region(%#lx, %#lx, %lu, %#x) = %#lx (with MAP_REMAP flag)", VIRT_TO_ADDR, phys, PAGE_COUNT, MAP_REMAP, virt_to);
// test unmapping
ret = unmap_region(VIRT_FROM_ADDR, PAGE_COUNT);
test(!ret, "unmap_region(%#lx, %lu) = %u", VIRT_FROM_ADDR, PAGE_COUNT, ret);
page_dump(PG_XD | PG_GLOBAL | PG_USER | PG_RW);
// calc checksum
sum = checksum(virt_to, virt_to + SIZE);
test(sum == 23196, "checksum(%p, %p) = %lu", virt_to, virt_to + SIZE, sum);
size_t cr3 = read_cr3();
kprintf("cr3 old = %#lx\n", cr3);
create_kernel_task(0, paging_stage2, &sum, NORMAL_PRIO);
wait(&ret);
test(!ret, "paging stage 2 returned with code = %i", ret);
}
static int paging_stage2(void *arg)
{
size_t old, new;
kprintf("PAGING: entering stage 2...\n");
size_t cr3 = read_cr3();
kprintf("cr3 new = %#lx\n", cr3);
old = *((size_t *) arg);
kprintf("old sum: %lu\n", old);
page_dump(PG_XD | PG_GLOBAL | PG_USER | PG_RW);
new = checksum(VIRT_TO_ADDR, VIRT_TO_ADDR + SIZE);
test(old == new, "checksum(%p, %p) = %lu", VIRT_TO_ADDR, VIRT_TO_ADDR + SIZE, new);
return 0;
}
/** @brief Test of the VMA allocator */
static void vma(void)
{
int ret;
vma_dump();
// vma_alloc
size_t a1 = vma_alloc(SIZE, VMA_HEAP);
test(a1, "vma_alloc(%#x, %#x) = %#lx", SIZE, VMA_HEAP, a1);
size_t a2 = vma_alloc(SIZE, VMA_HEAP|VMA_USER);
test(a2 != 0, "vma_alloc(%#x, %#x) = %#lx", SIZE, VMA_HEAP|VMA_USER, a2);
vma_dump();
// vma_free
ret = vma_free(a1, a1+SIZE);
test(ret >= 0, "vma_free(%#lx, %#lx) = %i", a1, a1+SIZE, ret);
ret = vma_free(a2, a2+SIZE);
test(ret >= 0, "vma_free(%#lx, %#lx) = %i", a2, a2+SIZE, ret);
vma_dump();
// vma_add
ret = vma_add(VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, VMA_HEAP|VMA_USER);
test(ret >= 0, "vma_add(%#lx, %#lx, %#x) = %u", VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, VMA_HEAP|VMA_USER, ret);
ret = vma_add(VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, VMA_HEAP|VMA_USER);
test(ret >= 0, "vma_add(%#lx, %#lx, %#x) = %u", VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, VMA_HEAP|VMA_USER, ret);
ret = vma_add(VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, VMA_HEAP|VMA_USER);
test(ret >= 0, "vma_add(%#lx, %#lx, %#x) = %u", VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, VMA_HEAP|VMA_USER, ret);
vma_dump();
// vma_free
ret = vma_free(VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR);
test(ret >= 0, "vma_free(%#lx, %#lx) = %u", VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, ret);
ret = vma_free(VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE);
test(ret >= 0, "vma_free(%#lx, %#lx) = %u", VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, ret);
ret = vma_free(VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE);
test(ret >= 0, "vma_free(%#lx, %#lx) = %u", VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, ret);
vma_dump();
}
/** @brief Test of the kernel malloc allocator */
static void malloc(void)
{
int i;
int* p[20];
int* a;
// kmalloc() test
buddy_dump();
a = kmalloc(SIZE);
test(a != NULL, "kmalloc(%lu) = %p", SIZE, a);
buddy_dump();
// simple write/read test
for (i=0; i<SIZE/sizeof(int); i++)
a[i] = i;
for (i=0; i<SIZE/sizeof(int); i++) {
if (a[i] != i)
test(0, "data mismatch: *(%p) != %lu", &a[i], i);
}
test(1, "data is equal");
// kfree() test
kfree(a);
test(1, "kfree(%p)", a);
buddy_dump();
// some random malloc/free patterns to stress the buddy system
for (i=0; i<20; i++) {
uint16_t sz = rand();
p[i] = kmalloc(sz);
test(p[i] != NULL, "kmalloc(%u) = %p", sz, p[i]);
}
buddy_dump();
for (i=0; i<20; i++) {
kfree(p[i]);
test(1, "kfree(%p)", p[i]);
}
buddy_dump();
}
/** @brief A memory benchmark for page table walks and TLB misses */
int bench(void)
{
// init hardware performance counters
struct pmc_caps* cap = pmc_init();
if (cap->version == 0x21) { // QEmu returns garbage
kputs("QEMU does not support PMCs.. skipping benchmark!\n");
return -1;
}
kprintf("PMC architecural version: %u\n", cap->version);
kprintf("There are %u general purpose PMCs (%u bit wide) available\n", cap->gp_count, cap->gp_width);
kprintf("There are %u fixed function PMCs (%u bit wide) available\n", cap->ff_count, cap->ff_width);
// setup PMCs
pmc_stop_all();
pmc_gp_config(0, PMC_EVT_PAGE_WALK_CLKS, PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0);
pmc_gp_config(1, PMC_EVT_PAGE_WALK_COUNT, PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0);
size_t phyaddr = get_page();
size_t viraddr;
size_t pages = 512*511;
size_t virbase = 2*KERNEL_SPACE;
kprintf("virbase %#llx KERNEL_SPACE %#llx\n", virbase, KERNEL_SPACE);
for (viraddr = virbase; viraddr < virbase+pages*PAGE_SIZE; viraddr += PAGE_SIZE) {
kprintf("map at %#llx\n", viraddr);
size_t ret = map_region(viraddr, phyaddr, 1, MAP_KERNEL_SPACE);
if (ret != viraddr) {
kprintf("map failed at %#llx\n", viraddr);
break;
}
}
int i;
for (i=0; i < ITERATIONS; i++) {
tlb_flush();
pmc_reset_all();
pmc_start_all();
for (viraddr = virbase; viraddr < virbase+pages*PAGE_SIZE; viraddr += PAGE_SIZE) {
char * p = (char *) viraddr;
(*p)++;
}
pmc_stop_all();
uint64_t clks = pmc_gp_read(0);
uint64_t count = pmc_gp_read(1);
kprintf("%llu\n", 1000000 * clks / count);
}
return 0;
}
int smp(void* arg)
{
kprintf("Hello from Core %d\n", smp_id());
page_dump(PG_XD | PG_GLOBAL | PG_USER | PG_RW);
return 33;
}
/** @brief This is a simple procedure to test memory management subsystem */
int memory(void* arg)
{
int ret;
tid_t id;
#if 0
size_t t0, t1, t2, t3;
size_t pages;
for (pages = 1; pages < (1 << 25); pages++) {
t0 = rdtsc();
size_t ret = map_region((1 << 28), 0x1000, pages, MAP_KERNEL_SPACE);
t1 = rdtsc();
if (!ret)
break;
t2 = rdtsc();
ret = unmap_region((1 << 28), pages);
t3 = rdtsc();
kprintf("%llu\t%llu\t%llu\n", pages, t1-t0, t3-t2);
}
kprintf("======== USER: malloc test...\n");
char* argv[] = {"/bin/memtest", "25", "10"};
ret = create_user_task(&id, argv[0], argv);
test(!ret, "calling %s %s %s with id = %i, ret = %i", argv[0], argv[1], argv[2], id, ret);
wait(&ret);
test(!ret, "userspace task returned with code = %d", ret);
return 0;
kprintf("======== PAGING: test started...\n");
paging();
kprintf("======== VMA: test started...\n");
vma();
kprintf("======== MALLOC: test started...\n");
malloc();
kprintf("======== USER: test fork...\n");
char* argv2[] = {"/bin/fork", NULL};
ret = create_user_task(&id, argv2[0], argv2);
test(!ret, "calling %s with id = %i, ret = %i", argv2[0], id, ret);
wait(&ret);
test(!ret, "userspace task returned with code = %d", ret);
#endif
kprintf("======== BENCH: memory and TLB benchmark started...\n");
bench();
kprintf("======== SMP: test multicore...\n");
ret = create_kernel_task_on_core(&id, smp, NULL, NORMAL_PRIO, 1);
wait(&ret);
test(!ret, "smp task returned with code = %d", ret);
kprintf("======== All tests finished successfull...\n");
return 0;
}

apps/tests.c

@@ -43,8 +43,6 @@
int laplace(void* arg);
int jacobi(void* arg);
int memory(void* arg);
int membench(void* arg);
void echo_init(void);
void netio_init(void);
@@ -746,13 +744,8 @@ int test_init(void)
create_user_task(NULL, "/bin/jacobi", jacobi_argv);
//create_user_task_on_core(NULL, "/bin/jacobi", jacobi_argv, 1);
#endif
#ifdef START_MEMORY
create_kernel_task(NULL, memory, NULL, NORMAL_PRIO);
#endif
#ifdef START_MEMBENCH
create_kernel_task(NULL, membench, NULL, NORMAL_PRIO);
#endif
#if defined(START_MMNIF_TEST) && defined(CONFIG_LWIP) && LWIP_SOCKET
#ifdef START_MMNIF_TEST
#if defined(CONFIG_LWIP) && LWIP_SOCKET
if (RCCE_IAM == 0) {
kprintf("Start /bin/server...\n");
create_user_task(NULL, "/bin/server", server_argv);
@@ -761,6 +754,7 @@ int test_init(void)
kprintf("Start /bin/client...\n");
create_user_task(NULL, "/bin/client", client_argv);
}
#endif
#endif
return 0;


@@ -46,7 +46,6 @@
//#define START_HELLO
//#define START_TESTS
//#define START_JACOBI
//#define START_MEMORY
//#define START_CHIEFTEST


@@ -34,7 +34,7 @@
// ____ _ _
// / ___| _ _ _ __ ___ | |__ ___ | |___
// \___ \| | | | '_ ` _ \| '_ \ / _ \| / __|
// ___) | |_| | | | | | | |_) | (_) | \__
// ___) | |_| | | | | | | |_) | (_) | \__ \
// |____/ \__, |_| |_| |_|_.__/ \___/|_|___/
// |___/
//
@@ -253,7 +253,7 @@
// _____ _ _
// | ___| _ _ __ ___| |_(_) ___ _ __ ___
// | |_ | | | | '_ \ / __| __| |/ _ \| '_ \/ __|
// | _|| |_| | | | | (__| |_| | (_) | | | \__
// | _|| |_| | | | | (__| |_| | (_) | | | \__ \
// |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
//
// #########################################################################################


@@ -100,6 +100,11 @@ inline static void outportl(unsigned short _port, unsigned int _data)
asm volatile("outl %1, %0"::"dN"(_port), "a"(_data));
}
inline static void uart_putchar(unsigned char _data)
{
outportb(0x2F8, _data);
}
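// (0x2F8 is the conventional base I/O port of COM2; COM1 would be 0x3F8.)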
/**
* read a byte from CMOS
* @param offset CMOS offset

arch/x86/include/asm/multiboot.h

@@ -35,12 +35,10 @@
#ifdef CONFIG_MULTIBOOT
/// Does the bootloader provide mem_* fields?
#define MULTIBOOT_INFO_MEM (1 << 0)
/// Does the bootloader provide a list of modules?
#define MULTIBOOT_INFO_MODS (1 << 3)
/// Does the bootloader provide a full memory map?
#define MULTIBOOT_INFO_MEM_MAP (1 << 6)
/* are there modules to do something with? */
#define MULTIBOOT_INFO_MODS 0x00000008
/* is there a full memory map? */
#define MULTIBOOT_INFO_MEM_MAP 0x00000040
typedef uint16_t multiboot_uint16_t;
typedef uint32_t multiboot_uint32_t;
@@ -116,6 +114,7 @@ struct multiboot_info
multiboot_uint16_t vbe_interface_off;
multiboot_uint16_t vbe_interface_len;
};
typedef struct multiboot_info multiboot_info_t;
struct multiboot_mmap_entry

arch/x86/include/asm/page.h

@@ -21,7 +21,6 @@
* @file arch/x86/include/asm/page.h
* @brief Definitions and functions related to paging
* @author Stefan Lankes
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*
* This file defines the interface for paging as well as structures related to paging.
*/
@@ -32,107 +31,86 @@
#include <metalsvm/stddef.h>
#include <metalsvm/stdlib.h>
/// Page offset bits
#define PAGE_BITS 12
/// The size of a single page in bytes
#define PAGE_SIZE ( 1L << PAGE_BITS)
#ifdef CONFIG_X86_32
/// Total operand width in bits
#define BITS 32
/// Linear/virtual address width
#define VIRT_BITS BITS
/// Physical address width (we don't support PAE)
#define PHYS_BITS BITS
/// Page map bits
#define PAGE_MAP_BITS 10
/// Number of page map indirections
#define PAGE_MAP_LEVELS 2
/// Mask the page address without page map flags
#define PAGE_MASK 0xFFFFF000
#elif defined(CONFIG_X86_64)
/// Total operand width in bits
#define BITS 64
/// Linear/virtual address width
#define VIRT_BITS 48
/// Physical address width (maximum value)
#define PHYS_BITS 52
/// Page map bits
#define PAGE_MAP_BITS 9
/// Number of page map indirections
#define PAGE_MAP_LEVELS 4
/// Mask the page address without page map flags
#define PAGE_MASK 0x000FFFFFFFFFF000
#endif
/// The number of entries in a page map table
#define PAGE_MAP_ENTRIES (1L << PAGE_MAP_BITS)
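// Consistency check: PAGE_BITS + PAGE_MAP_LEVELS * PAGE_MAP_BITS covers the whole
// virtual address width (12 + 2*10 = 32 on x86_32, 12 + 4*9 = 48 = VIRT_BITS on
// x86_64), so a table holds 1024 resp. 512 entries.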
// Base addresses of the self-mapped pagetables
#ifdef CONFIG_X86_32
#define PAGE_MAP_PGD 0xFFFFF000
#define PAGE_MAP_PGT 0xFFC00000
#elif defined(CONFIG_X86_64)
#define PAGE_MAP_PML4 0xFFFFFFFFFFFFF000
#define PAGE_MAP_PDPT 0xFFFFFFFFFFE00000
#define PAGE_MAP_PGD 0xFFFFFFFFC0000000
#define PAGE_MAP_PGT 0xFFFFFF8000000000
#endif
/// Align to next page
#define PAGE_FLOOR(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
/// Align to page
#define PAGE_CEIL(addr) ( (addr) & PAGE_MASK)
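// E.g. with PAGE_SIZE = 0x1000: PAGE_FLOOR(0x1234) == 0x2000 (rounds up),
// PAGE_CEIL(0x1234) == 0x1000 (rounds down).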
// Canonical address format
#ifdef CONFIG_X86_32
#define CANONICAL(addr) (addr)
#elif defined(CONFIG_X86_64)
#define CANONICAL(addr) sign_extend(addr, VIRT_BITS)
#endif
#define _PAGE_BIT_PRESENT 0 /* is present */
#define _PAGE_BIT_RW 1 /* writeable */
#define _PAGE_BIT_USER 2 /* userspace addressable */
#define _PAGE_BIT_PWT 3 /* page write through */
#define _PAGE_BIT_PCD 4 /* page cache disabled */
#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
#define _PAGE_BIT_PAT 7 /* on 4KB pages */
#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
#define _PAGE_BIT_SVM_STRONG 9 /* mark a virtual address range as used by the SVM system */
#define _PAGE_BIT_SVM_LAZYRELEASE 10 /* mark a virtual address range as used by the SVM system */
#define _PAGE_BIT_SVM_INIT 11 /* mark if the MBP proxy is used */
/// Page is present
#define PG_PRESENT (1 << 0)
#define PG_PRESENT (1 << _PAGE_BIT_PRESENT)
/// Page is read- and writable
#define PG_RW (1 << 1)
#define PG_RW (1 << _PAGE_BIT_RW)
/// Page is addressable from userspace
#define PG_USER (1 << 2)
#define PG_USER (1 << _PAGE_BIT_USER)
/// Page write through is activated
#define PG_PWT (1 << 3)
#define PG_PWT (1 << _PAGE_BIT_PWT)
/// Page cache is disabled
#define PG_PCD (1 << 4)
#define PG_PCD (1 << _PAGE_BIT_PCD)
/// Page was recently accessed (set by CPU)
#define PG_ACCESSED (1 << 5)
#define PG_ACCESSED (1 << _PAGE_BIT_ACCESSED)
/// Page is dirty due to recent write access (set by CPU)
#define PG_DIRTY (1 << 6)
/// Huge page: 4MB (or 2MB, 1GB)
#define PG_PSE (1 << 7)
#define PG_DIRTY (1 << _PAGE_BIT_DIRTY)
/// Big page: 4MB (or 2MB)
#define PG_PSE (1 << _PAGE_BIT_PSE)
/// Page is part of the MPB (SCC specific entry)
#define PG_MPE PG_PSE
/// Page attribute table
#define PG_PAT PG_PSE
#define PG_MPE PG_PSE
/// Global TLB entry (Pentium Pro and later)
#define PG_GLOBAL (1 << 8)
#define PG_GLOBAL (1 << _PAGE_BIT_GLOBAL)
/// Pattern flag
#define PG_PAT (1 << _PAGE_BIT_PAT)
/// This virtual address range is marked as used by the SVM system
#define PG_SVM (1 << 9)
#define PG_SVM_STRONG PG_SVM
#define PG_SVM PG_SVM_STRONG
#define PG_SVM_STRONG (1 << _PAGE_BIT_SVM_STRONG)
/// This virtual address range is marked as used by the SVM system
#define PG_SVM_LAZYRELEASE (1 << 10)
#define PG_SVM_LAZYRELEASE (1 << _PAGE_BIT_SVM_LAZYRELEASE)
/// Currently, no page frame is behind this page (only the MBP proxy)
#define PG_SVM_INIT (1 << 11)
/// Disable execution for this page
#define PG_XD (1L << 63)
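// (XD occupies bit 63 of a 64 bit entry, hence the 1L; it takes effect only when EFER.NXE is set.)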
#define PG_SVM_INIT (1 << _PAGE_BIT_SVM_INIT)
/// This is a whole set of flags (PRESENT,RW,ACCESSED,DIRTY) for kernelspace tables
#define PG_TABLE (PG_PRESENT|PG_RW|PG_XD)
#define KERN_TABLE (PG_PRESENT|PG_RW|PG_ACCESSED|PG_DIRTY)
/// This is a whole set of flags (PRESENT,RW,ACCESSED,DIRTY,USER) for userspace tables
#define USER_TABLE (PG_PRESENT|PG_RW|PG_ACCESSED|PG_DIRTY|PG_USER)
/// This is a whole set of flags (PRESENT,RW,GLOBAL) for kernelspace pages
#define PG_PAGE (PG_PRESENT|PG_RW|PG_GLOBAL|PG_XD)
#define KERN_PAGE (PG_PRESENT|PG_RW|PG_GLOBAL)
/// This is a whole set of flags (PRESENT,RW,USER) for userspace pages
#define USER_PAGE (PG_PRESENT|PG_RW|PG_USER)
/** @brief A single entry in a page map
#if __SIZEOF_POINTER__ == 4
#define PGT_ENTRIES 1024
#elif __SIZEOF_POINTER__ == 8
#define PGT_ENTRIES 512
#endif
/** @brief Page table structure
*
* Usually used as a pointer to a mapped page map entry.
* This structure keeps page table entries.\n
* On a 32bit system, a page table consists normally of 1024 entries.
*/
typedef size_t page_entry_t;
typedef struct page_table
{
/// Page table entries are unsigned 32bit integers.
size_t entries[PGT_ENTRIES];
} page_table_t __attribute__ ((aligned (4096)));
/** @brief Page directory structure
*
* This structure keeps page directory entries.\n
* On a 32bit system, a page directory consists normally of 1024 entries.
*/
typedef struct page_dir
{
/// Page dir entries are unsigned 32bit integers.
size_t entries[PGT_ENTRIES];
} page_dir_t __attribute__ ((aligned (4096)));
/** @brief Converts a virtual address to a physical
*
@@ -141,6 +119,28 @@ typedef size_t page_entry_t;
*/
size_t virt_to_phys(size_t viraddr);
/** @brief Allocates a virtual address space range of npages
*
* The allocated address range will span npages pages and carry the given flags (if any).
*
* @param npages The range in page-granularity
* @param flags further page flags
*
* @return The new range's address
*/
size_t vm_alloc(uint32_t npages, uint32_t flags);
/** @brief Frees a range in the virtual address space
*
* @param addr Address of the range
* @param npages Size of the range in pages
*
* @return
* - 0 on success
* - -EINVAL (-22) on failure.
*/
int vm_free(size_t addr, uint32_t npages);
/** @brief Unmap the physical memory at a specific virtual address
*
* All Page table entries within this range will be marked as not present
@@ -192,29 +192,29 @@ int arch_paging_init(void);
*
* @return Returns the address of the boot task's page dir array.
*/
page_entry_t* get_boot_page_map(void);
page_dir_t* get_boot_pgd(void);
/** @brief Setup a new page directory for a new user-level task
*
* @param task Pointer to the task-specific task_t structure
* @param copy If true: copy userspace pages and tables
* @param copy If true: PGD will be a copy of the kernel's address space PGD
*
* @return
* - counter of allocated page tables
* - -ENOMEM (-12) on failure
*/
int copy_page_map(struct task* task, int copy);
int create_pgd(task_t* task, int copy);
/** @brief Deletes all user page map structures of the current task
/** @brief Delete page directory and its page tables
*
* All allocated physical page frames are released in the bitmap
* The task->page_map is replaces by the boot_page_map()
* Puts page tables and page directory back to buffer and
* sets the task's page directory pointer to NULL
*
* @return
* - 0 on success
* - -EINVAL (-22) on failure (in case PGD is still the boot-pgd).
*/
int drop_page_map(void);
int drop_pgd(void);
/** @brief Change the page permission in the page tables of the current task
*
@@ -229,18 +229,6 @@ int drop_page_map(void);
* - 0 on success
* - -EINVAL (-22) on failure.
*/
int set_page_flags(size_t viraddr, uint32_t npages, int flags);
/** @brief Dump mapped memory
*
* @param mask Only watch for changes in these page flags (PG_PRESENT is set by default)
*/
void page_dump(size_t mask);
/** @brief Print stats about page flags
*
* @param reset Reset accessed and dirty bits in page tables
*/
void page_stats(int reset);
int change_page_permissions(size_t start, size_t end, uint32_t flags);
#endif

arch/x86/include/asm/page_helpers.h

@@ -1,161 +0,0 @@
/*
* Copyright 2014 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @file arch/x86/include/asm/page_helpers.h
* @brief Some small helper functions declared as static inline
* @author Stefan Lankes
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
#ifndef __ARCH_PAGE_HELPERS_H__
#define __ARCH_PAGE_HELPERS_H__
#include <metalsvm/page.h>
/** @brief Sign extending an integer
*
* @param addr The integer to extend
* @param bits The width of addr which should be extended
* @return The extended integer
*/
static inline size_t sign_extend(ssize_t addr, int bits)
{
int shift = BITS - bits;
return (addr << shift) >> shift; // sign bit gets copied during arithmetic right shift
}
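// E.g. BITS = 64, bits = 48: sign_extend(0x0000800000000000, 48) == 0xFFFF800000000000.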
/** @brief Get the base address of the child table
*
* @param entry The parent entry
* @return The child entry
*/
static inline page_entry_t* get_child_entry(page_entry_t *entry)
{
size_t child = (size_t) entry;
child <<= PAGE_MAP_BITS;
return (page_entry_t*) CANONICAL(child);
}
/** @brief Get the base address of the parent entry
*
* @param entry The child entry
* @return The parent entry
*/
static inline page_entry_t* get_parent_entry(page_entry_t *entry)
{
ssize_t parent = (size_t) entry;
parent >>= PAGE_MAP_BITS;
parent |= PAGE_MAP_PGT;
parent &= ~(sizeof(size_t) - 1); // align to page_entry_t
return (page_entry_t*) CANONICAL(parent);
}
/** @brief Get the corresponding page map entry to a given virtual address
*
* Please note: this implementation requires that the tables are mapped
* at the end of VAS!
*/
static inline page_entry_t* virt_to_entry(ssize_t addr, int level)
{
addr >>= PAGE_MAP_BITS;
addr |= PAGE_MAP_PGT;
addr >>= level * PAGE_MAP_BITS;
addr &= ~(sizeof(size_t) - 1); // align to page_entry_t
return (page_entry_t*) CANONICAL(addr);
}
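// Worked example (x86_64: PAGE_MAP_BITS = 9, PAGE_MAP_PGT = 0xFFFFFF8000000000),
// using the hypothetical input addr = 0xB8000 at level 0:
//   0xB8000 >> 9 = 0x5C0, then 0x5C0 | PAGE_MAP_PGT = 0xFFFFFF80000005C0,
// which equals PAGE_MAP_PGT + (addr >> PAGE_BITS) * sizeof(page_entry_t): the
// self-mapped slot routes the top PAGE_MAP_BITS of the lookup back into the tables.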
/** @brief Get the corresponding virtual address to a page map entry */
static inline size_t entry_to_virt(page_entry_t* entry, int level)
{
size_t addr = (size_t) entry;
addr <<= (level+1) * PAGE_MAP_BITS;
return CANONICAL(addr);
}
/** @brief Converts a virtual address to a physical
*
* A non-mapped virtual address causes a page fault!
*
* @param addr Virtual address to convert
* @return physical address
*/
inline size_t virt_to_phys(size_t addr)
{
page_entry_t* entry = virt_to_entry(addr, 0); // get the PGT entry
size_t off = addr & ~PAGE_MASK; // offset within page
size_t phy = *entry & PAGE_MASK; // physical page frame number
return phy | off;
}
/** @brief Update page table bits (PG_*) by using arch independent flags (MAP_*) */
static size_t page_bits(int flags)
{
size_t bits = PG_PRESENT | PG_RW | PG_XD | PG_GLOBAL;
if (flags & MAP_NO_ACCESS) bits &= ~PG_PRESENT;
if (flags & MAP_READ_ONLY) bits &= ~PG_RW;
#ifdef CONFIG_X86_64
if (flags & MAP_CODE) bits &= ~PG_XD;
#endif
if (flags & MAP_USER_SPACE) bits &= ~PG_GLOBAL;
if (flags & MAP_USER_SPACE) bits |= PG_USER;
if (flags & MAP_WT) bits |= PG_PWT;
if (flags & MAP_NO_CACHE) bits |= PG_PCD;
if (flags & MAP_MPE) bits |= PG_MPE;
if (flags & MAP_SVM_INIT) bits |= PG_SVM_INIT;
if (flags & MAP_SVM_LAZYRELEASE) bits |= PG_SVM_LAZYRELEASE;
if (flags & MAP_SVM_STRONG) bits |= PG_SVM_STRONG;
return bits;
}
// TODO: test
size_t get_page_flags(size_t viraddr)
{
page_entry_t* entry = virt_to_entry(viraddr, 0);
size_t flags = *entry & ~PAGE_MASK;
int i;
for (i=1; i<PAGE_MAP_LEVELS; i++) {
entry = virt_to_entry(viraddr, i);
#ifdef CONFIG_X86_64
flags |= (*entry & PG_XD);
#endif
flags &= (*entry & PG_USER) | ~PG_USER;
flags &= (*entry & PG_RW) | ~PG_RW;
flags &= (*entry & PG_USER) | ~PG_USER;
}
return flags;
}
#endif

arch/x86/include/pmc.h

@@ -1,252 +0,0 @@
/*
* Copyright 2013 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel
* @file arch/x86/include/pmc.h
* @brief Simple interface to IA32 Performance Monitor Counters
*
* Parts of this implementation are specific to Intel Core 2 Duo processors!
*/
#ifndef _ARCH_PMC_H_
#define _ARCH_PMC_H_
#include <stddef.h>
// PMC MSR addresses
#define MSR_PERF_GLOBAL_STATUS 0x38E // global counter control facilities
#define MSR_PERF_GLOBAL_CTRL 0x38F
#define MSR_PERF_GLOBAL_OVF_CTRL 0x390
#define IA32_PERF_CAPABILITIES 0x345
#define IA32_PERFEVTSEL(i) (0x186 + i) // general purpose PMC configuration register
#define IA32_PMC(i) (0x0C1 + i) // general purpose PMC counter register
#define IA32_A_PMC(i) (0x4C1 + i) // general purpose alias PMC counter register for full width writes
#define MSR_PERF_FIXED_CTR(i) (0x309 + i) // fixed function PMC counter register
#define MSR_PERF_FIXED_CTR_CTRL 0x38D // fixed function PMC configuration register
#define PMC_FIXED_OS (1 << 0)
#define PMC_FIXED_USR (1 << 1)
#define PMV_FIXED_PMI (1 << 3)
/* For Intel Core 2 Duo the MSR_PERF_FIXED_CTRs are mapped as follows:
* MSR_PERF_FIXED_CTR(0) => INST_RETIRED.ANY
* MSR_PERF_FIXED_CTR(1) => CPU_CLK_UNHALTED.CORE
* MSR_PERF_FIXED_CTR(2) => CPU_CLK_UNHALTED.REF */
// architectural flags for IA32_PERFEVTSEL
#define PMC_EVTSEL_CMASK 24 // counter mask [31:24]
#define PMC_EVTSEL_UMASK 8 // unit mask [15:8]
#define PMC_EVTSEL_INC (1 << 23) // invert counter mask
#define PMC_EVTSEL_EN (1 << 22) // enable counters
#define PMC_EVTSEL_ANY (1 << 21) // any thread (from version 3 on)
#define PMC_EVTSEL_INT (1 << 20) // APIC interrupt enable
#define PMC_EVTSEL_PC (1 << 19) // pin control
#define PMC_EVTSEL_E (1 << 18) // edge detect
#define PMC_EVTSEL_OS (1 << 17) // operating system mode
#define PMC_EVTSEL_USR (1 << 16) // user mode
// Core 2 Duo non-architectural flags for IA32_PERFEVTSEL (bus snooping)
#define PMC_EVTSEL_HITM (1 << 11) // HITM response
#define PMC_EVTSEL_HIT (1 << 9) // HIT response
#define PMV_EVTSEL_CLEAN (1 << 8) // CLEAN response
// architectural PMC events CPUID.0AH.EBX[6:0]
#define PMC_EVT_UNHALTED_CORE_CLKS 0x003C // UnHalted Core Cycles
#define PMC_EVT_UNHALTED_REF_CLKS 0x013C // UnHalted Reference Cycles
#define PMC_EVT_INST_RET 0x00C0 // Instruction Retired
#define PMC_EVT_LLC_REF 0x4F2E // LLC Reference
#define PMC_EVT_LLC_MISS 0x412E // LLC Misses
#define PMC_EVT_BRANCH_RET 0x00C4 // Branch Instruction Retired
#define PMC_EVT_BRANCH_MISS_RET 0x00C5 // Branch Miss Retired
// Core 2 Duo non-architectural PMC events
#define PMC_EVT_DTLB_MISS_ANY 0x0108 // Memory accesses that missed the TLB
#define PMC_EVT_DTLB_MISS_LD 0x0208 // DTLB misses due to load operations
#define PMC_EVT_DTLB_MISS_L0_LD 0x0408 // Level 0: DTLB misses due to load operations
#define PMC_EVT_DTLB_MISS_ST 0x0808 // DTLB misses due to store operations
#define PMC_EVT_ITLB_FLUSH 0x4082 // ITLB flushes
#define PMC_EVT_ITLB_MISS 0x1282 // ITLB misses (either large or small page)
#define PMC_EVT_ITLB_MISS_RET 0x00C9 // Retired instructions that missed the ITLB
#define PMC_EVT_ITLB_MISS_SMALL 0x0282 // ITLB small page misses
#define PMC_EVT_ITLB_MISS_LARGE 0x1082 // ITLB large page misses
#define PMC_EVT_PAGE_WALK_COUNT 0x010C // Number of page-walks executed
#define PMC_EVT_PAGE_WALK_CLKS 0x020C // Duration of page-walks in core cycles
#define PMC_EVT_MEM_LOAD_RETIRED_L1D_MISS 0x01CB // Retired loads that miss the L1 data cache (precise event)
#define PMC_EVT_MEM_LOAD_RETIRED_L1D_LINE_MISS 0x02CB // L1 data cache line missed by retired loads (precise event)
#define PMC_EVT_MEM_LOAD_RETIRED_L2_MISS 0x04CB // Retired loads that miss the L2 cache (precise event)
#define PMC_EVT_MEM_LOAD_RETIRED_L2_LINE_MISS 0x08CB // L2 cache line missed by retired loads (precise event)
#define PMC_EVT_MEM_LOAD_RETIRED_DTLB_MISS 0x10CB // Retired loads that miss the DTLB (precise event)
struct pmc {
uint8_t id;
void (*start)();
void (*stop)();
void (*reset)();
void (*write)(uint64_t val);
uint64_t (*read)();
};
struct pmc_caps {
/// Architectural PM version (CPUID.0AH:EAX[7:0])
uint8_t version;
/// Number of available General Purpose PMCs (CPUID.0AH:EAX[15:8])
uint8_t gp_count;
/// Number of available Fixed Function PMCs (CPUID.0AH.EDX[4:0])
uint8_t ff_count;
/// Counter bit width of General Purpose PMCs (CPUID.0AH:EAX[23:16])
uint8_t gp_width;
/// Counter bit width of Fixed Function PMCs (CPUID.0AH.EDX[12:5])
uint8_t ff_width;
/// Bit mask of supported architectural PMC events (CPUID.0AH.EBX[6:0])
uint32_t arch_events;
/// IA32_PERF_CAPABILITIES MSR
uint64_t msr;
};
/** @brief Queries the CPU about available Performance Monitoring capabilities
*
* @return A pointer to the capabilities struct
**/
struct pmc_caps* pmc_init();
/** @brief Sets up and stops the general purpose PMCs
*
* @param i The counter number to configure (positive for gp PMCs, negative for ff PMCs)
* @param event A combined event number including the unit mask (PMC_EVT_*)
* @param flags Flags for the IA32_PERFEVTSEL registers (PMC_EVTSEL_*)
* @param umask A separate unit mask ORed with event
* @param cmask An optional counter mask value
* @return
* - 0 on success
* - else failure (invalid counter or flags)
*/
int pmc_gp_config(uint8_t i, uint16_t event, uint32_t flags, uint8_t umask, uint8_t cmask);
/** @brief Sets up and stops the fixed function PMCs
*
* @param i The counter number to configure (positive for gp PMCs, negative for ff PMCs)
* @param flags Flags for the MSR_PERF_FIXED_CTR_CTRL register
* @return
* - 0 on success
* - else failure (invalid counter or flags)
*/
int pmc_ff_config(uint8_t i, uint8_t flags);
/** @brief Start a single general purpose PMC
*
* @param i The counter number
* @return
* - 0 on success
* - -EINVAL on invalid counter number
*/
inline int pmc_gp_start(uint8_t i);
/** @brief Stop a single general purpose PMC
*
* @param i The counter number
* @return
* - 0 on success
* - -EINVAL on invalid counter number
*/
inline int pmc_gp_stop(uint8_t i);
/** @brief Start a single fixed function PMC
*
* @param i The counter number
* @return
* - 0 on success
* - -EINVAL on invalid counter number
*/
inline int pmc_ff_start(uint8_t i);
/** @brief Stop a single fixed function PMC
*
* @param i The counter number
* @return
* - 0 on success
* - -EINVAL on invalid counter number
*/
inline int pmc_ff_stop(uint8_t i);
/** @brief Start all PMCs at the same time
*
* @return
* - 0 on success
* - -EINVAL on invalid counter number
*/
inline int pmc_start_all();
/** @brief Stop all PMCs at the same time
*
* @return
* - 0 on success
* - -EINVAL on invalid counter number
*/
inline int pmc_stop_all();
/** @brief Reset all PMCs to zero
*
* @return
* - 0 on success
* - -EINVAL on invalid counter number
*/
inline int pmc_reset_all();
/** @brief Read a single general purpose PMC
*
* @param i The counter number
* @return The counter value (see struct pmc_caps.gp_width)
*/
inline uint64_t pmc_gp_read(uint8_t i);
/** @brief Read a single fixed function PMC
*
* @param i The counter number
* @return The counter value (see struct pmc_caps.ff_width)
*/
inline uint64_t pmc_ff_read(uint8_t i);
/** @brief Write a single general purpose PMC value
*
* Not all architectures support full width writes to the PMCs.
* If bit 13 (FW_WRITE) in struct pmc_caps.msr is not set, the PMC
* is updated with the 32 bit sign extended version of val!
*
* @param i The counter number
* @param val The counter value (see struct pmc_caps.gp_width)
*/
inline int pmc_gp_write(uint8_t i, uint64_t val);
/** @brief Write a single fixed function PMC value
*
* @param i The counter number
* @param val The counter value (see struct pmc_caps.ff_width)
*/
inline int pmc_ff_write(uint8_t i, uint64_t val);
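/* A minimal usage sketch (an assumption, not part of this header), wired the
 * same way as the membench/bench callers elsewhere in this tree:
 *
 *   struct pmc_caps* cap = pmc_init();
 *   pmc_stop_all();
 *   pmc_gp_config(0, PMC_EVT_DTLB_MISS_ANY, PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0);
 *   pmc_reset_all();
 *   pmc_start_all();
 *   // ... workload under test ...
 *   pmc_stop_all();
 *   uint64_t dtlb_misses = pmc_gp_read(0);
 */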
#endif

arch/x86/include/asm/processor.h

@@ -39,124 +39,22 @@
extern "C" {
#endif
// CPUID.01H.EDX feature list
// feature list 1
#define CPU_FEATURE_FPU (1 << 0)
#define CPU_FEATUE_PSE (1 << 3)
#define CPU_FEATURE_MSR (1 << 5)
#define CPU_FEATURE_PAE (1 << 6)
#define CPU_FEATURE_APIC (1 << 9)
#define CPU_FEATURE_PGE (1 << 13)
#define CPU_FEATURE_PAT (1 << 16)
#define CPU_FEATURE_PSE36 (1 << 17)
#define CPU_FEATURE_MMX (1 << 23)
#define CPU_FEATURE_FXSR (1 << 24)
#define CPU_FEATURE_SSE (1 << 25)
#define CPU_FEATURE_SSE2 (1 << 26)
// CPUID.01H.ECX feature list
// feature list 2
#define CPU_FEATURE_X2APIC (1 << 21)
#define CPU_FEATURE_AVX (1 << 28)
#define CPU_FEATURE_HYPERVISOR (1 << 31)
// CPUID.80000001H:EDX feature list
#define CPU_FEATURE_NX (1 << 20)
#define CPU_FEATURE_1GBHP (1 << 26)
#define CPU_FEATURE_LM (1 << 29)
// x86 control registers
/// Protected Mode Enable
#define CR0_PE (1 << 0)
/// Monitor coprocessor
#define CR0_MP (1 << 1)
/// Enable FPU emulation
#define CR0_EM (1 << 2)
/// Task switched
#define CR0_TS (1 << 3)
/// Extension type of coprocessor
#define CR0_ET (1 << 4)
/// Enable FPU error reporting
#define CR0_NE (1 << 5)
/// Enable write protected pages
#define CR0_WP (1 << 16)
/// Enable alignment checks
#define CR0_AM (1 << 18)
/// Globally enables/disables write-back caching
#define CR0_NW (1 << 29)
/// Globally disable memory caching
#define CR0_CD (1 << 30)
/// Enable paging
#define CR0_PG (1 << 31)
/// Virtual 8086 Mode Extensions
#define CR4_VME (1 << 0)
/// Protected-mode Virtual Interrupts
#define CR4_PVI (1 << 1)
/// Disable Time Stamp Counter register (rdtsc instruction)
#define CR4_TSD (1 << 2)
/// Enable debug extensions
#define CR4_DE (1 << 3)
/// Enable hugepage support
#define CR4_PSE (1 << 4)
/// Enable physical address extension
#define CR4_PAE (1 << 5)
/// Enable machine check exceptions
#define CR4_MCE (1 << 6)
/// Enable global pages
#define CR4_PGE (1 << 7)
/// Enable Performance-Monitoring Counter
#define CR4_PCE (1 << 8)
/// Enable Operating system support for FXSAVE and FXRSTOR instructions
#define CR4_OSFXSR (1 << 9)
/// Enable Operating System Support for Unmasked SIMD Floating-Point Exceptions
#define CR4_OSXMMEXCPT (1 << 10)
/// Enable Virtual Machine Extensions, see Intel VT-x
#define CR4_VMXE (1 << 13)
/// Enable Safer Mode Extensions, see Trusted Execution Technology (TXT)
#define CR4_SMXE (1 << 14)
/// Enables the instructions RDFSBASE, RDGSBASE, WRFSBASE, and WRGSBASE
#define CR4_FSGSBASE (1 << 16)
/// Enables process-context identifiers
#define CR4_PCIDE (1 << 17)
/// Enable XSAVE and Processor Extended States
#define CR4_OSXSAVE (1 << 18)
/// Enable Supervisor Mode Execution Protection
#define CR4_SMEP (1 << 20)
/// Enable Supervisor Mode Access Protection
#define CR4_SMAP (1 << 21)
// x86-64 specific MSRs
/// extended feature register
#define MSR_EFER 0xc0000080
/// legacy mode SYSCALL target
#define MSR_STAR 0xc0000081
/// long mode SYSCALL target
#define MSR_LSTAR 0xc0000082
/// compat mode SYSCALL target
#define MSR_CSTAR 0xc0000083
/// EFLAGS mask for syscall
#define MSR_SYSCALL_MASK 0xc0000084
/// 64bit FS base
#define MSR_FS_BASE 0xc0000100
/// 64bit GS base
#define MSR_GS_BASE 0xc0000101
/// SwapGS GS shadow
#define MSR_KERNEL_GS_BASE 0xc0000102
// MSR EFER bits
#define EFER_SCE (1 << 0)
#define EFER_LME (1 << 8)
#define EFER_LMA (1 << 10)
#define EFER_NXE (1 << 11)
#define EFER_SVME (1 << 12)
#define EFER_LMSLE (1 << 13)
#define EFER_FFXSR (1 << 14)
#define EFER_TCE (1 << 15)
typedef struct {
uint32_t feature1, feature2, feature3;
uint32_t addr_width;
uint32_t feature1, feature2;
} cpu_info_t;
extern cpu_info_t cpu_info;
@@ -209,16 +107,6 @@ inline static uint32_t on_hypervisor(void)
return (cpu_info.feature2 & CPU_FEATURE_HYPERVISOR);
}
inline static uint32_t has_pge(void)
{
return (cpu_info.feature1 & CPU_FEATURE_PGE);
}
inline static uint32_t has_nx(void)
{
return (cpu_info.feature3 & CPU_FEATURE_NX);
}
/** @brief Read out time stamp counter
*
* The rdtsc asm command puts a 64 bit time stamp value
@@ -228,15 +116,9 @@ inline static uint32_t has_nx(void)
*/
inline static uint64_t rdtsc(void)
{
#ifdef CONFIG_X86_32
uint64_t x;
asm volatile ("rdtsc" : "=A" (x));
return x;
#elif defined(CONFIG_X86_64)
uint64_t lo, hi;
asm volatile ("rdtsc" : "=a"(lo), "=d"(hi) );
return (hi << 32 | lo);
#endif
}
/** @brief Flush cache
@@ -391,7 +273,7 @@ int ipi_tlb_flush(void);
/** @brief Flush a specific page entry in TLB
* @param addr The (virtual) address of the page to flush
*/
static inline void tlb_flush_one_page(size_t addr)
static inline void tlb_flush_one_page(uint32_t addr)
{
asm volatile("invlpg (%0)" : : "r"(addr) : "memory");
#if MAX_CORES > 1
@@ -400,7 +282,7 @@ static inline void tlb_flush_one_page(size_t addr)
* => User-level applications run only on one
* and we didn't flush the TLB of the other cores
*/
if (addr < KERNEL_SPACE)
if (addr <= KERNEL_SPACE)
ipi_tlb_flush();
#endif
}
@@ -411,7 +293,7 @@
*/
static inline void tlb_flush(void)
{
size_t val = read_cr3();
uint32_t val = read_cr3();
if (val)
write_cr3(val);
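// reloading CR3 invalidates all non-global TLB entries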


@@ -26,6 +26,13 @@
extern "C" {
#endif
/** @brief Copy a physical page to another physical destination
*
* @param dest Destination address
* @param src Source address
*/
void copy_page_physical(void* dest, const void * src);
#ifdef HAVE_ARCH_MEMCPY
#ifdef CONFIG_ROCKCREEK


@@ -97,19 +97,17 @@ static inline int register_task(void)
*
* @return 0 in any case
*/
static inline int jump_to_user_code(size_t ep, size_t stack)
static inline int jump_to_user_code(uint32_t ep, uint32_t stack)
{
asm volatile ("push $0x23; push %0; push $0x1B; push %1" :: "r"(stack), "r"(ep)); // fake stack, see Intel Reference Manual, Vol 1, 6.3.6
#ifdef CONFIG_X86_32
asm volatile ("mov %0, %%ds; mov %0, %%fs; mov %0, %%gs; mov %0, %%es" :: "r"(0x23)); // update segment registers
asm volatile ("lret" ::: "cc"); // far return to user level code
#elif defined (CONFIG_X86_64)
asm volatile ("lretq" ::: "cc"); // far return to user level code
#endif
asm volatile ("mov %0, %%ds; mov %0, %%fs; mov %0, %%gs; mov %0, %%es" :: "r"(0x23));
asm volatile ("push $0x23; push %0; push $0x1B; push %1" :: "r"(stack), "r"(ep));
asm volatile ("lret" ::: "cc");
return 0;
#else
return -22;
#endif
}
#ifdef __cplusplus

arch/x86/include/asm/uart.h

@@ -1,74 +0,0 @@
/*
* Copyright 2010 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel
* @file arch/x86/include/asm/uart.h
* @brief COM port related code
*/
#ifndef __ARCH_UART_H__
#define __ARCH_UART_H__
#include <metalsvm/stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifdef CONFIG_UART
/** @brief Initialize the UART (COM port) */
void uart_init(void);
/** @brief Send a single character via the UART */
void uart_putchar(char c);
/** @brief Receive a single character from the UART
*
* @return The received character
*/
char uart_getchar(void);
/** @brief Simple string output via the UART.
*
* If you want a new line you will have to include "\\n".
*/
void uart_puts(const char *str);
/** @brief Simple string input via the UART.
*
* @return Length of the input in bytes
*/
int uart_gets(char *str, size_t len);
#endif
#ifdef __cplusplus
}
#endif
#endif

arch/x86/kernel/Makefile

@@ -1,4 +1,4 @@
C_source := gdt.c kb.c timer.c irq.c isrs.c idt.c vga.c uart.c multiboot.c apic.c pci.c processor.c pmc.c
C_source := gdt.c kb.c timer.c irq.c isrs.c idt.c vga.c multiboot.c apic.c pci.c processor.c
ASM_source := entry$(BIT).asm string$(BIT).asm
MODULE := arch_x86_kernel

arch/x86/kernel/apic.c

@@ -27,7 +27,7 @@
#include <metalsvm/init.h>
#include <metalsvm/page.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/memory.h>
#include <metalsvm/mmu.h>
#include <metalsvm/tasks.h>
#include <asm/irq.h>
#include <asm/idt.h>
@@ -387,21 +387,19 @@ void smp_start(uint32_t id)
kprintf("Application processor %d is entering its idle task\n", apic_cpu_id());
#ifdef CONFIG_X86_32
// initialization for x86_64 is done in smp_entry()
// initialize default cpu features
cpu_init();
#endif
// use the same gdt as the boot processors
gdt_flush();
// install IDT
idt_install();
// On 64bit systems, paging is already enabled
#ifdef CONFIG_X86_32
/* enable paging */
write_cr3((size_t) get_boot_page_map());
write_cr3((size_t)get_boot_pgd());
i = read_cr0();
i = i | (1 << 31);
write_cr0(i);
@@ -462,8 +460,7 @@ int smp_init(void)
for(i=1; (i<ncores) && (i<MAX_CORES); i++)
{
/*
* Dirty hack aka. runtime linking:
* Copy 16bit startup code (see tools/smp_setup.asm)
* dirty hack: Copy 16bit startup code (see tools/smp_setup.asm)
* to a 16bit address. Wakeup the other cores via IPI. They start
* at this address in real mode, switch to protected and finally
* they jump to smp_main.
@@ -479,9 +476,9 @@ int smp_init(void)
#ifdef CONFIG_X86_32
*((uint32_t*) (bootaddr+j)) = (uint32_t) smp_start;
kprintf("Set entry point of the application processors at 0x%x\n", (uint32_t) smp_start);
#elif defined(CONFIG_X86_64)
#else
*((uint32_t*) (bootaddr+j)) = (uint32_t) smp_entry;
kprintf("Set entry point of the application processors at 0x%x\n", (uint32_t) smp_entry);
kprintf("Set entry point of the application processors at 0x%lx\n", (size_t) smp_entry);
#endif
}
@ -495,13 +492,12 @@ int smp_init(void)
*((uint32_t*) (bootaddr+j)) = (uint32_t) esp;
if ((int) esp < 0)
kprintf("Invalid stack value\n");
kprintf("Set stack of the application processors to 0x%x\n", esp);
}
}
//kprintf("Size of the boot_code: %d\n", sizeof(boot_code));
err = wakeup_ap((uint32_t) bootaddr, i);
//kprintf("size of the boot_code %d\n", sizeof(boot_code));
err = wakeup_ap((uint32_t)bootaddr, i);
if (err)
kprintf("Unable to wakeup application processor %d: %d\n", i, err);
@ -557,33 +553,30 @@ int map_apic(void)
lapic = map_region(0 /*lapic*/, lapic, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE);
if (BUILTIN_EXPECT(!lapic, 0))
return -ENXIO;
#else
if (lapic != (size_t)&kernel_start - 0x1000) {
kprintf("Upps! Kernel has to remap LAPIC!\n");
lapic = map_region(0 /*lapic*/, lapic, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE);
if (BUILTIN_EXPECT(!lapic, 0))
return -ENXIO;
}
#endif
vma_add(LAPIC_ADDR, LAPIC_ADDR + PAGE_SIZE, VMA_READ|VMA_WRITE);
vma_add(PAGE_CEIL((size_t) apic_config),
PAGE_FLOOR((size_t) apic_config + sizeof(apic_config_table_t)),
VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
vma_add(PAGE_CEIL((size_t) apic_mp),
PAGE_FLOOR((size_t) apic_mp + sizeof(apic_mp_t)),
VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
kprintf("Mapped LAPIC at 0x%x\n", lapic);
if (ioapic) {
ioapic = (ioapic_t*) map_region(IOAPIC_ADDR, (size_t) ioapic, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE);
vma_add(IOAPIC_ADDR, IOAPIC_ADDR + PAGE_SIZE, VMA_READ|VMA_WRITE);
kprintf("Mapped IOAPIC at %p\n", ioapic);
kprintf("IOAPIC version: 0x%x\n", ioapic_version());
kprintf("Max Redirection Entry: %u\n", ioapic_max_redirection_entry());
size_t old = 0;
ioapic = (ioapic_t*) map_region(0 /*(size_t)ioapic*/, (size_t) ioapic, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE);
kprintf("Mapped IOAPIC at 0x%x\n", ioapic);
// map all processor entries
size_t old = 0;
for(i=0; i<MAX_CORES; i++) {
if (apic_processors[i] && (old != (((size_t) apic_processors[i]) & PAGE_MASK))) {
old = map_region(((size_t) apic_processors[i]) & PAGE_MASK, ((size_t) apic_processors[i]) & PAGE_MASK, 1, MAP_REMAP|MAP_KERNEL_SPACE|MAP_NO_CACHE);
}
if (apic_processors[i] && (old != (((size_t)apic_processors[i]) & 0xFFFFF000)))
old = map_region(((size_t) apic_processors[i]) & 0xFFFFF000, ((size_t) apic_processors[i]) & 0xFFFFF000, 1, MAP_REMAP|MAP_KERNEL_SPACE|MAP_NO_CACHE);
}
kprintf("IOAPIC version: 0x%x\n", ioapic_version());
kprintf("Max Redirection Entry: %u\n", ioapic_max_redirection_entry());
}
return 0;
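A hedged usage sketch of the map_region() calls in map_apic() above: MMIO ranges such as the LAPIC must be mapped uncached, and passing viraddr = 0 asks map_region() to pick a free virtual region itself:

size_t lapic_virt = map_region(0, 0xFEE00000 /* LAPIC base */, 1,
                               MAP_KERNEL_SPACE | MAP_NO_CACHE);
if (BUILTIN_EXPECT(!lapic_virt, 0))
	return -ENXIO;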
@ -723,19 +716,20 @@ static int apic_probe(void)
goto found_mp;
}
}
}
mmap++;
}
}
#endif
found_mp:
#endif
found_mp:
if (!apic_mp)
goto no_mp;
kprintf("Found MP config table at 0x%x\n", apic_mp);
kprintf("System uses MultiProcessing Specification 1.%u\n", apic_mp->version);
kprintf("MultiProcessing features 1: %u\n", apic_mp->features[0]);
kprintf("System uses Multiprocessing Specification 1.%u\n", apic_mp->version);
kprintf("MP features 1: %u\n", apic_mp->features[0]);
if (apic_mp->features[0]) {
kputs("Currently, MetalSVM supports only multiprocessing via the MP config tables!\n");
@ -743,75 +737,67 @@ found_mp:
}
apic_config = (apic_config_table_t*) ((size_t) apic_mp->mp_config);
if (!apic_config || strncmp((void*) &apic_config->signature, "PCMP", 4) != 0) {
if (!apic_config || strncmp((void*) &apic_config->signature, "PCMP", 4) !=0) {
kputs("Invalid MP config table\n");
goto no_mp;
}
kprintf("MultiProcessing OEM: %.8s\n", apic_config->oem_id);
kprintf("MultiProcessing product: %.12s\n", apic_config->product_id);
addr = (size_t) apic_config;
addr += sizeof(apic_config_table_t);
if (addr % 4)
addr += 4 - addr % 4;
// search the ISA bus => required to redirect the IRQs
for(i=0; i<apic_config->entry_count; i++) {
switch(*((uint8_t*) addr)) {
case 0:
addr += 20;
break;
case 1: {
apic_bus_entry_t* mp_bus;
mp_bus = (apic_bus_entry_t*) addr;
if (mp_bus->name[0] == 'I' && mp_bus->name[1] == 'S' &&
mp_bus->name[2] == 'A')
isa_bus = i;
}
default:
addr += 8;
}
}
addr = (size_t) apic_config;
addr += sizeof(apic_config_table_t);
if (addr % 4)
addr += 4 - addr % 4; // align to dword
addr += 4 - addr % 4;
for(i=0, count=0; i<apic_config->entry_count; i++) {
uint8_t type = * (uint8_t*) addr;
switch (type) {
case 0: { // CPU
apic_processor_entry_t* cpu = (apic_processor_entry_t*) addr;
if (i < MAX_CORES) {
if (cpu->cpu_flags & 0x01) // enabled?
apic_processors[i] = cpu;
if (cpu->cpu_flags & 0x03) // enabled boot processor?
boot_processor = i;
}
kprintf("Found CPU %d with id = %#4x, version = %#4x, signature = %#6x, feature = %#6x\n",
count, cpu->id, cpu->version, cpu->cpu_signature, cpu->cpu_feature);
count++;
}
addr += 20;
break;
case 1: { // BUS
apic_bus_entry_t* bus = (apic_bus_entry_t*) addr;
kprintf("Found %.6s bus with id %u\n", bus->name, bus->bus_id);
if (!strncmp(bus->name, "ISA", 3))
isa_bus = bus->bus_id;
}
addr += 8;
break;
case 2: {// IO APIC
apic_io_entry_t* io_entry = (apic_io_entry_t*) addr;
ioapic = (ioapic_t*) ((size_t) io_entry->addr);
kprintf("Found IOAPIC at 0x%x\n", ioapic);
}
addr += 8;
break;
case 3: {// IO INT
apic_ioirq_entry_t* extint = (apic_ioirq_entry_t*) addr;
if (extint->src_bus == isa_bus) {
irq_redirect[extint->src_irq] = extint->dest_intin;
kprintf("Redirect interrupt %u -> %u\n", extint->src_irq, extint->dest_intin);
}
}
addr += 8;
break;
case 4: // Local INT
default:
addr += 8;
}
if (*((uint8_t*) addr) == 0) { // cpu entry
if (i < MAX_CORES) {
apic_processors[i] = (apic_processor_entry_t*) addr;
if (!(apic_processors[i]->cpu_flags & 0x01)) // is the processor usable?
apic_processors[i] = NULL;
else if (apic_processors[i]->cpu_flags & 0x02)
boot_processor = i;
}
count++;
addr += 20;
} else if (*((uint8_t*) addr) == 2) { // IO_APIC
apic_io_entry_t* io_entry = (apic_io_entry_t*) addr;
ioapic = (ioapic_t*) ((size_t) io_entry->addr);
addr += 8;
kprintf("Found IOAPIC at 0x%x\n", ioapic);
} else if (*((uint8_t*) addr) == 3) { // IO_INT
apic_ioirq_entry_t* extint = (apic_ioirq_entry_t*) addr;
if (extint->src_bus == isa_bus) {
irq_redirect[extint->src_irq] = extint->dest_intin;
kprintf("Redirect irq %u -> %u\n", extint->src_irq, extint->dest_intin);
}
addr += 8;
} else addr += 8;
}
kprintf("Found %u cores\n", count);
if (count > MAX_CORES) {
kputs("Found too many cores! Increase the macro MAX_CORES!\n");
goto no_mp;
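The dword alignment applied to the MP config table address above (addr += 4 - addr % 4) can also be written branch-free; a hedged equivalent:

addr = (addr + 3) & ~(size_t)3; // round up to the next 4-byte boundary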
@ -836,8 +822,8 @@ check_lapic:
kprintf("Found APIC at 0x%x\n", lapic);
#ifdef CONFIG_X86_64
// On a x64 system, we already mapped the LAPIC at LAPIC_ADDR
lapic = LAPIC_ADDR;
// On a x64 system, we already map the lapic below the kernel
lapic = (size_t)&kernel_start - 0x1000;
#endif
kprintf("Maximum LVT Entry: 0x%x\n", apic_lvt_entries());
kprintf("APIC Version: 0x%x\n", apic_version());

View file

@ -29,6 +29,7 @@
SECTION .mboot
global start
start:
mov byte [msg], 'H'
jmp stublet
; This part MUST be 4byte aligned, so we solve that issue using 'ALIGN 4'
@ -37,10 +38,10 @@ mboot:
; Multiboot macros to make a few lines more readable later
MULTIBOOT_PAGE_ALIGN equ 1<<0
MULTIBOOT_MEMORY_INFO equ 1<<1
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
MULTIBOOT_HEADER_MAGIC equ 0x1BADB002
MULTIBOOT_HEADER_FLAGS equ MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO ; | MULTIBOOT_AOUT_KLUDGE
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
EXTERN code, bss, end
; This is the GRUB Multiboot header. A boot signature
@ -48,6 +49,8 @@ mboot:
dd MULTIBOOT_HEADER_FLAGS
dd MULTIBOOT_CHECKSUM
msg db "?ello from MetalSVM kernel!!", 0
SECTION .text
ALIGN 4
stublet:
@ -67,7 +70,7 @@ stublet:
; jump to the boot processors's C code
extern main
call main
jmp $ ; infinite loop
jmp $
global cpu_init
cpu_init:
@ -109,7 +112,7 @@ global read_ip
read_ip:
mov eax, [esp+4]
pop DWORD [eax] ; Get the return address
add esp, 4 ; Dirty Hack! read_ip cleanup the stack
add esp, 4 ; Dirty Hack! read_ip cleanup the stacl
jmp [eax] ; Return. Can't use RET because return
; address popped off the stack.

View file

@ -30,7 +30,7 @@ extern kernel_end
extern apic_mp
; We use a special name to map this section at the begin of our kernel
; => Multiboot needs its magic number at the beginning of the kernel
; => Multiboot needs its magic number at the begin of the kernel
SECTION .mboot
global start
start:
@ -42,19 +42,19 @@ mboot:
; Multiboot macros to make a few lines more readable later
MULTIBOOT_PAGE_ALIGN equ 1<<0
MULTIBOOT_MEMORY_INFO equ 1<<1
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
MULTIBOOT_HEADER_MAGIC equ 0x1BADB002
MULTIBOOT_HEADER_FLAGS equ MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO ; | MULTIBOOT_AOUT_KLUDGE
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
EXTERN code, bss, end
; This is the GRUB Multiboot header. A boot signature
dd MULTIBOOT_HEADER_MAGIC
dd MULTIBOOT_HEADER_FLAGS
dd MULTIBOOT_CHECKSUM
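The checksum word is defined so that the three header fields sum to zero modulo 2^32, which is the invariant the bootloader verifies; a hedged C sketch of that check (reject_image() is a hypothetical bootloader reaction):

uint32_t sum = MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS + MULTIBOOT_CHECKSUM;
if (sum != 0)
	reject_image(); /* header invalid: refuse to boot this kernel */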
ALIGN 4
; we need already a valid GDT to switch in the 64bit mode
; we need already a valid GDT to switch in the 64bit modus
GDT64: ; Global Descriptor Table (64-bit).
.Null: equ $ - GDT64 ; The null descriptor.
dw 0 ; Limit (low).
@ -81,90 +81,112 @@ GDT64: ; Global Descriptor Table (64-bit).
dw $ - GDT64 - 1 ; Limit.
dq GDT64 ; Base.
times 256 DD 0 ; Stack for booting
times 256 DD 0
startup_stack:
PAGE_MAP_ENTRIES equ (1<<9)
PAGE_SIZE equ (1<<12)
SECTION .data
; Create default page tables for the 64bit kernel
global boot_pml4
ALIGN PAGE_SIZE ; of course, the page tables have to be page aligned
boot_pml4 times PAGE_MAP_ENTRIES DQ 0
boot_pdpt times PAGE_MAP_ENTRIES DQ 0
boot_pgd times PAGE_MAP_ENTRIES DQ 0
boot_pgt times (KERNEL_SPACE/PAGE_SIZE) DQ 0
; create default page tables for the 64bit kernel
global boot_pgd ; aka PML4
ALIGN 4096 ; of course, the page tables have to be page aligned
NOPTS equ 512
boot_pgd times 512 DQ 0
boot_pdpt times 512 DQ 0
boot_pd times 512 DQ 0
boot_pt times (NOPTS*512) DQ 0
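Each table declared above holds 512 (2^9) eight-byte entries, so one PML4 slot spans 512 GB, one PDPT slot 1 GB, one PD slot 2 MB, and one PT slot 4 KB. A hedged C sketch of how a virtual address selects an entry at each level:

/* Index of a virtual address at a given paging level (3 = PML4 ... 0 = PT). */
#define PAGE_MAP_INDEX(vaddr, level)  (((vaddr) >> (12 + 9 * (level))) & 0x1FF)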
SECTION .text
ALIGN 8
%if MAX_CORES > 1
global smp_entry
smp_entry:
; Initialize cpu features
call cpu_init
; Initialize cr3 register
mov edi, boot_pml4
; enable caching, disable paging and fpu emulation
and eax, 0x1ffffffb
; ...and turn on FPU exceptions
or eax, 0x22
mov cr0, eax
; clears the current pgd entry
xor eax, eax
mov cr3, eax
; at this stage, we disable the SSE support
mov eax, cr4
and eax, 0xfffbf9ff
mov cr4, eax
; initialize page table
mov edi, boot_pgd
mov cr3, edi
; Enable PAE
; we need to enable PAE modus
mov eax, cr4
or eax, 1 << 5
mov cr4, eax
; Enable longmode (compatibility mode)
; switch to the compatibility mode (which is part of long mode)
mov ecx, 0xC0000080
rdmsr
or eax, (1 << 8) | (1 << 11) ; IA32_EFER.LME = 1, IA32_EFER.NXE = 1
or eax, 1 << 8
wrmsr
; Enable paging
; enable paging
mov eax, cr0
or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31nd bit, and the PE-bit, which is the 0th bit.
mov cr0, eax ; According to the multiboot spec the PE-bit has to be set by bootloader already!
or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31nd bit, and the PM-bit, which is the 0th bit.
mov cr0, eax
; Jump to 64-bit longmode
mov edi, [esp+4] ; Set argument for smp_start
mov edi, [esp+4] ; set argument for smp_start
lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
jmp GDT64.Code:smp_start64 ; Set the code segment and enter 64-bit long mode.
jmp $ ; endless loop
%endif
; Search MP Floating Pointer Structure
search_mps:
search_apic:
push ebp
mov ebp, esp
push ecx
xor eax, eax
mov ecx, [ebp+8]
.l1:
L1:
cmp [ecx], DWORD 0x5f504d5f ; MP_FLT_SIGNATURE
jne .l2
jne L2
mov al, BYTE [ecx+9]
cmp eax, 4
ja .l2
ja L2
mov al, BYTE [ecx+11]
cmp eax, 0
jne .l2
jne L2
mov eax, ecx
jmp .l3
jmp L3
.l2:
L2:
add ecx, 4
cmp ecx, [ebp+12]
jb .l1
jb L1
xor eax, eax
.l3:
L3:
pop ecx
pop ebp
ret
check_longmode:
; Check for cpuid instruction
ALIGN 4
stublet:
mov esp, startup_stack-4
push ebx ; save pointer to the multiboot structure
mov eax, cr0
; enable caching, disable paging and fpu emulation
and eax, 0x1ffffffb
; ...and turn on FPU exceptions
or eax, 0x22
mov cr0, eax
; clears the current pgd entry
xor eax, eax
mov cr3, eax
; at this stage, we disable the SSE support
mov eax, cr4
and eax, 0xfffbf9ff
mov cr4, eax
; do we have the instruction cpuid?
pushfd
pop eax
mov ecx, eax
@ -176,22 +198,59 @@ check_longmode:
push ecx
popfd
xor eax, ecx
jz .unsupported
; Check for extended cpu features (cpuid > 0x80000000)
jz Linvalid
; cpuid > 0x80000000?
mov eax, 0x80000000
cpuid
cmp eax, 0x80000001
jb .unsupported ; It is less, there is no long mode.
; Check if longmode is supported
jb Linvalid ; It is less, there is no long mode.
; do we have a long mode?
mov eax, 0x80000001
cpuid
test edx, 1 << 29 ; Test if the LM-bit, which is bit 29, is set in the D-register.
jz .unsupported ; They aren't, there is no long mode.
ret
.unsupported:
jmp $
jz Linvalid ; They aren't, there is no long mode.
check_lapic:
; initialize page table
mov edi, boot_pgd
mov cr3, edi
; So let's make PML4T[0] point to the PDPT and so on:
mov DWORD [edi], boot_pdpt ; Set the double word at the destination index to pdpt.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
mov edi, boot_pdpt
mov DWORD [edi], boot_pd ; Set the double word at the destination index to pd.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
mov edi, boot_pd
mov ebx, boot_pt
mov ecx, NOPTS
L0:
mov DWORD [edi], ebx ; Set the double word at the destination index to pt.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
add edi, 8
add ebx, 0x1000
loop L0
%ifdef CONFIG_VGA
; map the VGA address into the virtual address space
mov edi, 0xB8000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, 0xB8000
or ebx, 0x00000013
mov DWORD [edi], ebx
%endif
; map multiboot structure into the virtual address space
mov edi, [esp]
and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, [esp]
and ebx, 0xFFFFF000
or ebx, 0x00000003
mov DWORD [edi], ebx
; check if lapic is available
push eax
push ebx
push ecx
@ -200,196 +259,123 @@ check_lapic:
cpuid
and edx, 0x200
cmp edx, 0
je .unsupported
; Map lapic at 0xFEE00000
mov edi, LAPIC_ADDR
je no_lapic
; map lapic at 0xFEE00000 below the kernel
mov edi, kernel_start - 0x1000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pgt
mov ebx, 0xFEE00000 ; LAPIC base address
or ebx, 0x113 ; set present, global, writable and cache disable bits
add edi, boot_pt
mov ebx, 0xFEE00000
or ebx, 0x00000013
mov DWORD [edi], ebx
mov DWORD [edi+4], 0x80000000 ; set execution disable bit in higher half
.unsupported:
no_lapic:
pop edx
pop ecx
pop ebx
pop eax
ret
cpu_init:
mov eax, cr0
; Enable caching, disable paging and fpu emulation
and eax, 0x1ffffffb
; ...and turn on FPU exceptions
or eax, 0x22
mov cr0, eax
; Clear the current pgd entry
xor eax, eax
mov cr3, eax
; at this stage, we disable the SSE support
mov eax, cr4
and eax, 0xfffbf9ff
mov cr4, eax
ret
; Identity map a single page at address eax
identity_page:
push edi
push ebx
mov edi, eax
and edi, 0xFFFFF000 ; page align in lower half
shr edi, 9 ; (edi >> 12) * 8 (index for boot_pgt)
add edi, boot_pgt
mov ebx, eax
and ebx, 0xFFFFF000 ; page align lower half
or ebx, 0x113 ; set present, global, writable and cache disable bits
mov DWORD [edi], ebx
mov DWORD [edi+4], 0x80000000 ; set execution disable bit in higher half
pop ebx
pop edi
ret
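A hedged C rendering of identity_page above; the "shr edi, 9" folds ">> 12" (page number) and "* 8" (entry size) into a single shift, and boot_pgt is the flat PTE array from the .data section:

extern uint64_t boot_pgt[];

void identity_page_c(uint64_t addr) /* hypothetical name for this sketch */
{
	uint64_t page = addr & ~0xFFFULL;      // page-align the address
	uint64_t *pte = &boot_pgt[page >> 12]; // same slot as base + (page >> 9)
	*pte = page | 0x113                    // present|writable|PCD|global
	     | (1ULL << 63);                   // XD bit (the high-dword 0x80000000)
}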
ALIGN 4
stublet:
mov esp, startup_stack-4
; Save pointer to the Multiboot structure
push ebx
; Initialize cpu features
call cpu_init
; Check if longmode is supported
call check_longmode
; Check if lapic is available
call check_lapic
; Find MP Floating Pointer structure
; search APIC
push DWORD 0x100000
push DWORD 0xF0000
call search_mps
call search_apic
add esp, 8
cmp eax, 0
jne map_mps
jne La
push DWORD 0xA0000
push DWORD 0x9F000
call search_mps
call search_apic
add esp, 8
cmp eax, 0
je map_vga
je Lb
map_mps:
; Map MP Floating Pointer structure
La:
; map MP Floating Pointer Structure
mov DWORD [apic_mp], eax
call identity_page
; Map MP Configuration table
mov eax, [eax+4] ; Offset for physical address of MP table
call identity_page
map_vga:
%ifdef CONFIG_VGA
; Map VGA textmode plane
mov eax, 0xB8000
call identity_page
%endif
map_multiboot:
; Map Multiboot structure
mov eax, [esp] ; Pointer is still on the stack
call identity_page
; Map Multiboot memory map
test DWORD [eax], (1 << 6) ; check if mmap_* fields are valid
jz init_paging
mov eax, [eax+48]
call identity_page
init_paging:
mov edi, boot_pml4
mov cr3, edi
mov DWORD [edi], boot_pdpt
or DWORD [edi], 0x103 ; Set present, global and writable flags
mov edi, boot_pdpt
mov DWORD [edi], boot_pgd
or DWORD [edi], 0x103 ; Set present, global and writable flags
mov edi, boot_pgd
mov ebx, boot_pgt
mov ecx, PAGE_MAP_ENTRIES ; Map all boot_pgt to the kernel space
.l1:
mov edi, eax
and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, eax
and ebx, 0xFFFFF000
or ebx, 0x00000013
mov DWORD [edi], ebx
or DWORD [edi], 0x103 ; Set present, global and writable flags
add edi, 8
add ebx, 0x1000
loop .l1
map_kernel:
; map mp_config
mov edi, [eax+4]
and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, [eax+4]
and ebx, 0xFFFFF000
or ebx, 0x00000013
mov DWORD [edi], ebx
Lb:
mov edi, kernel_start
shr edi, 9 ; (edi >> 12) * 8 (index for boot_pgt)
add edi, boot_pgt
shr edi, 9 ; (kernel_start >> 12) * 8
add edi, boot_pt
mov ebx, kernel_start
or ebx, 0x103 ; set present, global and writable flags
or ebx, 0x00000003
mov ecx, kernel_end ; determine kernel size in number of pages
sub ecx, kernel_start
shr ecx, 12
inc ecx
.l1:
mov DWORD [edi], ebx
Lc:
mov DWORD [edi], ebx ; Set the double word at the destination index to the B-register.
add edi, 8
add ebx, 0x1000
loop .l1
loop Lc
; Enable PAE
; we need to enable PAE modus
mov eax, cr4
or eax, 1 << 5
mov cr4, eax
; Enable longmode (compatibility mode)
; switch to the compatibility mode (which is part of long mode)
mov ecx, 0xC0000080
rdmsr
or eax, (1 << 8) | (1 << 11) ; IA32_EFER.LME = 1, IA32_EFER.NXE = 1
or eax, 1 << 8
wrmsr
; Enable paging
; enable paging
mov eax, cr0
or eax, (1 << 31) | (1 << 0) ; Set the PG-bit, which is the 31nd bit, and the PE-bit, which is the 0th bit.
or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31nd bit, and the PM-bit, which is the 0th bit.
mov cr0, eax
; Jump to 64-bit longmode
pop ebx ; Restore pointer to multiboot structure
lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode.
pop ebx ; restore pointer to multiboot structure
lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode.
Linvalid:
jmp $
[BITS 64]
start64:
; Initialize segment registers
; initialize segment registers
mov ax, GDT64.Data
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
mov ss, ax
; Set default stack pointer
; set default stack pointer
mov rsp, boot_stack
add rsp, KERNEL_STACK_SIZE-16
; Interpret multiboot information
; interpret multiboot information
extern multiboot_init
mov rdi, rbx
call multiboot_init
; Jump to the boot processors's C code
; jump to the boot processors's C code
extern main
call main
jmp $
%if MAX_CORES > 1
smp_start64:
; Initialize segment registers
; initialize segment registers
mov ax, GDT64.Data
mov ds, ax
mov es, ax
@ -397,12 +383,29 @@ smp_start64:
mov gs, ax
mov ss, ax
; Jump to the boot processors's C code
; jump to the boot processors's C code
extern smp_start
call smp_start
jmp $
%endif
global cpu_init
cpu_init:
; mov eax, cr0
; enable caching, disable paging and fpu emulation
; and eax, 0x1ffffffb
; ...and turn on FPU exceptions
; or eax, 0x22
; mov cr0, eax
; clears the current pgd entry
; xor eax, eax
; mov cr3, eax
; at this stage, we disable the SSE support
; mov eax, cr4
; and eax, 0xfffbf9ff
; mov cr4, eax
; ret
; This will set up our new segment registers and is declared in
; C as 'extern void gdt_flush();'
global gdt_flush
@ -411,6 +414,13 @@ gdt_flush:
lgdt [gp]
ret
; determines the current instruction pointer (after the jmp)
global read_eip
read_eip:
pop rax ; Get the return address
jmp rax ; Return. Can't use RET because return
; address popped off the stack.
; In just a few pages in this tutorial, we will add our Interrupt
; Service Routines (ISRs) right here!
global isr0
@ -732,41 +742,41 @@ extern syscall_handler
; used to realize system calls
isrsyscall:
cli ; disable interrupts during prologue
; save caller saved registers
push r15
push r14
push r13
push r12
push r11
push r10
push r9
push r8
push rdi
push rsi
push rbp
push rsp
push rbx
push rdx
push rcx
push rax
; set kernel data segments
mov ax, 0x10
mov ds, ax
; x86-64 ABI calling convention (see newlib/src/libgloss/syscall.h)
mov r8, rbx
mov r9, rax
mov rax, 0 ; we've not used vector registers for this va_arg call
sti ; enable interrupts during syscall
mov rdi, rsp
call syscall_handler
cli ; disable interrupts during prologue
; restore caller saved registers
pop rax
pop rcx
pop rdx
pop rbx
add rsp, 8
pop rbp
pop rsi
pop rdi
pop r8
pop r9
pop r10
pop r11
pop r12
pop r13
pop r14
iretq
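A hedged note on the stub above: the pushes build the saved register file that syscall_handler() receives through rdi, the first argument register of the System V AMD64 ABI (see the "mov rdi, rsp" before the call). The assumed C-side prototype, not shown in this diff:

/* Hypothetical prototype for illustration; s points at the pushed registers. */
extern void syscall_handler(struct state *s);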
global irq0

View file

@ -50,7 +50,7 @@ size_t* get_current_stack(void)
#endif
// use new page table
write_cr3(virt_to_phys((size_t) curr_task->page_map));
write_cr3(virt_to_phys((size_t)curr_task->pgd));
return curr_task->last_stack_pointer;
}
@ -59,7 +59,7 @@ int arch_fork(task_t* task)
{
struct state* state;
task_t* curr_task = per_core(current_task);
size_t state_size;
size_t esp, state_size;
if (BUILTIN_EXPECT(!task, 0))
return -EINVAL;
@ -78,7 +78,6 @@ int arch_fork(task_t* task)
memcpy(task->stack, curr_task->stack, KERNEL_STACK_SIZE);
#ifdef CONFIG_X86_32
size_t esp;
asm volatile ("mov %%esp, %0" : "=m"(esp));
esp -= (size_t) curr_task->stack;
esp += (size_t) task->stack;
@ -108,48 +107,8 @@ int arch_fork(task_t* task)
// This will be the entry point for the new task. read_ip cleanups the stack
asm volatile ("push %0; call read_ip" :: "r"(&state->eip) : "%eax");
#else
size_t rsp;
asm volatile ("mov %%rsp, %0" : "=m"(rsp));
rsp -= (size_t) curr_task->stack;
rsp += (size_t) task->stack;
state = (struct state*) (rsp - state_size);
//memset(state, 0x00, state_size);
asm volatile ("push %rax");
asm volatile ("push %rcx");
asm volatile ("push %rdx");
asm volatile ("push %rbx");
asm volatile ("push %rbp");
asm volatile ("push %rsi");
asm volatile ("push %rdi");
asm volatile ("push %r8");
asm volatile ("push %r9");
asm volatile ("push %r10");
asm volatile ("push %r11");
asm volatile ("pop %0" : "=m"(state->r11));
asm volatile ("pop %0" : "=m"(state->r10));
asm volatile ("pop %0" : "=m"(state->r9));
asm volatile ("pop %0" : "=m"(state->r8));
asm volatile ("pop %0" : "=m"(state->rdi));
asm volatile ("pop %0" : "=m"(state->rsi));
asm volatile ("pop %0" : "=m"(state->rbp));
asm volatile ("pop %0" : "=m"(state->rbx));
asm volatile ("pop %0" : "=m"(state->rdx));
asm volatile ("pop %0" : "=m"(state->rcx));
asm volatile ("pop %0" : "=m"(state->rax));
state->rsp = rsp;
task->last_stack_pointer = (size_t*) state;
state->int_no = 0xB16B00B5;
state->error = 0xC03DB4B3;
state->cs = 0x08;
state->ss = 0x10;
asm volatile ("pushf; pop %0" : "=m"(state->rflags)); // store the current RFLAGS
asm volatile ("leaq (%%rip), %0;": "=r"(state->rip)); // store current instruction pointer
state->rflags |= (1 << 9); // enable interrupts
#warning Currently, not supported!
return -1;
#endif
return 0;
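A hedged note on the 32-bit path above: read_ip stores the address of the instruction following the call into state->eip, so when the scheduler first loads the copied state, the child resumes directly after this line in the parent's code:

// Parent executes this once; the forked child later "wakes up" right here.
asm volatile ("push %0; call read_ip" :: "r"(&state->eip) : "%eax");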
@ -307,7 +266,7 @@ void gdt_install(void)
gdt_set_gate(2, 0, limit,
GDT_FLAG_RING0 | GDT_FLAG_SEGMENT | GDT_FLAG_DATASEG | GDT_FLAG_PRESENT,
GDT_FLAG_4K_GRAN | mode);
/*
* Create code segement for userspace applications (ring 3)
*/

View file

@ -192,8 +192,8 @@ static const char *exception_messages[] = {
"Breakpoint", "Into Detected Overflow", "Out of Bounds", "Invalid Opcode",
"No Coprocessor", "Double Fault", "Coprocessor Segment Overrun", "Bad TSS",
"Segment Not Present", "Stack Fault", "General Protection Fault", "Page Fault",
"Unknown Interrupt", "Math Fault", "Alignment Check", "Machine Check",
"SIMD Floating-Point", "Virtualization", "Reserved", "Reserved", "Reserved",
"Unknown Interrupt", "Coprocessor Fault", "Alignment Check", "Machine Check",
"Reserved", "Reserved", "Reserved", "Reserved", "Reserved",
"Reserved", "Reserved", "Reserved", "Reserved", "Reserved", "Reserved",
"Reserved", "Reserved" };
@ -208,18 +208,13 @@ static const char *exception_messages[] = {
static void fault_handler(struct state *s)
{
if (s->int_no < 32) {
task_t* task = per_core(current_task);
kputs(exception_messages[s->int_no]);
#ifdef CONFIG_X86_32
kprintf("%s Exception (%d) at cs:eip = %#x:%#lx, core = %u, task = %u, error = %#x\n",
"Register state: eflags = %#lx, eax = %#lx, ebx = %#lx, ecx = %#lx, edx = %#lx, edi = %#lx, esi = %#lx, ebp = %#llx, esp = %#lx\n",
exception_messages[s->int_no], s->int_no, s->cs, s->eip, CORE_ID, task->id, s->error,
s->eflags, s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
kprintf(" Exception (%d) at 0x%x:0x%x on core %u, error code 0x%x, eflags 0x%x\n",
s->int_no, s->cs, s->eip, CORE_ID, s->error, s->eflags);
#elif defined(CONFIG_X86_64)
kprintf("%s Exception (%d) at cs:rip = %#x:%#lx, core = %u, task = %u, error = %#lx\n"
"Register state: rflags = %#lx, rax = %#lx, rbx = %#lx, rcx = %#lx, rdx = %#lx, rdi = %#lx, rsi = %#lx, rbp = %#llx, rsp = %#lx\n",
exception_messages[s->int_no], s->int_no, s->cs, s->rip, CORE_ID, task->id, s->error,
s->rflags, s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp);
kprintf(" Exception (%d) at 0x%llx:0x%llx on core %u, error code 0x%llx, rflags 0x%llx\n",
s->int_no, s->cs, s->rip, CORE_ID, s->error, s->rflags);
#endif
/* Now, we signalize that we have handled the interrupt */

View file

@ -37,7 +37,7 @@ void kb_init(size_t size, tid_t tid) {
}
void kb_finish(void) {
kfree(kb_buffer.buffer);
kfree(kb_buffer.buffer, (kb_buffer.maxsize * sizeof(char)));
kb_buffer.buffer = NULL;
kb_buffer.size = 0;
kb_buffer.maxsize = 0;

View file

@ -1,206 +0,0 @@
/*
* Copyright 2013 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel
* @file arch/x86/kernel/pmc.c
* @brief Simple interface to IA32 Performance Monitor Counters
*
* This implementation is partly specific to Intel Core 2 Duo processors!
* General purpose PMCS => pmc_gp_*()
* Fixed function PMCs => pmc_ff_*()
*/
#include <errno.h>
#include <asm/pmc.h>
#include <asm/processor.h>
static struct pmc_caps caps = { 0 };
struct pmc_caps* pmc_init()
{
if (!caps.version) {
uint32_t a, b, c, d;
cpuid(0x0A, &a, &b, &c, &d);
caps.version = (a >> 0) & 0xff;
caps.gp_count = (a >> 8) & 0xff;
caps.gp_width = (a >> 16) & 0xff;
caps.ff_count = (d >> 0) & 0x1f;
caps.ff_width = (d >> 5) & 0xff;
caps.arch_events = (b >> 0) & 0x3f;
// check if IA32_PERF_CAPABILITIES MSR is available
if (caps.version >= 2) {
cpuid(0x01, &a, &b, &c, &d);
if (c & (1 << 15 /* PDCM */))
caps.msr = rdmsr(IA32_PERF_CAPABILITIES);
}
}
return &caps;
}
int pmc_gp_config(uint8_t i, uint16_t event, uint32_t flags, uint8_t umask, uint8_t cmask)
{
if (BUILTIN_EXPECT(i > caps.gp_count, 0))
return -EINVAL;
uint64_t evtsel = flags | event;
evtsel |= (cmask << PMC_EVTSEL_CMASK) | (umask << PMC_EVTSEL_UMASK);
wrmsr(IA32_PERFEVTSEL(i), evtsel);
wrmsr(IA32_PMC(i), 0); // reset counter
return 0;
}
int pmc_ff_config(uint8_t i, uint8_t flags)
{
if (BUILTIN_EXPECT(i > caps.ff_count, 0))
return -EINVAL;
uint64_t ctrl = rdmsr(MSR_PERF_FIXED_CTR_CTRL);
ctrl &= ~(0x0f << i*4); // clear flags
ctrl |= (flags & 0xf) << i*4;
wrmsr(MSR_PERF_FIXED_CTR_CTRL, ctrl);
return 0;
}
inline int pmc_gp_start(uint8_t i)
{
if (BUILTIN_EXPECT(i > caps.gp_count, 0))
return -EINVAL;
wrmsr(IA32_PERFEVTSEL(i), rdmsr(IA32_PERFEVTSEL(i)) | PMC_EVTSEL_EN);
return 0;
}
inline int pmc_gp_stop(uint8_t i)
{
if (BUILTIN_EXPECT(i > caps.gp_count, 0))
return -EINVAL;
wrmsr(IA32_PERFEVTSEL(i), rdmsr(IA32_PERFEVTSEL(i)) & ~PMC_EVTSEL_EN);
return 0;
}
inline int pmc_ff_start(uint8_t i)
{
if (BUILTIN_EXPECT(i > caps.ff_count, 0))
return -EINVAL;
// TODO
return 0;
}
inline int pmc_ff_stop(uint8_t i)
{
if (BUILTIN_EXPECT(i > caps.ff_count, 0))
return -EINVAL;
// TODO
return 0;
}
inline int pmc_start_all()
{
if (BUILTIN_EXPECT(caps.version < 2, 0))
return -EINVAL;
wrmsr(MSR_PERF_GLOBAL_CTRL, -1L);
return 0;
}
inline int pmc_stop_all()
{
if (BUILTIN_EXPECT(caps.version < 2, 0))
return -EINVAL;
wrmsr(MSR_PERF_GLOBAL_CTRL, 0);
return 0;
}
inline int pmc_reset_all()
{
if (BUILTIN_EXPECT(caps.version < 2, 0))
return -EINVAL;
int i;
for (i = 0; i < caps.gp_count; i++)
pmc_gp_write(i, 0);
for (i = 0; i < caps.ff_count; i++)
pmc_ff_write(i, 0);
return 0;
}
inline uint64_t pmc_gp_read(uint8_t i)
{
if (BUILTIN_EXPECT(i > caps.gp_count, 0))
return 0;
return rdmsr(IA32_PMC(i));
}
inline uint64_t pmc_ff_read(uint8_t i)
{
if (BUILTIN_EXPECT(i > caps.ff_count, 0))
return 0;
return rdmsr(MSR_PERF_FIXED_CTR(i));
}
inline int pmc_gp_write(uint8_t i, uint64_t val)
{
if (BUILTIN_EXPECT(i > caps.gp_count, 0))
return -EINVAL;
if (caps.version >= 2 && caps.msr & (1 << 13 /* FW_WRITE */))
wrmsr(IA32_A_PMC(i), val);
else
wrmsr(IA32_PMC(i), val);
return 0;
}
inline int pmc_ff_write(uint8_t i, uint64_t val)
{
if (BUILTIN_EXPECT(i > caps.ff_count, 0))
return -EINVAL;
wrmsr(MSR_PERF_FIXED_CTR(i), val);
return 0;
}
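A hedged usage sketch of the removed interface above; the event code 0xC0 (INST_RETIRED.ANY) and leaving flags/umask/cmask at zero are assumptions for an Intel Core-class CPU, not taken from this file:

struct pmc_caps* caps = pmc_init();
if (caps->gp_count > 0) {
	pmc_gp_config(0, 0xC0, 0 /* flags */, 0x00 /* umask */, 0 /* cmask */);
	pmc_gp_start(0);
	/* ... workload to measure ... */
	pmc_gp_stop(0);
	kprintf("retired instructions: %llu\n", pmc_gp_read(0));
}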

View file

@ -85,49 +85,27 @@ static void fpu_init_fxsr(union fpu_state* fpu)
fx->mxcsr = 0x1f80;
}
cpu_info_t cpu_info = { 0, 0, 0, 0 };
cpu_info_t cpu_info = { 0, 0 };
static uint32_t cpu_freq = 0;
int cpu_detection(void)
{
uint32_t a, b, c, d;
uint32_t a, b;
size_t cr4;
uint8_t first_time = 0;
if (!cpu_info.feature1) {
first_time = 1;
cpuid(1, &a, &b, &cpu_info.feature2, &cpu_info.feature1);
cpuid(0x80000001, &a, &b, &c, &cpu_info.feature3);
cpuid(0x80000008, &cpu_info.addr_width, &b, &c, &d);
}
if (first_time) {
kprintf("Paging features: %s%s%s%s%s%s%s%s\n",
(cpu_info.feature1 & CPU_FEATUE_PSE) ? "PSE (2/4Mb) " : "",
(cpu_info.feature1 & CPU_FEATURE_PAE) ? "PAE " : "",
(cpu_info.feature1 & CPU_FEATURE_PGE) ? "PGE " : "",
(cpu_info.feature1 & CPU_FEATURE_PAT) ? "PAT " : "",
(cpu_info.feature1 & CPU_FEATURE_PSE36) ? "PSE36 " : "",
(cpu_info.feature3 & CPU_FEATURE_NX) ? "NX " : "",
(cpu_info.feature3 & CPU_FEATURE_1GBHP) ? "PSE (1Gb) " : "",
(cpu_info.feature3 & CPU_FEATURE_LM) ? "LM" : "");
kprintf("Physical adress-width: %u bits\n", cpu_info.addr_width & 0xff);
kprintf("Linear adress-width: %u bits\n", (cpu_info.addr_width >> 8) & 0xff);
}
cr4 = read_cr4();
if (has_fxsr())
cr4 |= CR4_OSFXSR;
cr4 |= 0x200; // set the OSFXSR bit
if (has_sse())
cr4 |= CR4_OSXMMEXCPT;
if (has_pge())
cr4 |= CR4_PGE;
cr4 |= 0x400; // set the OSXMMEXCPT bit
write_cr4(cr4);
if (has_nx())
wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);
if (first_time && has_sse())
wmb = sfence;
@ -152,6 +130,7 @@ int cpu_detection(void)
}
if (first_time && on_hypervisor()) {
uint32_t c, d;
char vendor_id[13];
kprintf("MetalSVM is running on a hypervisor!\n");
@ -165,7 +144,7 @@ int cpu_detection(void)
kprintf("Hypervisor Vendor Id: %s\n", vendor_id);
kprintf("Maximum input value for hypervisor CPUID info: 0x%x\n", a);
}
return 0;
}
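The named CR4 flags on one side of this hunk and the raw 0x200/0x400 masks on the other refer to the same architectural bits; a hedged reference sketch:

/* CR4 bits toggled in cpu_detection() (IA-32 SDM names). */
#define CR4_PGE        (1UL << 7)   /* enable global pages                */
#define CR4_OSFXSR     (1UL << 9)   /* 0x200: FXSAVE/FXRSTOR support      */
#define CR4_OSXMMEXCPT (1UL << 10)  /* 0x400: unmasked SIMD FP exceptions */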

View file

@ -42,4 +42,38 @@ L3:
pop rax
ret
%if 0
; The following function is derived from JamesM's kernel development tutorials
; (http://www.jamesmolloy.co.uk/tutorial_html/)
global copy_page_physical
copy_page_physical:
push esi ; According to __cdecl, we must preserve the contents of ESI
push edi ; and EDI.
pushf ; push EFLAGS, so we can pop it and reenable interrupts
; later, if they were enabled anyway.
cli ; Disable interrupts, so we aren't interrupted.
; Load these in BEFORE we disable paging!
mov edi, [esp+12+4] ; Destination address
mov esi, [esp+12+8] ; Source address
mov edx, cr0 ; Get the control register...
and edx, 0x7fffffff ; and...
mov cr0, edx ; Disable paging.
cld
mov ecx, 0x400 ; 1024*4bytes = 4096 bytes = page size
rep movsd ; copy page
mov edx, cr0 ; Get the control register again
or edx, 0x80000000 ; and...
mov cr0, edx ; Enable paging.
popf ; Pop EFLAGS back.
pop edi ; Get the original value of EDI
pop esi ; and ESI back.
ret
%endif
SECTION .note.GNU-stack noalloc noexec nowrite progbits

View file

@ -1,72 +0,0 @@
/*
* Copyright 2011 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <asm/uart.h>
#include <asm/io.h>
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
static inline int is_transmit_empty()
{
return inportb(UART_PORT + 5) & 0x20;
}
static inline int received()
{
return inportb(UART_PORT + 5) & 1;
}
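A hedged note on the two helpers above: UART_PORT + 5 is the 16550 line status register (LSR), and the masks test its standard bits:

/* LSR (base + 5) bits used above:
 *   bit 0 (0x01) = data ready (a received byte is waiting)
 *   bit 5 (0x20) = transmit holding register empty (safe to send) */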
void uart_init()
{
outportb(UART_PORT + 1, 0x00); // Disable all interrupts
outportb(UART_PORT + 3, 0x80); // Enable DLAB (set baud rate divisor)
outportb(UART_PORT + 0, 0x0C); // Set divisor to 12 (lo byte) 9600 baud
outportb(UART_PORT + 1, 0x00); // (hi byte)
outportb(UART_PORT + 3, 0x03); // 8 bits, no parity, one stop bit (8N1)
outportb(UART_PORT + 2, 0xC7); // Enable FIFO, clear them, with 14-byte threshold
outportb(UART_PORT + 4, 0x0B); // IRQs enabled, RTS/DSR set
}
char uart_getchar()
{
while (!received());
return inportb(UART_PORT);
}
void uart_putchar(char chr)
{
while (!is_transmit_empty());
outportb(UART_PORT, chr);
}
void uart_puts(const char* str)
{
while (*str) uart_putchar(*(str++));
}
int uart_gets(char* str, size_t len)
{
size_t ret = 0;
while (ret < len)
str[ret++] = uart_getchar();
return ret;
}

View file

@ -1,4 +1,4 @@
C_source := page.c svm.c
C_source := page$(BIT).c svm.c
MODULE := arch_x86_mm
include $(TOPDIR)/Makefile.inc

View file

@ -1,722 +0,0 @@
/*
* Copyright 2014 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <metalsvm/stddef.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/memory.h>
#include <metalsvm/vma.h>
#include <metalsvm/string.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/processor.h>
#include <metalsvm/tasks.h>
#include <metalsvm/errno.h>
#include <metalsvm/page.h>
#include <asm/page_helpers.h>
#include <asm/irq.h>
#include <asm/multiboot.h>
#include <asm/apic.h>
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
/*
* Virtual Memory Layout of the standard configuration
* (1 GB kernel space)
*
* 0x0000000000000000 - 0x00000000000FFFFF: reserved for IO devices (16MB)
* 0x0000000000100000 - 0x00000000008C2000: Kernel (~8MB)
* 0x00000000008c3000 - 0x0000000000973000: Init Ramdisk (~2MB)
*
* 0x0001000000000000 - 0xffff000000000000: Memory hole (48 bit VAS limitation)
*
* 0xFFFFFE8000000000 - 0xFFFFFEFFFFFFFFFF: Page map dest for copy_page_map() (512GB)
* 0xFFFFFF0000000000 - 0xFFFFFF7FFFFFFFFF: Page map source for copy_page_map() (512GB)
* 0xFFFFFF8000000000 - 0xFFFFFFFFFFFFFFFF: Self-referenced page maps of the current task (512GB)
*/
/// Boot task's page map (setup by entryXX.asm)
extern page_entry_t boot_pml4[PAGE_MAP_ENTRIES];
/// Kernel space page map lock
static spinlock_t kslock = SPINLOCK_INIT;
/// Mapping of self referenced page map (at the end of the VAS)
// TODO: find a more generic initialization
#ifdef CONFIG_X86_32
static page_entry_t* const current_map = (page_entry_t*) (1 * PAGE_MAP_PGD);
static page_entry_t* const src_map = (page_entry_t*) (2 * PAGE_MAP_PGD);
static page_entry_t* const dest_map = (page_entry_t*) (3 * PAGE_MAP_PGD);
#elif defined(CONFIG_X86_64)
static page_entry_t* const current_map = (page_entry_t*) (1 * PAGE_MAP_PML4);
static page_entry_t* const src_map = (page_entry_t*) (2 * PAGE_MAP_PML4);
static page_entry_t* const dest_map = (page_entry_t*) (3 * PAGE_MAP_PML4);
#endif
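These three windows work because of the self-reference installed in arch_paging_init() further down in this file: the last top-level entry points back at the top-level table itself, which makes the whole page-table tree addressable through the last slot of the virtual address space:

page_entry_t* boot_map = get_boot_page_map();
boot_map[PAGE_MAP_ENTRIES-1] = (size_t) boot_map | PG_TABLE; // self-reference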
#ifdef CONFIG_X86_32
static page_entry_t boot_pgd[PAGE_MAP_ENTRIES];
#endif
page_entry_t* get_boot_page_map(void)
{
#ifdef CONFIG_X86_32
return boot_pgd;
#elif defined(CONFIG_X86_64)
return boot_pml4;
#endif
}
void page_dump(size_t mask)
{
task_t* task = per_core(current_task);
mask |= PG_PRESENT;
size_t flags = 0;
size_t start = 0;
size_t end;
void print(size_t start, size_t end, size_t flags) {
size_t size = end - start;
kprintf("%#018lx-%#018lx %#14x %c%c%c%c%c%c\n", start, end, size,
(mask & flags & PG_XD) ? '-' : 'x',
(mask & flags & PG_GLOBAL) ? 'g' : '-',
(mask & flags & PG_DIRTY) ? 'd' : '-',
(mask & flags & PG_ACCESSED) ? 'a' : '-',
(mask & flags & PG_USER) ? 'u' : '-',
(mask & flags & PG_RW) ? 'w' : '-'
);
}
void traverse(int level, page_entry_t* entry) {
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (*entry & PG_PRESENT) {
if (level && !(*entry & PG_PSE)) // do "pre-order" traversal
// TODO: handle "inheritance" of page table flags (see get_page_flags())
traverse(level-1, get_child_entry(entry));
else {
if (!flags) {
flags = *entry & ~PAGE_MASK & mask;
start = entry_to_virt(entry, level);
}
else if (flags != (*entry & ~PAGE_MASK & mask)) {
end = entry_to_virt(entry, level);
print(start, end, flags);
flags = *entry & ~PAGE_MASK & mask;
start = end;
}
}
}
else if (flags) {
end = entry_to_virt(entry, level);
print(start, end, flags);
flags = 0;
}
}
}
// lock tables
spinlock_lock(&kslock);
spinlock_irqsave_lock(&task->page_lock);
kprintf("%-18s-%18s %14s %-6s\n", "start", "end", "size", "flags"); // header
traverse(PAGE_MAP_LEVELS-1, current_map);
if (flags) // workaround to print last mapping
print(start, 0L, flags);
// unlock tables
spinlock_irqsave_unlock(&task->page_lock);
spinlock_unlock(&kslock);
}
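page_dump() above and the other walkers in this file use GCC's nested-function extension for their local print()/traverse() helpers; a hedged minimal example of the pattern:

/* GNU C nested functions (a GCC extension, not standard C). */
void outer(void)
{
	int count = 0;
	void visit(int present) { if (present) count++; } // may access count
	visit(1);
}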
void page_stats(int reset)
{
task_t* task = per_core(current_task);
int i, stats[13] = { 0 };
const char* labels[] = { [0] = "present", "writable", "user accessible", "write through", "cache disabled", // IA-32 "legacy" bits
"accessed", "dirty", "huge pages", "global", "svm", "svm lazy", "svm init",
[12] = "exec disabled" // IA-32e / PAE bits
};
void traverse(int level, page_entry_t* entry) {
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (*entry & PG_PRESENT) {
if (level && !(*entry & PG_PSE))
traverse(level-1, get_child_entry(entry));
else {
// increment stat counters
int i;
for (i=0; i<12; i++) { // IA-32 "legacy" bits
if (*entry & (1 << i))
stats[i]++;
}
#ifdef CONFIG_X86_64
for (i=0; i<1; i++) { // IA-32e / PAE bits
if (*entry & (1ULL << (63-i)))
stats[i+PAGE_BITS]++;
}
#endif
if (reset) { // reset accessed and dirty bits
*entry &= ~(PG_ACCESSED|PG_DIRTY);
tlb_flush_one_page(entry_to_virt(entry, level)); // see IA32 Vol3 4.8
}
}
}
}
}
// lock tables
spinlock_lock(&kslock);
spinlock_irqsave_lock(&task->page_lock);
traverse(PAGE_MAP_LEVELS-1, current_map);
// unlock tables
spinlock_irqsave_unlock(&task->page_lock);
spinlock_unlock(&kslock);
kprintf("total pages:\n");
for (i=0; i<13; i++)
kprintf(" - %s:%*lu\n", labels[i], 25-strlen(labels[i]), stats[i]);
}
int copy_page_map(task_t* new_task, int copy)
{
task_t* cur_task = per_core(current_task);
int traverse(int level, page_entry_t* src, page_entry_t* dest) {
page_entry_t* stop = src + PAGE_MAP_ENTRIES;
for (; src != stop; src++, dest++) {
if (*src & PG_PRESENT) {
if (*src & PG_USER) { // deep copy page frame
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
return -ENOMEM;
atomic_int32_inc(&cur_task->user_usage);
copy_page(phyaddr, *src & PAGE_MASK);
*dest = phyaddr | (*src & ~PAGE_MASK);
// do "pre-order" traversal
if (level && !(*src & PG_PSE)) {
int ret = traverse(level-1, get_child_entry(src),
get_child_entry(dest));
if (ret < 0)
return ret;
}
}
else // shallow copy kernel table
*dest = *src;
}
else // table does not exist
*dest = 0;
}
return 0;
}
page_entry_t* src_virt = (copy) ? cur_task->page_map : get_boot_page_map();
page_entry_t* dest_virt = (page_entry_t*) palloc(PAGE_SIZE, MAP_KERNEL_SPACE);
if (BUILTIN_EXPECT(!dest_virt, 0))
return -ENOMEM;
size_t src_phys = virt_to_phys((size_t) src_virt);
size_t dest_phys = virt_to_phys((size_t) dest_virt);
// lock tables
spinlock_lock(&kslock);
spinlock_irqsave_lock(&cur_task->page_lock);
kprintf("copy_page_map: copy = %u, src = %p (%p, %p), dest = %p (%p, %p)\n",
copy, src_virt, src_phys, src_map, dest_virt, dest_phys, dest_map); // TODO: remove
// temporary map src and dest tables
current_map[PAGE_MAP_ENTRIES-2] = (src_phys & PAGE_MASK) | (PG_TABLE & ~PG_RW); // source is read-only!
current_map[PAGE_MAP_ENTRIES-3] = (dest_phys & PAGE_MASK) | PG_TABLE;
//tlb_flush(); // ouch :(
int ret = traverse(PAGE_MAP_LEVELS-1, src_map, dest_map);
// setup self reference for new table
dest_map[PAGE_MAP_ENTRIES-1] = dest_phys | PG_TABLE;
// unmap temporary tables
current_map[PAGE_MAP_ENTRIES-2] = 0;
current_map[PAGE_MAP_ENTRIES-3] = 0;
dest_map[PAGE_MAP_ENTRIES-2] = 0;
dest_map[PAGE_MAP_ENTRIES-3] = 0;
tlb_flush(); // ouch :(
// unlock tables
spinlock_irqsave_unlock(&cur_task->page_lock);
spinlock_unlock(&kslock);
new_task->page_map = dest_virt;
return ret;
}
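A hedged usage sketch of copy_page_map() above: this is the fork path, where the child receives a deep copy of all user-space frames but shares the kernel tables (child_task is a hypothetical caller-side name):

int ret = copy_page_map(child_task, 1 /* deep-copy the current task */);
if (BUILTIN_EXPECT(ret < 0, 0))
	return ret; // out of physical pages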
int drop_page_map(void)
{
task_t* task = per_core(current_task);
void traverse(int level, page_entry_t* entry) {
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (*entry & PG_PRESENT) {
// do "post-order" traversal
if (level && !(*entry & PG_PSE))
traverse(level-1, get_child_entry(entry));
if (*entry & PG_USER) {
kprintf("drop_page_map: entry = %p. level = %u\n", entry, level);
if (put_page(*entry & PAGE_MASK))
atomic_int32_dec(&task->user_usage);
}
}
}
}
kprintf("drop_page_map: task = %u\n", task->id); // TODO: remove
// check assertions (test for NULL before dereferencing task->page_map)
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return -EINVAL;
if (BUILTIN_EXPECT(task->page_map == get_boot_page_map(), 0))
return -EINVAL;
// lock tables
spinlock_irqsave_lock(&task->page_lock);
kprintf("user_usage: %u (task = %u)\n", atomic_int32_read(&task->user_usage), task->id);
traverse(PAGE_MAP_LEVELS-1, current_map);
put_page((size_t) task->page_map);
// we replace the page table
task->page_map = get_boot_page_map();
tlb_flush();
// unlock tables
spinlock_irqsave_unlock(&task->page_lock);
return 0;
}
int set_page_flags(size_t viraddr, uint32_t npages, int flags)
{
task_t* task = per_core(current_task);
page_entry_t* first[PAGE_MAP_LEVELS];
page_entry_t* last[PAGE_MAP_LEVELS];
size_t bits = page_bits(flags);
size_t start = viraddr;
size_t end = start + npages * PAGE_SIZE;
void traverse(int level, page_entry_t* entry) {
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (entry < last[level] && entry >= first[level]) {
if ((*entry & PG_PRESENT) && !(*entry & PG_PSE)) {
if (level) {
if (flags & MAP_USER_SPACE)
*entry |= PG_USER;
#ifdef CONFIG_X86_64
if (flags & MAP_CODE)
*entry &= ~PG_XD;
#endif
// do "pre-order" traversal
traverse(level-1, get_child_entry(entry));
}
else
*entry = (*entry & PAGE_MASK) | bits;
tlb_flush_one_page(entry_to_virt(entry, level));
}
}
}
}
// check assertions
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return 0;
// calc page tree boundaries
int i;
for (i=0; i<PAGE_MAP_LEVELS; i++) {
first[i] = virt_to_entry(start, i);
last[i] = virt_to_entry(end - 1, i) + 1; // exclusive
}
// lock tables
if (start < KERNEL_SPACE)
spinlock_lock(&kslock);
if (end >= KERNEL_SPACE)
spinlock_irqsave_lock(&task->page_lock);
traverse(PAGE_MAP_LEVELS-1, current_map);
// unlock tables
if (start < KERNEL_SPACE)
spinlock_unlock(&kslock);
if (end >= KERNEL_SPACE)
spinlock_irqsave_unlock(&task->page_lock);
return 0;
}
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{
task_t* task = per_core(current_task);
page_entry_t* first[PAGE_MAP_LEVELS];
page_entry_t* last[PAGE_MAP_LEVELS];
// TODO: this behaviour should be deprecated
if (!viraddr) {
int vma_flags = VMA_HEAP;
if (flags & MAP_USER_SPACE)
vma_flags |= VMA_USER;
viraddr = vma_alloc(npages * PAGE_SIZE, vma_flags);
}
size_t bits = page_bits(flags);
size_t start = viraddr;
size_t end = start + npages * PAGE_SIZE;
int traverse(int level, page_entry_t* entry) {
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (entry < last[level] && entry >= first[level]) {
if (level) { // PGD, PDPT, PML4..
if (*entry & PG_PRESENT) {
if ((flags & MAP_USER_SPACE) && !(*entry & PG_USER)) {
/* We are altering entries which cover
* the kernel. So before changing them we need to
* make a private copy for the task */
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
return -ENOMEM;
atomic_int32_inc(&task->user_usage);
copy_page(phyaddr, *entry & PAGE_MASK);
*entry = phyaddr | (*entry & ~PAGE_MASK);
*entry &= ~PG_GLOBAL;
*entry |= PG_USER;
/* We just need to flush the table itself.
* TLB entries for the kernel remain valid
* because we've not changed them. */
tlb_flush_one_page(entry_to_virt(entry, 0));
}
}
else {
/* There's no page map table available
* which covers the region. Therefore we will create a
* new table. */
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
return -ENOMEM;
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
*entry = phyaddr | bits;
memset(get_child_entry(entry), 0x00, PAGE_SIZE); // fill with zeros
}
// do "pre-order" traversal if no hugepage
if (!(*entry & PG_PSE)) {
int ret = traverse(level-1, get_child_entry(entry));
if (ret < 0)
return ret;
}
}
else { // PGT
if ((*entry & PG_PRESENT) && !(flags & MAP_REMAP))
return -EINVAL;
*entry = phyaddr | bits;
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
if (flags & MAP_REMAP)
tlb_flush_one_page(entry_to_virt(entry, level));
phyaddr += PAGE_SIZE;
}
}
}
return 0;
}
kprintf("map_region: map %u pages from %#lx to %#lx with flags: %#x\n", npages, viraddr, phyaddr, flags); // TODO: remove
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return 0;
// calc page tree boundaries
int i;
for (i=0; i<PAGE_MAP_LEVELS; i++) {
first[i] = virt_to_entry(start, i);
last[i] = virt_to_entry(end - 1, i) + 1; // exclusive
}
// lock tables
if (start < KERNEL_SPACE)
spinlock_lock(&kslock);
if (end >= KERNEL_SPACE)
spinlock_irqsave_lock(&task->page_lock);
int ret = traverse(PAGE_MAP_LEVELS-1, current_map);
// unlock tables
if (start < KERNEL_SPACE)
spinlock_unlock(&kslock);
if (end >= KERNEL_SPACE)
spinlock_irqsave_unlock(&task->page_lock);
return (ret) ? 0 : viraddr;
}
int unmap_region(size_t viraddr, uint32_t npages)
{
task_t* task = per_core(current_task);
page_entry_t* first[PAGE_MAP_LEVELS];
page_entry_t* last[PAGE_MAP_LEVELS];
size_t start = viraddr;
size_t end = start + npages * PAGE_SIZE;
kprintf("unmap_region: unmap %u pages from %#lx\n", npages, viraddr); // TODO: remove
/** @return number of page table entries which are present */
int traverse(int level, page_entry_t* entry) {
int used = 0;
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (entry < last[level] && entry >= first[level]) {
if (level) { // PGD, PDPT, PML4
if ((*entry & PG_PRESENT) && !(*entry & PG_PSE)) {
// do "post-order" traversal if table is present and no hugepage
if (traverse(level-1, get_child_entry(entry)))
used++;
else { // child table is empty => delete it
*entry &= ~PG_PRESENT;
tlb_flush_one_page(entry_to_virt(entry, 0));
if (*entry & PG_USER) {
if (put_page(*entry & PAGE_MASK))
atomic_int32_dec(&task->user_usage);
}
}
}
}
else { // PGT
*entry &= ~PG_PRESENT;
tlb_flush_one_page(entry_to_virt(entry, level));
if (*entry & PG_USER)
atomic_int32_dec(&task->user_usage);
}
}
else {
if (*entry & PG_PRESENT)
used++;
}
}
return used;
}
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return 0;
// calc page tree boundaries
int i;
for (i=0; i<PAGE_MAP_LEVELS; i++) {
first[i] = virt_to_entry(start, i);
last[i] = virt_to_entry(end - 1, i) + 1; // exclusive
}
// lock tables
if (start < KERNEL_SPACE)
spinlock_lock(&kslock);
if (end >= KERNEL_SPACE)
spinlock_irqsave_lock(&task->page_lock);
traverse(PAGE_MAP_LEVELS-1, current_map);
// unlock tables
if (start < KERNEL_SPACE)
spinlock_unlock(&kslock);
if (end >= KERNEL_SPACE)
spinlock_irqsave_unlock(&task->page_lock);
return 0;
}
static void pagefault_handler(struct state *s)
{
task_t* task = per_core(current_task);
size_t viraddr = read_cr2();
// on demand userspace heap mapping
if ((task->heap) && (viraddr >= task->heap->start) && (viraddr < task->heap->end)) {
viraddr &= PAGE_MASK;
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0)) {
kprintf("out of memory: task = %u\n", task->id);
goto default_handler;
}
viraddr = map_region(viraddr, phyaddr, 1, MAP_USER_SPACE);
if (BUILTIN_EXPECT(!viraddr, 0)) {
kprintf("map_region: could not map %#lx to %#lx, task = %u\n", viraddr, phyaddr, task->id);
put_page(phyaddr);
goto default_handler;
}
memset((void*) viraddr, 0x00, PAGE_SIZE); // fill with zeros
return;
}
default_handler:
kprintf("Page Fault Exception (%d) at cs:ip = %#x:%#lx, core = %u, task = %u, addr = %#lx, error = %#x [ %s %s %s %s %s ]\n",
s->int_no, s->cs,
#ifdef CONFIG_X86_32
s->eip,
#elif defined(CONFIG_X86_64)
s->rip,
#endif
CORE_ID, task->id, viraddr, s->error,
(s->error & 0x4) ? "user" : "supervisor",
(s->error & 0x10) ? "instruction" : "data",
(s->error & 0x2) ? "write" : ((s->error & 0x10) ? "fetch" : "read"),
(s->error & 0x1) ? "protection" : "not present",
(s->error & 0x8) ? "reserved bit" : "\b");
// TODO: move this to something like print_registers()
#ifdef CONFIG_X86_32
kprintf("Register state: eflags = %#lx, eax = %#lx, ebx = %#lx, ecx = %#lx, edx = %#lx, edi = %#lx, esi = %#lx, ebp = %#llx, esp = %#lx\n",
s->eflags, s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
#elif defined(CONFIG_X86_64)
kprintf("Register state: rflags = %#lx, rax = %#lx, rbx = %#lx, rcx = %#lx, rdx = %#lx, rdi = %#lx, rsi = %#lx, rbp = %#llx, rsp = %#lx\n",
s->rflags, s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp);
#endif
irq_enable();
abort();
}
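The error-code decoding in the kprintf above follows the IA-32 page-fault error code layout; a hedged reference:

/* Page-fault error code bits (IA-32 SDM, Vol. 3):
 *   bit 0 (0x01) = protection violation (else: page not present)
 *   bit 1 (0x02) = write access (else: read)
 *   bit 2 (0x04) = fault raised in user mode (else: supervisor)
 *   bit 3 (0x08) = reserved bit set in a paging entry
 *   bit 4 (0x10) = instruction fetch */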
int arch_paging_init(void)
{
uint32_t i, npages;
// replace default pagefault handler
irq_uninstall_handler(14);
irq_install_handler(14, pagefault_handler);
// setup recursive paging
page_entry_t* boot_map = get_boot_page_map();
boot_map[PAGE_MAP_ENTRIES-1] = (size_t) boot_map | PG_TABLE;
/*
* In long mode the kernel is already mapped into the kernel space (see entry64.asm).
* This includes .data, .bss, .text, VGA, and the multiboot & multiprocessing (APIC) structures.
*/
#if MAX_CORES > 1
// reserve page for smp boot code
if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_NO_CACHE | MAP_REMAP)) {
kputs("could not reserve page for smp boot code\n");
return -ENOMEM;
}
#endif
#ifdef CONFIG_MULTIBOOT
#if 0
// map reserved memory regions into the kernel space
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr;
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
while (mmap < mmap_end) {
if (mmap->type != MULTIBOOT_MEMORY_AVAILABLE) {
npages = mmap->len / PAGE_SIZE;
if ((mmap->addr+mmap->len) % PAGE_SIZE)
npages++;
map_region(mmap->addr, mmap->addr, npages, MAP_NO_CACHE | MAP_REMAP);
}
mmap++;
}
}
#endif
/*
* Modules like the init ram disk are already loaded.
* Therefore, we map these modules into the kernel space.
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
npages = PAGE_FLOOR(mb_info->mods_count*sizeof(multiboot_module_t)) >> PAGE_BITS;
map_region((size_t) mmodule, (size_t) mmodule, npages, MAP_REMAP);
for(i=0; i<mb_info->mods_count; i++, mmodule++) {
// map physical address to the same virtual address
npages = PAGE_FLOOR(mmodule->mod_end - mmodule->mod_start) >> PAGE_BITS;
kprintf("Map module %s at %#x (%u pages)\n", (char*)(size_t) mmodule->cmdline, mmodule->mod_start, npages);
map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_REMAP);
}
}
#endif
// we turned on paging => now, we are able to register our task
register_task();
// APIC registers into the kernel address space
map_apic();
return 0;
}

836
arch/x86/mm/page32.c Normal file
View file

@ -0,0 +1,836 @@
/*
* Copyright 2010 Stefan Lankes, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <metalsvm/stddef.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/mmu.h>
#include <metalsvm/vma.h>
#include <metalsvm/string.h>
#include <metalsvm/page.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/processor.h>
#include <metalsvm/tasks.h>
#include <metalsvm/errno.h>
#include <asm/irq.h>
#include <asm/multiboot.h>
#include <asm/apic.h>
#ifdef CONFIG_ROCKCREEK
#include <asm/RCCE_lib.h>
#include <asm/SCC_API.h>
#include <asm/svm.h>
#include <asm/icc.h>
#endif
/*
* Virtual Memory Layout of the standard configuration
* (1 GB kernel space)
*
* 0x00000000 - 0x000FFFFF: reserved for IO devices (16MB)
* 0x00100000 - 0x0DEADFFF: Kernel (size depends on the configuration) (221MB)
* 0x0DEAE000 - 0x3FFFEFFF: Kernel heap (801MB)
* 0x3FFFF000 - 0x3FFFFFFF: Page Tables are mapped in this region (4KB)
* (The last 256 entries belong to kernel space)
*/
/*
* Note that linker symbols are not variables, they have no memory allocated for
* maintaining a value, rather their address is their value.
*/
extern const void kernel_start;
extern const void kernel_end;
// boot task's page directory and page directory lock
static page_dir_t boot_pgd = {{[0 ... PGT_ENTRIES-1] = 0}};
static page_table_t pgt_container = {{[0 ... PGT_ENTRIES-1] = 0}};
static page_table_t boot_pgt[KERNEL_SPACE/(1024*PAGE_SIZE)];
static spinlock_t kslock = SPINLOCK_INIT;
static int paging_enabled = 0;
page_dir_t* get_boot_pgd(void)
{
return &boot_pgd;
}
/*
* TODO: We create a full copy of the current task. Copy-On-Access will be the better solution.
*
* No PGD locking is needed because only create_pgd() uses this function and it
* already holds the PGD lock.
*/
inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_table_t* pgt, int* counter)
{
uint32_t i;
page_table_t* new_pgt;
size_t phyaddr;
if (BUILTIN_EXPECT(!pgt, 0))
return 0;
new_pgt = kmalloc(sizeof(page_table_t));
if (!new_pgt)
return 0;
memset(new_pgt, 0x00, sizeof(page_table_t));
if (counter)
(*counter)++;
for(i=0; i<PGT_ENTRIES; i++) {
if (pgt->entries[i] & PAGE_MASK) {
if (!(pgt->entries[i] & PG_USER)) {
// Kernel page => copy only page entries
new_pgt->entries[i] = pgt->entries[i];
continue;
}
phyaddr = get_page();
if (!phyaddr)
continue;
if (counter)
(*counter)++;
copy_page_physical((void*)phyaddr, (void*) (pgt->entries[i] & PAGE_MASK));
new_pgt->entries[i] = phyaddr | (pgt->entries[i] & 0xFFF);
atomic_int32_inc(&task->user_usage);
}
}
phyaddr = virt_to_phys((size_t)new_pgt);
return phyaddr;
}
int create_pgd(task_t* task, int copy)
{
page_dir_t* pgd;
page_table_t* pgt;
page_table_t* pgt_container;
uint32_t i;
uint32_t index1, index2;
size_t viraddr, phyaddr;
int counter = 0;
task_t* curr_task = per_core(current_task);
if (BUILTIN_EXPECT(!paging_enabled, 0))
return -EINVAL;
// we already know the virtual address of the "page table container"
// (see file header)
pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
// create new page directory for the new task
pgd = kmalloc(sizeof(page_dir_t));
if (!pgd)
return -ENOMEM;
memset(pgd, 0x00, sizeof(page_dir_t));
// create a new "page table container" for the new task
pgt = kmalloc(sizeof(page_table_t));
if (!pgt) {
kfree(pgd, sizeof(page_dir_t));
return -ENOMEM;
}
memset(pgt, 0x00, sizeof(page_table_t));
spinlock_lock(&kslock);
for(i=0; i<PGT_ENTRIES; i++) {
pgd->entries[i] = boot_pgd.entries[i];
// only kernel entries will be copied
if (pgd->entries[i] && !(pgd->entries[i] & PG_USER))
pgt->entries[i] = pgt_container->entries[i];
}
spinlock_unlock(&kslock);
// map page table container at the end of the kernel space
viraddr = (KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK;
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
// now, we create a self reference
pgd->entries[index1] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
pgt->entries[index2] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_PAGE;
task->pgd = pgd;
if (copy) {
spinlock_irqsave_lock(&curr_task->pgd_lock);
for (i=KERNEL_SPACE/(1024*PAGE_SIZE); i<1024; i++) {
if (!(curr_task->pgd->entries[i]))
continue;
if (!(curr_task->pgd->entries[i] & PG_USER))
continue;
phyaddr = copy_page_table(task, i, (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + i*PAGE_SIZE) & PAGE_MASK), &counter);
if (phyaddr) {
pgd->entries[i] = (phyaddr & PAGE_MASK) | (curr_task->pgd->entries[i] & 0xFFF);
pgt->entries[i] = (phyaddr & PAGE_MASK) | KERN_PAGE;
}
}
spinlock_irqsave_unlock(&curr_task->pgd_lock);
}
return counter;
}
/*
* drops all page frames and the PGD of a user task
*/
int drop_pgd(void)
{
page_dir_t* pgd = per_core(current_task)->pgd;
size_t phy_pgd = virt_to_phys((size_t) pgd);
task_t* task = per_core(current_task);
uint32_t i;
if (BUILTIN_EXPECT(pgd == &boot_pgd, 0))
return -EINVAL;
spinlock_irqsave_lock(&task->pgd_lock);
for(i=0; i<PGT_ENTRIES; i++) {
if (pgd->entries[i] & PG_USER) {
put_page(pgd->entries[i] & PAGE_MASK);
pgd->entries[i] = 0;
}
}
// freeing the page directory
put_page(phy_pgd);
task->pgd = NULL;
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
size_t virt_to_phys(size_t viraddr)
{
task_t* task = per_core(current_task);
uint32_t index1, index2;
page_table_t* pgt;
size_t ret = 0;
if (!paging_enabled)
return viraddr;
if (BUILTIN_EXPECT(!task || !task->pgd, 0))
return 0;
spinlock_irqsave_lock(&task->pgd_lock);
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
if (!(task->pgd->entries[index1] & PAGE_MASK))
goto out;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2]))
goto out;
ret = pgt->entries[index2] & PAGE_MASK; // determine page frame
ret = ret | (viraddr & 0xFFF); // add page offset
out:
//kprintf("vir %p to phy %p\n", viraddr, ret);
spinlock_irqsave_unlock(&task->pgd_lock);
return ret;
}
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{
task_t* task = per_core(current_task);
page_table_t* pgt;
size_t index, i;
size_t ret;
if (BUILTIN_EXPECT(!task || !task->pgd, 0))
return 0;
if (BUILTIN_EXPECT(!paging_enabled && (viraddr != phyaddr), 0))
return 0;
if (flags & MAP_KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
if (!viraddr) {
viraddr = vm_alloc(npages, flags);
if (BUILTIN_EXPECT(!viraddr, 0)) {
kputs("map_adress: found no valid virtual address\n");
ret = 0;
goto out;
}
}
ret = viraddr;
//kprintf("map %d pages from %p to %p\n", npages, phyaddr, ret);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE, phyaddr+=PAGE_SIZE) {
index = viraddr >> 22;
if (!(task->pgd->entries[index])) {
page_table_t* pgt_container;
pgt = (page_table_t*) get_pages(1);
if (BUILTIN_EXPECT(!pgt, 0)) {
kputs("map_address: out of memory\n");
ret = 0;
goto out;
}
// set the new page table into the directory
if (flags & MAP_USER_SPACE)
task->pgd->entries[index] = (uint32_t)pgt|USER_TABLE;
else
task->pgd->entries[index] = (uint32_t)pgt|KERN_TABLE;
// if paging is already enabled, we need to use the virtual address
if (paging_enabled)
// we already know the virtual address of the "page table container"
// (see file header)
pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
else
pgt_container = (page_table_t*) (task->pgd->entries[(KERNEL_SPACE - PAGE_SIZE) >> 22] & PAGE_MASK);
if (BUILTIN_EXPECT(!pgt_container, 0)) {
kputs("map_address: internal error\n");
ret = 0;
goto out;
}
// map the new table into the address space of the kernel space
pgt_container->entries[index] = ((size_t) pgt)|KERN_PAGE;
// clear the page table
if (paging_enabled)
memset((void*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK), 0x00, PAGE_SIZE);
else
memset(pgt, 0x00, PAGE_SIZE);
} else pgt = (page_table_t*) (task->pgd->entries[index] & PAGE_MASK);
/* convert physical address to virtual */
if (paging_enabled)
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK);
index = (viraddr >> 12) & 0x3FF;
if (pgt->entries[index] && !(flags & MAP_REMAP)) {
kprintf("0x%x is already mapped\n", viraddr);
ret = 0;
goto out;
}
if (flags & MAP_USER_SPACE)
pgt->entries[index] = USER_PAGE|(phyaddr & PAGE_MASK);
else
pgt->entries[index] = KERN_PAGE|(phyaddr & PAGE_MASK);
if (flags & MAP_NO_CACHE)
pgt->entries[index] |= PG_PCD;
#ifdef CONFIG_ROCKCREEK
if (flags & MAP_MPE)
pgt->entries[index] |= PG_MPE;
#endif
if (flags & MAP_SVM_STRONG)
#ifndef SVM_WB
pgt->entries[index] |= PG_SVM_STRONG|PG_PWT;
#else
pgt->entries[index] |= PG_SVM;
#endif
if (flags & MAP_SVM_LAZYRELEASE)
pgt->entries[index] |= PG_SVM_LAZYRELEASE|PG_PWT;
if (flags & MAP_SVM_INIT)
pgt->entries[index] |= PG_SVM_INIT;
if (flags & MAP_NO_ACCESS)
pgt->entries[index] &= ~PG_PRESENT;
if (flags & MAP_WT)
pgt->entries[index] |= PG_PWT;
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
tlb_flush_one_page(viraddr);
}
out:
if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return ret;
}
int change_page_permissions(size_t start, size_t end, uint32_t flags)
{
uint32_t index1, index2, newflags;
size_t viraddr = start & 0xFFFFF000;
size_t phyaddr;
page_table_t* pgt;
page_dir_t* pgd;
task_t* task = per_core(current_task);
if (BUILTIN_EXPECT(!paging_enabled, 0))
return -EINVAL;
pgd = per_core(current_task)->pgd;
if (BUILTIN_EXPECT(!pgd, 0))
return -EINVAL;
spinlock_irqsave_lock(&task->pgd_lock);
while (viraddr < end)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
while ((viraddr < end) && (index2 < 1024)) {
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (pgt && pgt->entries[index2]) {
phyaddr = pgt->entries[index2] & PAGE_MASK;
newflags = pgt->entries[index2] & 0xFFF; // get old flags
if (!(newflags & PG_SVM_INIT)) {
if ((newflags & PG_SVM_STRONG) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS)))
newflags |= PG_PRESENT;
else if ((newflags & PG_SVM_STRONG) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS))
newflags &= ~PG_PRESENT;
}
// update flags
if (!(flags & VMA_WRITE)) {
newflags &= ~PG_RW;
#ifdef CONFIG_ROCKCREEK
if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE))
newflags &= ~PG_MPE;
#endif
} else {
newflags |= PG_RW;
#ifdef CONFIG_ROCKCREEK
if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE))
newflags |= PG_MPE;
#endif
}
pgt->entries[index2] = (newflags & 0xFFF) | (phyaddr & PAGE_MASK);
tlb_flush_one_page(viraddr);
}
index2++;
viraddr += PAGE_SIZE;
}
}
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
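/*
 * A small usage sketch (an assumed caller, not from the original sources):
 * revoking write access on a mapped range. change_page_permissions()
 * clears PG_RW on every present page and flushes each TLB entry.
 */
static inline int make_readonly(size_t start, size_t end)
{
	return change_page_permissions(start, end, VMA_READ|VMA_CACHEABLE);
}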
/*
* Use the first fit algorithm to find a valid address range
*
* TODO: O(n) => bad performance, we need a better approach
*/
size_t vm_alloc(uint32_t npages, uint32_t flags)
{
task_t* task = per_core(current_task);
uint32_t index1, index2, j;
size_t viraddr, i, ret = 0;
size_t start, end;
page_table_t* pgt;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return 0;
if (flags & MAP_KERNEL_SPACE) {
start = (((size_t) &kernel_end) + PAGE_SIZE) & PAGE_MASK;
end = (KERNEL_SPACE - 2*PAGE_SIZE) & PAGE_MASK; // we need 1 PAGE for our PGTs
} else {
start = KERNEL_SPACE & PAGE_MASK;
end = PAGE_MASK;
}
if (BUILTIN_EXPECT(!npages, 0))
return 0;
if (flags & MAP_KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
viraddr = i = start;
j = 0;
do {
index1 = i >> 22;
index2 = (i >> 12) & 0x3FF;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2])) {
i+=PAGE_SIZE;
j++;
} else {
// restart search
j = 0;
viraddr = i + PAGE_SIZE;
i = i + PAGE_SIZE;
}
} while((j < npages) && (i<=end));
if ((j >= npages) && (viraddr < end))
ret = viraddr;
if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return ret;
}
int unmap_region(size_t viraddr, uint32_t npages)
{
task_t* task = per_core(current_task);
uint32_t i;
uint32_t index1, index2;
page_table_t* pgt;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt)
continue;
pgt->entries[index2] &= ~PG_PRESENT;
if (viraddr > KERNEL_SPACE)
atomic_int32_dec(&task->user_usage);
tlb_flush_one_page(viraddr);
}
if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
int vm_free(size_t viraddr, uint32_t npages)
{
task_t* task = per_core(current_task);
uint32_t i;
uint32_t index1, index2;
page_table_t* pgt;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt)
continue;
pgt->entries[index2] = 0;
tlb_flush_one_page(viraddr);
}
if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
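/*
 * A usage sketch tying the interfaces above together (an assumed caller,
 * not from the original sources): with viraddr == 0, map_region() picks a
 * free virtual range itself via the first-fit vm_alloc().
 */
static void mapping_roundtrip(void)
{
	size_t phyaddr = get_page();  // one free page frame, 0 on failure
	size_t viraddr = map_region(0, phyaddr, 1, MAP_KERNEL_SPACE);

	if (viraddr) {
		memset((void*) viraddr, 0x00, PAGE_SIZE);  // page is now usable
		unmap_region(viraddr, 1);  // clear PG_PRESENT again
		vm_free(viraddr, 1);       // release the virtual range
	}
	if (phyaddr)
		put_page(phyaddr);         // hand the frame back
}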
int print_paging_tree(size_t viraddr)
{
task_t* task = per_core(current_task);
uint32_t index1, index2;
page_dir_t* pgd = NULL;
page_table_t* pgt = NULL;
if (BUILTIN_EXPECT(!viraddr, 0))
return -EINVAL;
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
spinlock_irqsave_lock(&task->pgd_lock);
kprintf("Paging dump of address 0x%x\n", viraddr);
pgd = task->pgd;
kprintf("\tPage directory entry %u: ", index1);
if (pgd) {
kprintf("0x%0x\n", pgd->entries[index1]);
pgt = (page_table_t*) (pgd->entries[index1] & PAGE_MASK);
} else
kputs("invalid page directory\n");
/* convert physical address to virtual */
if (paging_enabled && pgt)
pgt = (page_table_t*) (KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE);
kprintf("\tPage table entry %u: ", index2);
if (pgt)
kprintf("0x%x\n", pgt->entries[index2]);
else
kputs("invalid page table\n");
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
static void pagefault_handler(struct state *s)
{
task_t* task = per_core(current_task);
page_dir_t* pgd = task->pgd;
page_table_t* pgt = NULL;
size_t viraddr = read_cr2();
size_t phyaddr;
#ifdef CONFIG_ROCKCREEK
uint32_t index1, index2;
#endif
if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
viraddr = viraddr & PAGE_MASK;
phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
goto default_handler;
if (map_region(viraddr, phyaddr, 1, MAP_USER_SPACE) == viraddr) {
memset((void*) viraddr, 0x00, PAGE_SIZE);
return;
}
kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr);
put_page(phyaddr);
}
#ifdef CONFIG_ROCKCREEK
// does our SVM system need to handle this page fault?
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
if (!pgd || !(pgd->entries[index1] & PAGE_MASK))
goto default_handler;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2]))
goto default_handler;
if (pgt->entries[index2] & PG_SVM_INIT) {
if (BUILTIN_EXPECT(!svm_alloc_page(viraddr, pgt), 1))
return;
else
goto default_handler;
}
if (pgt->entries[index2] & PG_SVM_STRONG)
if (BUILTIN_EXPECT(!svm_access_request(viraddr), 1))
return;
#endif
default_handler:
kprintf("PAGE FAULT: Task %u got page fault at %p (irq %d, cs:eip 0x%x:0x%x)\n", task->id, viraddr, s->int_no, s->cs, s->eip);
kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%xi, ds = 0x%x, es = 0x%x\n",
s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp, s->ds, s->es);
irq_enable();
abort();
}
int arch_paging_init(void)
{
uint32_t i, npages, index1, index2;
page_table_t* pgt;
size_t viraddr;
// uninstall default handler and install our own
irq_uninstall_handler(14);
irq_install_handler(14, pagefault_handler);
// Create a page table to reference to the other page tables
pgt = &pgt_container;
// map this table at the end of the kernel space
viraddr = KERNEL_SPACE - PAGE_SIZE;
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
// now, we create a self reference
per_core(current_task)->pgd->entries[index1] = (((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
pgt->entries[index2] = ((size_t) pgt & 0xFFFFF000)|KERN_PAGE;
// create the other PGTs for the kernel space
for(i=0; i<KERNEL_SPACE/(1024*PAGE_SIZE)-1; i++) {
size_t phyaddr = (size_t) (boot_pgt+i);
memset((void*) phyaddr, 0x00, sizeof(page_table_t));
per_core(current_task)->pgd->entries[i] = (phyaddr & PAGE_MASK)|KERN_TABLE;
pgt->entries[i] = (phyaddr & PAGE_MASK)|KERN_PAGE;
}
/*
* Set the page table and page directory entries for the kernel. We map the kernel's physical address
* to the same virtual address.
*/
npages = ((size_t) &kernel_end - (size_t) &kernel_start) >> PAGE_SHIFT;
if ((size_t)&kernel_end & (PAGE_SIZE-1))
npages++;
map_region((size_t)&kernel_start, (size_t)&kernel_start, npages, MAP_KERNEL_SPACE);
#if MAX_CORES > 1
// Reserve page for smp boot code
if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE)) {
kputs("could not reserve page for smp boot code\n");
return -ENOMEM;
}
#endif
#ifdef CONFIG_VGA
// map the video memory into the kernel space
map_region(VIDEO_MEM_ADDR, VIDEO_MEM_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE);
#endif
#ifdef CONFIG_MULTIBOOT
/*
* of course, mb_info has to map into the kernel space
*/
if (mb_info)
map_region((size_t) mb_info & PAGE_MASK, (size_t) mb_info & PAGE_MASK, 1, MAP_KERNEL_SPACE);
#if 0
/*
* Map reserved memory regions into the kernel space
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr;
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
while (mmap < mmap_end) {
if (mmap->type != MULTIBOOT_MEMORY_AVAILABLE) {
npages = mmap->len / PAGE_SIZE;
if ((mmap->addr+mmap->len) % PAGE_SIZE)
npages++;
map_region(mmap->addr, mmap->addr, npages, MAP_KERNEL_SPACE|MAP_NO_CACHE);
}
mmap++;
}
}
#endif
/*
* Modules like the init ram disk are already loaded.
* Therefore, we map these modules into the kernel space.
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
npages = mb_info->mods_count * sizeof(multiboot_module_t) >> PAGE_SHIFT;
if (mb_info->mods_count * sizeof(multiboot_module_t) & (PAGE_SIZE-1))
npages++;
map_region((size_t) mb_info->mods_addr, (size_t) mb_info->mods_addr, npages, MAP_KERNEL_SPACE);
for(i=0; i<mb_info->mods_count; i++, mmodule++) {
// map physical address to the same virtual address
npages = (mmodule->mod_end - mmodule->mod_start) >> PAGE_SHIFT;
if (mmodule->mod_end & (PAGE_SIZE-1))
npages++;
kprintf("Map module %s at 0x%x (%u pages)\n", (char*) mmodule->cmdline, mmodule->mod_start, npages);
map_region((size_t) mmodule->mod_start, (size_t) mmodule->mod_start, npages, MAP_KERNEL_SPACE);
}
}
#endif
#ifdef CONFIG_ROCKCREEK
// map SCC's bootinfo
viraddr = map_region(SCC_BOOTINFO, SCC_BOOTINFO, 1, MAP_KERNEL_SPACE);
kprintf("Map SCC's bootinfos at 0x%x\n", viraddr);
// map SCC's configuration registers
viraddr = map_region(CRB_X0_Y0, CRB_X0_Y0, (CRB_OWN-CRB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
kprintf("Map configuration registers at 0x%x\n", viraddr);
// map SCC's message passing buffers
viraddr = map_region(MPB_X0_Y0, MPB_X0_Y0, (MPB_OWN-MPB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_MPE);
kprintf("Map message passing buffers at 0x%x\n", viraddr);
// map the FPGA registers
viraddr = map_region(FPGA_BASE, FPGA_BASE, 0x10000 >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
kprintf("Map FPGA regsiters at 0x%x\n", viraddr);
#endif
/* enable paging */
write_cr3((uint32_t) &boot_pgd);
i = read_cr0();
i = i | (1 << 31);
write_cr0(i);
paging_enabled = 1;
#ifdef CONFIG_ROCKCREEK
// map the initial ramdisk
npages = bootinfo->size >> PAGE_SHIFT;
if (bootinfo->size & (PAGE_SIZE-1))
npages++;
viraddr = map_region(0, bootinfo->addr, npages, MAP_KERNEL_SPACE);
kprintf("Map initrd from 0x%x to 0x%x (size %u bytes)\n", bootinfo->addr, viraddr, bootinfo->size);
bootinfo->addr = viraddr;
#endif
/*
* we turned on paging
* => now, we are able to register our task
*/
register_task();
// map the APIC registers into the kernel address space
map_apic();
return 0;
}

650
arch/x86/mm/page64.c Normal file
View file

@ -0,0 +1,650 @@
/*
* Copyright 2012 Stefan Lankes, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <metalsvm/stddef.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/mmu.h>
#include <metalsvm/vma.h>
#include <metalsvm/string.h>
#include <metalsvm/page.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/processor.h>
#include <metalsvm/tasks.h>
#include <metalsvm/errno.h>
#include <asm/irq.h>
#include <asm/multiboot.h>
#include <asm/apic.h>
#ifdef CONFIG_ROCKCREEK
#include <asm/RCCE_lib.h>
#include <asm/SCC_API.h>
#include <asm/svm.h>
#include <asm/icc.h>
#endif
/*
* Virtual Memory Layout of the standard configuration
* (1 GB kernel space)
*
* 0x00000000 - 0x000FFFFF: reserved for IO devices (1MB)
* 0x00100000 - 0x0DEADFFF: Kernel (size depends on the configuration) (221MB)
* 0x0DEAE000 - 0x3FFFFFFF: Kernel heap
*
*/
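/*
 * A minimal sketch of the 4-level index split used below (illustrative
 * only): each paging level consumes 9 bits of the 48 bit virtual address,
 * the page offset the remaining 12.
 */
static inline void decompose64(size_t viraddr, uint16_t idx[4], uint16_t* offset)
{
	idx[0] = (viraddr >> 39) & 0x1FF;  // PML4 index
	idx[1] = (viraddr >> 30) & 0x1FF;  // page directory pointer index
	idx[2] = (viraddr >> 21) & 0x1FF;  // page directory index
	idx[3] = (viraddr >> 12) & 0x1FF;  // page table index
	*offset = viraddr & 0xFFF;         // offset within the 4 KB page
}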
/*
* Note that linker symbols are not variables, they have no memory allocated for
* maintaining a value, rather their address is their value.
*/
extern const void kernel_start;
extern const void kernel_end;
// boot task's page directory and page directory lock
extern page_dir_t boot_pgd;
static spinlock_t kslock = SPINLOCK_INIT;
static int paging_enabled = 0;
page_dir_t* get_boot_pgd(void)
{
return &boot_pgd;
}
int create_pgd(task_t* task, int copy)
{
// Currently, we support only kernel tasks
// => all tasks are able to use the same pgd
if (BUILTIN_EXPECT(!paging_enabled, 0))
return -EINVAL;
task->pgd = get_boot_pgd();
return 0;
}
/*
* drops all page frames and the PGD of a user task
*/
int drop_pgd(void)
{
#if 0
page_dir_t* pgd = per_core(current_task)->pgd;
size_t phy_pgd = virt_to_phys((size_t) pgd);
task_t* task = per_core(current_task);
uint32_t i;
if (BUILTIN_EXPECT(pgd == &boot_pgd, 0))
return -EINVAL;
spinlock_lock(&task->pgd_lock);
for(i=0; i<1024; i++) {
if (pgd->entries[i] & PG_USER) {
put_page(pgd->entries[i] & PAGE_MASK);
pgd->entries[i] = 0;
}
}
// freeing the page directory
put_page(phy_pgd);
task->pgd = NULL;
spinlock_unlock(&task->pgd_lock);
#endif
return 0;
}
size_t virt_to_phys(size_t viraddr)
{
task_t* task = per_core(current_task);
uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF;
uint16_t idx_dirp = (viraddr >> 30) & 0x1FF;
uint16_t idx_dir = (viraddr >> 21) & 0x1FF;
uint16_t idx_table = (viraddr >> 12) & 0x1FF;
page_table_t* pgt;
size_t ret = 0;
if (!paging_enabled)
return viraddr;
if (BUILTIN_EXPECT(!task || !task->pgd, 0))
return 0;
spinlock_irqsave_lock(&task->pgd_lock);
// Currently, we allocate pages only in kernel space.
// => physical address of the page table is identical to the virtual address
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt)
goto out;
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt)
goto out;
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt)
goto out;
ret = (size_t) (pgt->entries[idx_table] & PAGE_MASK);
if (!ret)
goto out;
ret = ret | (viraddr & 0xFFF); // add page offset
out:
//kprintf("vir %p to phy %p\n", viraddr, ret);
spinlock_irqsave_unlock(&task->pgd_lock);
return ret;
}
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{
task_t* task = per_core(current_task);
page_table_t* pgt;
size_t i, ret;
if (BUILTIN_EXPECT(!task || !task->pgd, 0))
return 0;
if (BUILTIN_EXPECT(!paging_enabled && (viraddr != phyaddr), 0))
return 0;
if (flags & MAP_KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
if (!viraddr) {
viraddr = vm_alloc(npages, flags);
if (BUILTIN_EXPECT(!viraddr, 0)) {
kputs("map_region: found no valid virtual address\n");
ret = 0;
goto out;
}
}
ret = viraddr;
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE, phyaddr+=PAGE_SIZE) {
uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF;
uint16_t idx_dirp = (viraddr >> 30) & 0x1FF;
uint16_t idx_dir = (viraddr >> 21) & 0x1FF;
uint16_t idx_table = (viraddr >> 12) & 0x1FF;
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt) {
kputs("map_region: out of memory\n");
ret = 0;
goto out;
}
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt) {
kputs("map_region: out of memory\n");
ret = 0;
goto out;
}
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt) {
kputs("map_region: out of memory\n");
ret = 0;
goto out;
}
/* convert physical address to virtual */
// Currently, we allocate pages only in kernel space.
// => physical address of the page table is identical to the virtual address
//if (paging_enabled)
// pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK);
if (pgt->entries[idx_table] && !(flags & MAP_REMAP)) {
kprintf("0x%x is already mapped\n", viraddr);
ret = 0;
goto out;
}
if (flags & MAP_USER_SPACE)
pgt->entries[idx_table] = USER_PAGE|(phyaddr & PAGE_MASK);
else
pgt->entries[idx_table] = KERN_PAGE|(phyaddr & PAGE_MASK);
if (flags & MAP_NO_CACHE)
pgt->entries[idx_table] |= PG_PCD;
if (flags & MAP_NO_ACCESS)
pgt->entries[idx_table] &= ~PG_PRESENT;
if (flags & MAP_WT)
pgt->entries[idx_table] |= PG_PWT;
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
tlb_flush_one_page(viraddr);
}
out:
if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return ret;
}
int change_page_permissions(size_t start, size_t end, uint32_t flags)
{
#if 0
uint32_t index1, index2, newflags;
size_t viraddr = start & PAGE_MASK;
size_t phyaddr;
page_table_t* pgt;
page_dir_t* pgd;
task_t* task = per_core(current_task);
if (BUILTIN_EXPECT(!paging_enabled, 0))
return -EINVAL;
pgd = per_core(current_task)->pgd;
if (BUILTIN_EXPECT(!pgd, 0))
return -EINVAL;
spinlock_lock(&task->pgd_lock);
while (viraddr < end)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
while ((viraddr < end) && (index2 < 1024)) {
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (pgt && pgt->entries[index2]) {
phyaddr = pgt->entries[index2] & PAGE_MASK;
newflags = pgt->entries[index2] & 0xFFF; // get old flags
if (!(newflags & PG_SVM_INIT)) {
if ((newflags & PG_SVM_STRONG) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS)))
newflags |= PG_PRESENT;
else if ((newflags & PG_SVM_STRONG) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS))
newflags &= ~PG_PRESENT;
}
// update flags
if (!(flags & VMA_WRITE)) {
newflags &= ~PG_RW;
#ifdef CONFIG_ROCKCREEK
if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE))
newflags &= ~PG_MPE;
#endif
} else {
newflags |= PG_RW;
#ifdef CONFIG_ROCKCREEK
if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE))
newflags |= PG_MPE;
#endif
}
pgt->entries[index2] = (newflags & 0xFFF) | (phyaddr & PAGE_MASK);
tlb_flush_one_page(viraddr);
}
index2++;
viraddr += PAGE_SIZE;
}
}
spinlock_unlock(&task->pgd_lock);
#endif
return -EINVAL;
}
/*
* Use the first fit algorithm to find a valid address range
*
* TODO: O(n) => bad performance, we need a better approach
*/
size_t vm_alloc(uint32_t npages, uint32_t flags)
{
task_t* task = per_core(current_task);
size_t viraddr, i, j, ret = 0;
size_t start, end;
page_table_t* pgt;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return 0;
if (flags & MAP_KERNEL_SPACE) {
start = (((size_t) &kernel_end) + 10*PAGE_SIZE) & PAGE_MASK;
end = (KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK;
} else {
start = KERNEL_SPACE & PAGE_MASK;
end = PAGE_MASK;
}
if (BUILTIN_EXPECT(!npages, 0))
return 0;
if (flags & MAP_KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
viraddr = i = start;
j = 0;
do {
uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF;
uint16_t idx_dirp = (viraddr >> 30) & 0x1FF;
uint16_t idx_dir = (viraddr >> 21) & 0x1FF;
uint16_t idx_table = (viraddr >> 12) & 0x1FF;
// Currently, we allocate pages only in kernel space.
// => physical address of the page table is identical to the virtual address
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt) {
i += (size_t)PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
j += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt) {
i += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
j += PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt) {
i += PGT_ENTRIES*PAGE_SIZE;
j += PGT_ENTRIES;
continue;
}
if (!(pgt->entries[idx_table])) {
i += PAGE_SIZE;
j++;
} else {
// restart search
j = 0;
viraddr = i + PAGE_SIZE;
i = i + PAGE_SIZE;
}
} while((j < npages) && (i<=end));
if ((j >= npages) && (viraddr < end))
ret = viraddr;
if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return ret;
}
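/*
 * A note on the skip logic above, assuming PGT_ENTRIES == 512 and 4 KB
 * pages: an empty PML4 entry advances the search by 512*512*512 pages
 * (512 GiB of virtual space), an empty page directory pointer entry by
 * 512*512 pages (1 GiB), and an empty page directory entry by 512 pages
 * (2 MiB). This keeps the O(n) first-fit walk tolerable on a sparse
 * 48 bit address space.
 */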
int unmap_region(size_t viraddr, uint32_t npages)
{
task_t* task = per_core(current_task);
page_table_t* pgt;
size_t i;
uint16_t idx_pd4, idx_dirp;
uint16_t idx_dir, idx_table;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
i = 0;
while(i<npages)
{
idx_pd4 = (viraddr >> 39) & 0x1FF;
idx_dirp = (viraddr >> 30) & 0x1FF;
idx_dir = (viraddr >> 21) & 0x1FF;
idx_table = (viraddr >> 12) & 0x1FF;
// Currently, we allocate pages only in kernel space.
// => physical address of the page table is identical to the virtual address
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt) {
viraddr += (size_t) PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt) {
viraddr += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt) {
viraddr += PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES;
continue;
}
if (pgt->entries[idx_table])
pgt->entries[idx_table] &= ~PG_PRESENT;
viraddr +=PAGE_SIZE;
i++;
if (viraddr > KERNEL_SPACE)
atomic_int32_dec(&task->user_usage);
tlb_flush_one_page(viraddr);
}
if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
int vm_free(size_t viraddr, uint32_t npages)
{
task_t* task = per_core(current_task);
page_table_t* pgt;
size_t i;
uint16_t idx_pd4, idx_dirp;
uint16_t idx_dir, idx_table;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
i = 0;
while(i<npages)
{
idx_pd4 = (viraddr >> 39) & 0x1FF;
idx_dirp = (viraddr >> 30) & 0x1FF;
idx_dir = (viraddr >> 21) & 0x1FF;
idx_table = (viraddr >> 12) & 0x1FF;
// Currently, we allocate pages only in kernel space.
// => physical address of the page table is identical to the virtual address
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt) {
viraddr += (size_t) PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt) {
viraddr += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt) {
viraddr += PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES;
continue;
}
if (pgt->entries[idx_table])
pgt->entries[idx_table] = 0;
viraddr +=PAGE_SIZE;
i++;
tlb_flush_one_page(viraddr);
}
if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
static void pagefault_handler(struct state *s)
{
task_t* task = per_core(current_task);
//page_dir_t* pgd = task->pgd;
//page_table_t* pgt = NULL;
size_t viraddr = read_cr2();
//size_t phyaddr;
#if 0
if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
viraddr = viraddr & PAGE_MASK;
phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
goto default_handler;
if (map_region(viraddr, phyaddr, 1, MAP_USER_SPACE) == viraddr) {
memset((void*) viraddr, 0x00, PAGE_SIZE);
return;
}
kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr);
put_page(phyaddr);
}
#endif
//default_handler:
kprintf("PAGE FAULT: Task %u got page fault at %p (irq %llu, cs:rip 0x%llx:0x%llx)\n", task->id, viraddr, s->int_no, s->cs, s->rip);
kprintf("Register state: rax = 0x%llx, rbx = 0x%llx, rcx = 0x%llx, rdx = 0x%llx, rdi = 0x%llx, rsi = 0x%llx, rbp = 0x%llx, rsp = 0x%llx\n",
s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp);
while(1); // halt here for debugging; the irq_enable()/abort() below are never reached
irq_enable();
abort();
}
int arch_paging_init(void)
{
uint32_t i, npages;
// uninstall default handler and install our own
irq_uninstall_handler(14);
irq_install_handler(14, pagefault_handler);
// kernel is already mapped into the kernel space (see entry64.asm)
// this includes .data, .bss, .text, video memory and the multiboot structure
#if MAX_CORES > 1
// Reserve page for smp boot code
if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE)) {
kputs("could not reserve page for smp boot code\n");
return -ENOMEM;
}
#endif
#ifdef CONFIG_MULTIBOOT
#if 0
/*
* Map reserved memory regions into the kernel space
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr;
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
while (mmap < mmap_end) {
if (mmap->type != MULTIBOOT_MEMORY_AVAILABLE) {
npages = mmap->len / PAGE_SIZE;
if ((mmap->addr+mmap->len) % PAGE_SIZE)
npages++;
map_region(mmap->addr, mmap->addr, npages, MAP_KERNEL_SPACE|MAP_NO_CACHE);
}
mmap++;
}
}
#endif
/*
* Modules like the init ram disk are already loaded.
* Therefore, we map these modules into the kernel space.
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
npages = mb_info->mods_count * sizeof(multiboot_module_t) >> PAGE_SHIFT;
if (mb_info->mods_count * sizeof(multiboot_module_t) & (PAGE_SIZE-1))
npages++;
map_region((size_t) (mb_info->mods_addr), (size_t) (mb_info->mods_addr), npages, MAP_REMAP|MAP_KERNEL_SPACE);
for(i=0; i<mb_info->mods_count; i++, mmodule++) {
// map physical address to the same virtual address
npages = (mmodule->mod_end - mmodule->mod_start) >> PAGE_SHIFT;
if (mmodule->mod_end & (PAGE_SIZE-1))
npages++;
kprintf("Map module %s at 0x%x (%u pages)\n", (char*) mmodule->cmdline, mmodule->mod_start, npages);
map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_REMAP|MAP_KERNEL_SPACE);
}
}
#endif
/* signal that we are now able to use paging */
paging_enabled = 1;
/*
* we turned on paging
* => now, we are able to register our task
*/
register_task();
// map the APIC registers into the kernel address space
map_apic();
return 0;
}

View file

@ -20,7 +20,7 @@
#include <metalsvm/stddef.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/memory.h>
#include <metalsvm/mmu.h>
#include <metalsvm/tasks.h>
#include <metalsvm/page.h>
#include <metalsvm/errno.h>

View file

@ -70,7 +70,7 @@ static ssize_t socket_write(fildes_t* file, uint8_t* buffer, size_t size)
return -ENOMEM;
memcpy(tmp, buffer, size);
ret = lwip_write(file->offset, tmp, size);
kfree(tmp);
kfree(tmp, size);
#endif
if (ret < 0)
ret = -errno;
@ -147,7 +147,7 @@ int socket_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node);
kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM;
}

View file

@ -83,8 +83,15 @@ static ssize_t stdio_read(fildes_t* file, uint8_t* buffer, size_t size)
static ssize_t stdio_write(fildes_t* file, uint8_t* buffer, size_t size)
{
int i;
for (i = 0; i<size; i++, buffer++)
for (i = 0; i<size; i++, buffer++) {
#ifdef CONFIG_VGA
vga_putchar(*buffer);
#elif defined(CONFIG_UART)
uart_putchar(*buffer);
#else
kputchar(*buffer);
#endif
}
file->offset += size;
return size;
@ -145,7 +152,7 @@ int null_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node);
kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM;
}
@ -204,7 +211,7 @@ int stdin_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node);
kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM;
}
@ -263,7 +270,7 @@ int stdout_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node);
kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM;
}
@ -322,7 +329,7 @@ int stderr_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node);
kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM;
}

24
fs/fs.c
View file

@ -132,30 +132,6 @@ int close_fs(fildes_t* file)
return ret;
}
void list_fs(vfs_node_t* node, uint32_t depth)
{
int i = 0;
dirent_t* dirent = NULL;
fildes_t* file = kmalloc(sizeof(fildes_t));
file->offset = 0;
file->flags = 0;
while ((dirent = readdir_fs(node, i)) != 0) {
kprintf("%*c|- %s\n", 2*depth, ' ', dirent->name);
if (strcmp(dirent->name, ".") && strcmp(dirent->name, "..")) {
vfs_node_t *new_node = finddir_fs(node, dirent->name);
if (new_node) {
kprintf("%*c\\\n", 2*depth, ' ');
list_fs(new_node, depth + 1);
}
}
i++;
}
kfree(file);
}
struct dirent* readdir_fs(vfs_node_t * node, uint32_t index)
{
struct dirent* ret = NULL;

View file

@ -210,7 +210,7 @@ static int initrd_open(fildes_t* file, const char* name)
if (file->node->type == FS_FILE) {
if ((file->flags & O_CREAT) && (file->flags & O_EXCL))
return -EEXIST;
/* in the case of O_TRUNC kfree all the nodes */
if (file->flags & O_TRUNC) {
uint32_t i;
@ -221,7 +221,8 @@ static int initrd_open(fildes_t* file, const char* name)
/* the first blist pointer have do remain valid. */
for(i=0; i<MAX_DATABLOCKS && !data; i++) {
if (blist->data[i]) {
kfree(blist->data[i]);
kfree(blist->data[i],
sizeof(data_block_t));
}
}
if (blist->next) {
@ -233,12 +234,12 @@ static int initrd_open(fildes_t* file, const char* name)
do {
for(i=0; i<MAX_DATABLOCKS && !data; i++) {
if (blist->data[i]) {
kfree(blist->data[i]);
kfree(blist->data[i], sizeof(data_block_t));
}
}
lastblist = blist;
blist = blist->next;
kfree(lastblist);
kfree(lastblist, sizeof(block_list_t));
} while(blist);
}
@ -252,7 +253,7 @@ static int initrd_open(fildes_t* file, const char* name)
/* opendir was called: */
if (name[0] == '\0')
return 0;
/* open file was called: */
if (!(file->flags & O_CREAT))
return -ENOENT;
@ -263,11 +264,11 @@ static int initrd_open(fildes_t* file, const char* name)
vfs_node_t* new_node = kmalloc(sizeof(vfs_node_t));
if (BUILTIN_EXPECT(!new_node, 0))
return -EINVAL;
blist = &file->node->block_list;
dir_block_t* dir_block;
dirent_t* dirent;
memset(new_node, 0x00, sizeof(vfs_node_t));
new_node->type = FS_FILE;
new_node->read = &initrd_read;
@ -285,7 +286,7 @@ static int initrd_open(fildes_t* file, const char* name)
if (!dirent->vfs_node) {
dirent->vfs_node = new_node;
strncpy(dirent->name, (char*) name, MAX_FNAME);
goto exit_create_file; // TODO: there might be a better Solution
goto exit_create_file; // there might be a better Solution ***************
}
}
}
@ -424,9 +425,9 @@ static vfs_node_t* initrd_mkdir(vfs_node_t* node, const char* name)
blist = blist->next;
} while(blist);
kfree(dir_block);
kfree(dir_block, sizeof(dir_block_t));
out:
kfree(new_node);
kfree(new_node, sizeof(vfs_node_t));
return NULL;
}

View file

@ -34,18 +34,14 @@ extern "C" {
#define PAGE_SHIFT 12
#define CACHE_LINE 64
#define MAILBOX_SIZE 32
#define TIMER_FREQ 100 // in HZ
#define CLOCK_TICK_RATE 1193182 // 8254 chip's internal oscillator frequency
#define TIMER_FREQ 100 /* in HZ */
#define CLOCK_TICK_RATE 1193182 /* 8254 chip's internal oscillator frequency */
#define INT_SYSCALL 0x80
#define KERNEL_SPACE (1*1024*1024*1024)
#define UART_PORT 0x3F8 // 0x2F8 for SCC
#define BYTE_ORDER LITTLE_ENDIAN
// some fixed addresses
#define VIDEO_MEM_ADDR 0xB8000 // the video memora address
#define SMP_SETUP_ADDR 0x07000
#define VIDEO_MEM_ADDR 0xB8000 // identity paged
#define LAPIC_ADDR 0xB9000 // fixed mapping from 0xFEE00000
#define IOAPIC_ADDR 0xBA000 // fixed mapping from 0xFEC00000
#define BYTE_ORDER LITTLE_ENDIAN
/*
* address space / (page_size * sizeof(uint8_t))
@ -56,7 +52,7 @@ extern "C" {
#define CONFIG_PCI
#define CONFIG_LWIP
#define CONFIG_VGA
#define CONFIG_UART
//#define CONFIG_UART
#define CONFIG_KEYBOARD
#define CONFIG_MULTIBOOT
//#define CONFIG_ROCKCREEK
@ -76,7 +72,7 @@ extern "C" {
//#define SHMADD
#define SHMDBG
//#define SHMADD_CACHEABLE
#define SCC_BOOTINFO 0x80000
#define SCC_BOOTINFO 0x80000
#define BUILTIN_EXPECT(exp, b) __builtin_expect((exp), (b))
//#define BUILTIN_EXPECT(exp, b) (exp)

View file

@ -242,9 +242,6 @@ int open_fs(fildes_t* file, const char* fname);
/** @brief Yet to be documented */
int close_fs(fildes_t * file);
/** @brief List a filesystem hirachically */
void list_fs(vfs_node_t* node, uint32_t depth);
/** @brief Get dir entry at index
* @param node VFS node to get dir entry from
* @param index Index position of desired dir entry

View file

@ -1,76 +0,0 @@
/*
* Copyright 2010 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
#ifndef __MALLOC_H__
#define __MALLOC_H__
#include <metalsvm/stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/// Binary exponent of maximal size for kmalloc()
#define BUDDY_MAX 32 // 4 GB
/// Binary exponent of minimal buddy size
#define BUDDY_MIN 3 // 8 Byte >= sizeof(buddy_t)
/// Binary exponent of the size which we allocate with buddy_fill()
#define BUDDY_ALLOC 16 // 64 KByte = 16 * PAGE_SIZE
#define BUDDY_LISTS (BUDDY_MAX-BUDDY_MIN+1)
#define BUDDY_MAGIC 0xBABE
union buddy;
/** @brief Buddy
*
* Every free memory block is stored in a linked list according to its size.
* We can use this free memory to store this buddy_t union which represents
* this block (the buddy_t union is aligned to the front).
* Therefore the address of the buddy_t union is equal to the address
* of the underlying free memory block.
*
* Every allocated memory block is prefixed with its binary size exponent and
* a known magic number. This prefix is hidden from the user because it's located
* before the actual memory address returned by kmalloc()
*/
typedef union buddy {
/// Pointer to the next buddy in the linked list.
union buddy* next;
struct {
/// The binary exponent of the block size
uint8_t exponent;
/// Must be equal to BUDDY_MAGIC for a valid memory block
uint16_t magic;
} prefix;
} buddy_t;
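/*
 * A sketch of how the prefix described above could be used (an assumption
 * for illustration, not code from this tree; it presumes the prefix
 * occupies sizeof(buddy_t) bytes in front of the returned pointer):
 */
static inline void* buddy_tag(buddy_t* block, uint8_t exponent)
{
	block->prefix.exponent = exponent;       // binary size exponent
	block->prefix.magic = BUDDY_MAGIC;       // marks a valid block
	return (char*) block + sizeof(buddy_t);  // hide the prefix from the caller
}

static inline uint8_t buddy_untag(void* addr)
{
	buddy_t* block = (buddy_t*) ((char*) addr - sizeof(buddy_t));
	return (block->prefix.magic == BUDDY_MAGIC) ? block->prefix.exponent : 0;
}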
/** @brief Dump free buddies */
void buddy_dump(void);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -31,6 +31,7 @@
#include <metalsvm/stddef.h>
#include <asm/atomic.h>
//#include <asm/mmu.h>
#ifdef __cplusplus
extern "C" {
@ -49,47 +50,33 @@ extern atomic_int32_t total_available_pages;
*/
int mmu_init(void);
/** @brief Get continuous pages
/** @brief get continuous pages
*
* Use the first-fit algorithm to find a suitable, continuous physical memory region
* This function finds a continuous page region (first fit algorithm)
*
* @param no_pages Desired number of pages
*
* @param npages Desired number of pages
* @return
* - physical address on success
* - 0 on failure
*/
size_t get_pages(size_t npages);
size_t get_pages(uint32_t no_pages);
/** @brief Get a single page
/** @brief get a single page
*
* Convenience function: uses get_pages(1);
*/
static inline size_t get_page(void) { return get_pages(1); }
/** @brief Put back a sequence of continuous pages
/** @brief Put back a page after use
*
* @param phyaddr Physical address of the first page
* @param npages Number of pages
* @param phyaddr Physical address to put back
*
* @return number of pages which were marked as used before calling
*/
int put_pages(size_t phyaddr, size_t npages);
/** @brief Put a single page
*
* Convenience function: uses put_pages(1);
*/
static inline int put_page(size_t phyaddr) { return put_pages(phyaddr, 1); }
/** @brief Copy a physical page frame
*
* @param psrc physical address of source page frame
* @param pdest physical address of destination page frame
* @return
* @return
* - 0 on success
* - -1 on failure
* - -EINVAL (-22) on failure
*/
int copy_page(size_t pdest, size_t psrc);
int put_page(size_t phyaddr);
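/*
 * A tiny round-trip sketch for the frame allocator above (an assumed
 * caller, not part of this header): grab one physical frame, hand it back.
 */
static inline void frame_roundtrip(void)
{
	size_t frame = get_page();  // first-fit search for one free frame
	if (frame)
		put_page(frame);    // mark the frame as free again
}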
#ifdef __cplusplus
}

View file

@ -29,7 +29,10 @@
#include <metalsvm/stddef.h>
#include <asm/page.h>
/** @brief Sets up the environment, page directories etc and enables paging. */
/**
* Sets up the environment, page directories etc and
* enables paging.
*/
static inline int paging_init(void) { return arch_paging_init(); }
#endif

View file

@ -28,10 +28,14 @@
extern "C" {
#endif
#define NULL ((void*) 0)
#define NULL ((void*) 0)
typedef unsigned int tid_t;
#define PAGE_SIZE (1 << PAGE_SHIFT)
#define PAGE_MASK ~(PAGE_SIZE - 1)
#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
#if MAX_CORES == 1
#define per_core(name) name
#define DECLARE_PER_CORE(type, name) extern type name;
@ -62,10 +66,10 @@ typedef unsigned int tid_t;
irq_nested_enable(flags);\
return ret; \
}
#define CORE_ID smp_id()
#define CORE_ID smp_id()
#endif
// needed to find the task, which is currently running on this core
/* needed to find the task, which is currently running on this core */
struct task;
DECLARE_PER_CORE(struct task*, current_task);

View file

@ -29,66 +29,72 @@
#ifndef __STDLIB_H__
#define __STDLIB_H__
#include <metalsvm/stddef.h>
#include <metalsvm/config.h>
#include <metalsvm/tasks_types.h>
#include <asm/stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
#define MAP_NO_ACCESS (1 << 0)
#define MAP_READ_ONLY (1 << 1)
#define MAP_USER_SPACE (1 << 2)
#define MAP_CODE (1 << 3)
#define MAP_WT (1 << 4)
#define MAP_NO_CACHE (1 << 5)
#define MAP_MPE (1 << 6)
#define MAP_SVM_STRONG (1 << 7)
#define MAP_SVM_LAZYRELEASE (1 << 8)
#define MAP_SVM_INIT (1 << 9)
#define MAP_KERNEL_SPACE (0 << 2) // legacy compatibility
#define MAP_REMAP (1 << 12)
//#define MAP_NON_CONTINUOUS (1 << 13) // TODO
#define MAP_KERNEL_SPACE (1 << 0)
#define MAP_USER_SPACE (1 << 1)
#define MAP_PAGE_TABLE (1 << 2)
#define MAP_NO_CACHE (1 << 3)
#define MAP_WT (1 << 5)
#define MAP_CODE (1 << 6)
#define MAP_READONLY (1 << 7)
#ifdef CONFIG_ROCKCREEK
#define MAP_MPE (1 << 8)
#endif
#define MAP_SVM_STRONG (1 << 9)
#define MAP_SVM_LAZYRELEASE (1 << 10)
#define MAP_SVM_INIT (1 << 11)
#define MAP_NO_ACCESS (1 << 12)
#define MAP_REMAP (1 << 13)
void NORETURN abort(void);
/** @brief General page allocator function
/** @brief Kernel's memory allocator function.
*
* This function allocates and maps whole pages.
* To avoid fragmentation you should use kmalloc() and kfree()!
* This will just call mem_allocation with
* the flags MAP_KERNEL_SPACE and MAP_HEAP.
*
* @return Pointer to the new memory range
*/
void* kmalloc(size_t);
/** @brief Kernel's more general memory allocator function.
*
* This function lets you choose flags for the newly allocated memory.
*
* @param sz Desired size of the new memory
* @param flags Flags for map_region() and vma_add()
* @param flags Flags to specify
*
* @return Pointer to the new memory range
*/
void* palloc(size_t sz, uint32_t flags);
void* mem_allocation(size_t sz, uint32_t flags);
/** @brief Free general kernel memory
/** @brief Free memory
*
* The palloc() allocator doesn't track how much memory was allocated for which pointer,
* The kernel malloc doesn't track how
* much memory was allocated for which pointer,
* so you have to specify how much memory shall be freed.
*
* @param sz The size which should be freed
*/
void pfree(void* addr, size_t sz);
void kfree(void*, size_t);
/** @brief The memory allocator function
/** @brief Create a new stack for a new task
*
* This allocator uses a buddy system to manage free memory.
*
* @return Pointer to the new memory range
* @return start address of the new stack
*/
void* kmalloc(size_t sz);
void* create_stack(void);
/** @brief The memory free function
/** @brief Delete stack of a finished task
*
* Releases memory allocated by malloc()
*
* @param addr The address to the memory block allocated by malloc()
* @param addr Pointer to the stack
* @return 0 on success
*/
void kfree(void* addr);
int destroy_stack(task_t* addr);
/** @brief String to long
*
@ -107,7 +113,7 @@ unsigned long strtoul(const char* nptr, char** endptr, int base);
*/
static inline int atoi(const char *str)
{
return (int)strtol(str, (char **) NULL, 10);
return (int)strtol(str, (char **)NULL, 10);
}
#ifdef __cplusplus

View file

@ -147,7 +147,9 @@ tid_t wait(int32_t* result);
*/
void update_load(void);
/** @brief Print the current cpu load */
/** @brief Print the current cpu load
*
*/
void dump_load(void);
#if MAX_CORES > 1
@ -199,7 +201,9 @@ int block_current_task(void);
*/
int set_timer(uint64_t deadline);
/** @brief check if a timer is expired */
/** @brief check if a timer is expired
*
*/
void check_timers(void);
/** @brief Abort current task */

View file

@ -36,7 +36,6 @@
#include <metalsvm/mailbox_types.h>
#include <asm/tasks_types.h>
#include <asm/atomic.h>
#include <asm/page.h>
#ifdef __cplusplus
extern "C" {
@ -63,6 +62,7 @@ extern "C" {
#define TASK_L2 (1 << 3)
typedef int (*entry_point_t)(void*);
struct page_dir;
/** @brief The task_t structure */
typedef struct task {
@ -86,12 +86,12 @@ typedef struct task {
struct task* prev;
/// last core id on which the task was running
uint32_t last_core;
/// usage in number of pages (including page map tables)
/// usage in number of pages
atomic_int32_t user_usage;
/// locks access to all page maps with PG_USER flag set
spinlock_irqsave_t page_lock;
/// virtual address of page map for CR3
page_entry_t* page_map;
/// avoids concurrent access to the page directory
spinlock_irqsave_t pgd_lock;
/// pointer to the page directory
struct page_dir* pgd;
/// lock for the VMA_list
spinlock_t vma_lock;
/// list of VMAs
@ -100,12 +100,14 @@ typedef struct task {
filp_t* fildes_table;
/// starting time/tick of the task
uint64_t start_tick;
/// the userspace heap
vma_t* heap;
/// start address of the heap
size_t start_heap;
/// end address of the heap
size_t end_heap;
/// LwIP error code
int lwip_err;
/// mail inbox
mailbox_wait_msg_t inbox;
mailbox_wait_msg_t inbox;
/// mail outbox array
mailbox_wait_msg_t* outbox[MAX_TASKS];
/// FPU state

View file

@ -19,7 +19,6 @@
/**
* @author Stefan Lankes
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
* @file include/metalsvm/vma.h
* @brief VMA related structure and functions
*/
@ -28,125 +27,56 @@
#define __VMA_H__
#include <metalsvm/stddef.h>
#include <asm/page.h>
#ifdef __cplusplus
extern "C" {
#endif
/// Read access to this VMA is allowed
#define VMA_READ (1 << 0)
/// Write access to this VMA is allowed
#define VMA_WRITE (1 << 1)
/// Instructions fetches in this VMA are allowed
#define VMA_EXECUTE (1 << 2)
/// This VMA is cacheable
#define VMA_CACHEABLE (1 << 3)
/// This VMA is not accessable
#define VMA_NO_ACCESS (1 << 4)
/// This VMA should be part of the userspace
#define VMA_USER (1 << 5)
/// A collection of flags used for the kernel heap (kmalloc)
#define VMA_HEAP (VMA_READ|VMA_WRITE|VMA_CACHEABLE)
// boundaries for VAS allocation
#define VMA_KERN_MIN 0xC0000
#define VMA_KERN_MAX KERNEL_SPACE
#define VMA_USER_MIN KERNEL_SPACE
// last three top level entries are reserved
#ifdef CONFIG_X86_32
#define VMA_USER_MAX 0xFF400000
#elif defined (CONFIG_X86_64)
#define VMA_USER_MAX 0xFFFFFE8000000000
#endif
#define VMA_NOACCESS (1 << 4)
struct vma;
/** @brief VMA structure definition
*
* Each item in this linked list marks a used part of the virtual address space.
* It's used by vm_alloc() to find holes between them.
*/
/** @brief VMA structure definition */
typedef struct vma {
/// Start address of the memory area
size_t start;
/// End address of the memory area
size_t end;
/// Type flags field
uint32_t flags;
uint32_t type;
/// Pointer of next VMA element in the list
struct vma* next;
/// Pointer to previous VMA element in the list
struct vma* prev;
} vma_t;
/** @brief Initialize the kernelspace VMA list
/** @brief Add a new virtual memory region to the list of VMAs
*
* Reserves several system-relevant virtual memory regions:
* - SMP boot page (SMP_SETUP_ADDR)
* - VGA video memory (VIDEO_MEM_ADDR)
* - The kernel (kernel_start - kernel_end)
* - Multiboot structure (mb_info)
* - Multiboot mmap (mb_info->mmap_*)
* - Multiboot modules (mb_info->mods_*)
* - Init Ramdisk
*
* @return
* - 0 on success
* - <0 on failure
*/
int vma_init();
/** @brief Add a new virtual memory area to the list of VMAs
*
* @param start Start address of the new area
* @param end End address of the new area
* @param flags Type flags the new area shall have
* @param task Pointer to the task_t structure of the task
* @param start Start address of the new region
* @param end End address of the new region
* @param type Type flags the new region shall have
*
* @return
* - 0 on success
* - -EINVAL (-22) or -ENOMEM (-12) on failure
*/
int vma_add(size_t start, size_t end, uint32_t flags);
int vma_add(struct task* task, size_t start, size_t end, uint32_t type);
/** @brief Search for a free memory area
/** @brief Dump information about this task's VMAs into the terminal.
*
* @param size Size of the requested VMA in bytes
* @param flags Type flags the new area shall have
* @return
* - 0 on failure
* - the start address of a free area
*/
size_t vma_alloc(size_t size, uint32_t flags);
/** @brief Free an allocated memory area
* This will print out start, end and flags for each VMA in the task's list
*
* @param start Start address of the area to be freed
* @param end End address of the area to be freed
* @param task The task's task_t structure
* @return
* - 0 on success
* - -EINVAL (-22) on failure
*/
int vma_free(size_t start, size_t end);
/** @brief Free all virtual memory areas
*
* @return
* - 0 on success
*/
int drop_vma_list(struct task* task);
/** @brief Copy the VMA list of the current task to task
*
* @param task The task where the list should be copied to
* @return
* - 0 on success
*/
int copy_vma_list(struct task* src, struct task* dest);
/** @brief Dump information about this task's VMAs into the terminal. */
void vma_dump();
int vma_dump(struct task* task);
#ifdef __cplusplus
}

View file

@ -63,7 +63,7 @@ extern const void bss_end;
int lowlevel_init(void)
{
// initialize .bss section
memset((char*) &bss_start, 0x00, (char*) &bss_end - (char*) &bss_start);
memset((void*)&bss_start, 0x00, ((size_t) &bss_end - (size_t) &bss_start));
koutput_init();
@ -238,6 +238,49 @@ int network_shutdown(void)
return 0;
}
#if 0
static void list_fs(vfs_node_t* node, uint32_t depth)
{
int j, i = 0;
dirent_t* dirent = NULL;
fildes_t* file = kmalloc(sizeof(fildes_t));
file->offset = 0;
file->flags = 0;
while ((dirent = readdir_fs(node, i)) != 0) {
for(j=0; j<depth; j++)
kputs(" ");
kprintf("%s\n", dirent->name);
if (strcmp(dirent->name, ".") && strcmp(dirent->name, "..")) {
vfs_node_t *new_node = finddir_fs(node, dirent->name);
if (new_node) {
if (new_node->type == FS_FILE) {
char buff[16] = {[0 ... 15] = 0x00};
file->node = new_node;
file->offset = 0;
file->flags = 0;
read_fs(file, (uint8_t*)buff, 8);
for(j=0; j<depth+1; j++)
kputs(" ");
kprintf("content: %s\n", buff);
} else list_fs(new_node, depth+1);
}
}
i++;
}
kfree(file, sizeof(fildes_t));
}
static void list_root(void) {
kprintf("List of the file system:\n/\n");
list_fs(fs_root, 1);
}
#endif
int initd(void* arg)
{
#ifdef CONFIG_LWIP
@ -274,13 +317,9 @@ int initd(void* arg)
#endif
#endif
#if 1
kputs("Filesystem:\n");
list_fs(fs_root, 1);
#endif
// list_root();
test_init();
return 0;
}

View file

@ -21,7 +21,7 @@
#include <metalsvm/stdio.h>
#include <metalsvm/string.h>
#include <metalsvm/time.h>
#include <metalsvm/memory.h>
#include <metalsvm/mmu.h>
#include <metalsvm/tasks.h>
#include <metalsvm/processor.h>
#include <metalsvm/errno.h>
@ -29,7 +29,6 @@
#include <metalsvm/fs.h>
#include <asm/irq.h>
#include <asm/irqflags.h>
#include <asm/page.h>
#include <asm/kb.h>
#ifdef CONFIG_ROCKCREEK
#include <asm/icc.h>
@ -72,9 +71,8 @@ int main(void)
pushbg(COL_BLUE);
kprintf("This is MetalSVM %s Build %u, %u\n",
METALSVM_VERSION, &__BUILD_DATE, &__BUILD_TIME);
METALSVM_VERSION, &__BUILD_DATE, &__BUILD_TIME);
popbg();
system_init();
irq_init();
timer_init();
@ -87,7 +85,7 @@ int main(void)
icc_init();
svm_init();
#endif
initrd_init();
initrd_init();
irq_enable();
@ -103,10 +101,9 @@ int main(void)
disable_timer_irq();
#endif
sleep(2);
sleep(5);
create_kernel_task(&id, initd, NULL, NORMAL_PRIO);
kprintf("Create initd with id %u\n", id);
reschedule();
while(1) {

View file

@ -105,11 +105,11 @@ static int sys_open(const char* name, int flags, int mode)
/* file doesn't exist! */
if (check < 0) {
/* tidy up the fildescriptor */
kfree(curr_task->fildes_table[fd]);
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL;
return check;
}
return fd;
}
@ -196,7 +196,7 @@ static int sys_socket(int domain, int type, int protocol)
/* file doesn't exist! */
if (curr_task->fildes_table[fd]->node == NULL) {
/* tidy up the fildescriptor */
kfree(curr_task->fildes_table[fd]);
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL;
return -ENOENT;
}
@ -236,11 +236,11 @@ static int sys_accept(int s, struct sockaddr* addr, socklen_t* addrlen)
curr_task->fildes_table[fd]->offset = sock2;
curr_task->fildes_table[fd]->count = 1;
curr_task->fildes_table[fd]->node = findnode_fs("/dev/socket");
/* file doesn't exist! */
if (curr_task->fildes_table[fd]->node == NULL) {
/* tidy up the fildescriptor */
kfree(curr_task->fildes_table[fd]);
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL;
return -ENOENT;
}
@ -273,7 +273,7 @@ static int sys_close(int fd)
/* close command failed -> return check = errno */
if (BUILTIN_EXPECT(check < 0, 0))
return check;
kfree(curr_task->fildes_table[fd]);
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL;
} else {
curr_task->fildes_table[fd]->count--;
@ -356,7 +356,7 @@ static int sys_dup(int fd)
* free the memory which was allocated in get_fildes()
* cause will link it to another existing memory space
*/
kfree(curr_task->fildes_table[new_fd]);
kfree(curr_task->fildes_table[new_fd], sizeof(fildes_t));
/* and link it to another existing memory space */
curr_task->fildes_table[new_fd] = curr_task->fildes_table[fd];
@ -381,7 +381,7 @@ static int sys_dup2(int fd, int fd2)
/* If fd and fd2 are equal, then dup2() just returns fd2 */
if (fd == fd2)
return fd2;
/*
* if descriptor fd2 is already in use, it is first deallocated
* as if a close(2) call had been done first
@ -398,32 +398,30 @@ static int sys_dup2(int fd, int fd2)
static int sys_sbrk(int incr)
{
task_t* task = per_core(current_task);
vma_t* heap = task->heap;
vma_t* tmp = NULL;
int ret;
spinlock_lock(&task->vma_lock);
if (BUILTIN_EXPECT(!heap,0 )) {
kprintf("sys_sbrk: missing heap!\n");
abort();
}
tmp = task->vma_list;
while(tmp && !((task->end_heap >= tmp->start) && (task->end_heap <= tmp->end)))
tmp = tmp->next;
ret = heap->end;
heap->end += incr;
if (heap->end < heap->start)
heap->end = heap->start;
// allocation and mapping of new pages for the heap
// is catched by the pagefault handler
kprintf("sys_sbrk: task = %d, heap->start = %#lx, heap->end = %#lx, incr = %i\n", task->id, heap->start, heap->end, incr); // TOD0: remove
ret = (int) task->end_heap;
task->end_heap += incr;
if (task->end_heap < task->start_heap)
task->end_heap = task->start_heap;
// resize virtual memory area
if (tmp && (tmp->end <= task->end_heap))
tmp->end = task->end_heap;
spinlock_unlock(&task->vma_lock);
return ret;
}
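For illustration, a minimal userspace sketch (not part of this diff) of how a bump allocator would sit on top of sys_sbrk; the sbrk() wrapper name and the (void*) -1 error convention are assumptions borrowed from the usual newlib binding:

/* Hypothetical sketch: bump allocation over the assumed sbrk() wrapper. */
#include <stddef.h>

extern void* sbrk(int incr); /* assumed newlib binding to __NR_sbrk */

static void* bump_alloc(size_t sz)
{
	/* keep the program break word-aligned */
	sz = (sz + sizeof(long) - 1) & ~(sizeof(long) - 1);

	void* old = sbrk((int) sz);
	if (old == (void*) -1)
		return NULL; /* heap exhausted */

	/* sys_sbrk only moves end_heap; the page-fault handler
	 * maps the new pages lazily on first access */
	return old;
}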
int syscall_handler(size_t sys_nr, ...)
int syscall_handler(uint32_t sys_nr, ...)
{
int ret = -EINVAL;
va_list vl;
@ -502,7 +500,7 @@ int syscall_handler(size_t sys_nr, ...)
break;
case __NR_wait: {
int32_t* status = va_arg(vl, int32_t*);
ret = wait(status);
break;
}
@ -551,7 +549,7 @@ int syscall_handler(size_t sys_nr, ...)
ret = -ENOTSOCK;
break;
}
//kprintf("lwip_connect: %p with lenght %i and Socket %i", name, namelen, per_core(current_task)->fildes_table[fd].offset); // TODO: remove
//kprintf("lwip_connect: %p with lenght %i and Socket %i", name, namelen, per_core(current_task)->fildes_table[fd].offset);
ret = lwip_connect(per_core(current_task)->fildes_table[fd]->offset, name, namelen);
@ -603,7 +601,7 @@ int syscall_handler(size_t sys_nr, ...)
}
#endif
default:
kprintf("syscall_handler: invalid system call %u\n", sys_nr);
kputs("invalid system call\n");
ret = -ENOSYS;
break;
};

View file

@ -30,7 +30,7 @@
#include <metalsvm/stdlib.h>
#include <metalsvm/string.h>
#include <metalsvm/errno.h>
#include <metalsvm/memory.h>
#include <metalsvm/mmu.h>
#include <metalsvm/page.h>
#include <metalsvm/tasks.h>
#include <metalsvm/processor.h>
@ -47,27 +47,26 @@
* A task's id will be its position in this array.
*/
static task_t task_table[MAX_TASKS] = { \
[0] = {0, TASK_IDLE, NULL, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_IRQSAVE_INIT, 0, SPINLOCK_INIT, NULL, NULL, 0, NULL}, \
[1 ... MAX_TASKS-1] = {0, TASK_INVALID, NULL, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_IRQSAVE_INIT, 0, SPINLOCK_INIT, NULL, NULL, 0, NULL}
};
[0] = {0, TASK_IDLE, NULL, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_IRQSAVE_INIT, NULL, SPINLOCK_INIT, NULL, NULL, 0, 0, 0, 0}, \
[1 ... MAX_TASKS-1] = {0, TASK_INVALID, NULL, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_IRQSAVE_INIT, NULL, SPINLOCK_INIT, NULL, NULL, 0, 0, 0, 0}};
static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
#ifndef CONFIG_TICKLESS
#if MAX_CORES > 1
static runqueue_t runqueues[MAX_CORES] = { \
[0] = {task_table+0, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}, \
[1 ... MAX_CORES-1] = {NULL, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#else
static runqueue_t runqueues[1] = { \
[0] = {task_table+0, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#endif
#else
#if MAX_CORES > 1
static runqueue_t runqueues[MAX_CORES] = { \
[0] = {task_table+0, NULL, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}, \
[1 ... MAX_CORES-1] = {NULL, NULL, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#else
static runqueue_t runqueues[1] = { \
[0] = {task_table+0, NULL, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#endif
#endif
@ -79,7 +78,6 @@ DEFINE_PER_CORE(task_t*, current_task, task_table+0);
extern const void boot_stack;
/** @brief helper function for the assembly code to determine the current task
*
* @return Pointer to the task_t structure of current task
*/
task_t* get_current_task(void) {
@ -98,32 +96,6 @@ uint32_t get_highest_priority(void)
return msb(runqueues[CORE_ID].prio_bitmap);
}
/** @brief Create a new stack for a new task
*
* @return start address of the new stack
*/
static void* create_stack(void)
{
return palloc(KERNEL_STACK_SIZE, MAP_KERNEL_SPACE);
}
/** @brief Delete stack of a finished task
*
* @param addr Pointer to the stack
* @return
* - 0 on success
* - -EINVAL on failure
*/
static int destroy_stack(task_t* task)
{
if (BUILTIN_EXPECT(!task || !task->stack, 0))
return -EINVAL;
pfree(task->stack, KERNEL_STACK_SIZE);
return 0;
}
int multitasking_init(void) {
if (BUILTIN_EXPECT(task_table[0].status != TASK_IDLE, 0)) {
kputs("Task 0 is not an idle task\n");
@ -132,7 +104,7 @@ int multitasking_init(void) {
mailbox_wait_msg_init(&task_table[0].inbox);
memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[0].page_map = get_boot_page_map();
task_table[0].pgd = get_boot_pgd();
task_table[0].flags = TASK_DEFAULT_FLAGS;
task_table[0].prio = IDLE_PRIO;
task_table[0].stack = (void*) &boot_stack;
@ -156,7 +128,7 @@ size_t get_idle_task(uint32_t id)
atomic_int32_set(&task_table[id].user_usage, 0);
mailbox_wait_msg_init(&task_table[id].inbox);
memset(task_table[id].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[id].page_map = get_boot_page_map();
task_table[id].pgd = get_boot_pgd();
current_task[id].var = task_table+id;
runqueues[id].idle = task_table+id;
@ -221,8 +193,10 @@ static void wakeup_blocked_tasks(int result)
spinlock_irqsave_unlock(&table_lock);
}
/** @brief A procedure to be called by procedures which are called by exiting tasks. */
/** @brief A procedure to be called by
* procedures which are called by exiting tasks. */
static void NORETURN do_exit(int arg) {
vma_t* tmp;
task_t* curr_task = per_core(current_task);
uint32_t flags, core_id, fd, status;
@ -230,17 +204,17 @@ static void NORETURN do_exit(int arg) {
for (fd = 0; fd < NR_OPEN; fd++) {
if(curr_task->fildes_table[fd] != NULL) {
/*
* Delete a descriptor from the per-process object
* reference table. If this is not the last reference to the underlying
* object, the object will be ignored.
*/
* delete a descriptor from the per-process object
* reference table. If this is not the last reference to the underlying
* object, the object will be ignored.
*/
if (curr_task->fildes_table[fd]->count == 1) {
// try to close the file
/* try to close the file */
status = close_fs(curr_task->fildes_table[fd]);
// close command failed -> return check = errno
/* close command failed -> return check = errno */
if (BUILTIN_EXPECT(status < 0, 0))
kprintf("Task %u was not able to close file descriptor %i. close_fs returned %d", curr_task->id, fd, -status);
kfree(curr_task->fildes_table[fd]);
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
curr_task->fildes_table[fd] = NULL;
} else {
curr_task->fildes_table[fd]->count--;
@ -248,26 +222,37 @@ static void NORETURN do_exit(int arg) {
}
}
}
kfree(curr_task->fildes_table); // finally the table has to be cleared
// finally, the table has to be cleared
kfree(curr_task->fildes_table, sizeof(filp_t)*NR_OPEN);
}
kprintf("Terminate task: %u, return value %d\n", curr_task->id, arg);
wakeup_blocked_tasks(arg);
drop_vma_list(curr_task);
drop_page_map();
//vma_dump(curr_task);
spinlock_lock(&curr_task->vma_lock);
#if 1
// remove memory regions
while((tmp = curr_task->vma_list) != NULL) {
kfree((void*) tmp->start, tmp->end - tmp->start + 1);
curr_task->vma_list = tmp->next;
kfree((void*) tmp, sizeof(vma_t));
}
spinlock_unlock(&curr_task->vma_lock);
drop_pgd(); // delete page directory and its page tables
#if 0
if (atomic_int32_read(&curr_task->user_usage))
kprintf("Memory leak! Task %d did not release %d pages\n",
curr_task->id, atomic_int32_read(&curr_task->user_usage));
#endif
flags = irq_nested_disable();
curr_task->status = TASK_FINISHED;
// decrease the number of active tasks
flags = irq_nested_disable();
core_id = CORE_ID;
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].nr_tasks--;
@ -277,7 +262,9 @@ static void NORETURN do_exit(int arg) {
reschedule();
kprintf("Kernel panic: scheduler on core %d found no valid task\n", CORE_ID);
while(1) HALT;
while(1) {
HALT;
}
}
/** @brief A procedure to be called by kernel tasks */
@ -313,7 +300,6 @@ void NORETURN abort(void) {
static int create_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio, uint32_t core_id)
{
task_t* curr_task;
task_t* new_task = NULL;
int ret = -ENOMEM;
uint32_t i;
@ -333,76 +319,64 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio, uin
#endif
{
core_id = CORE_ID;
kprintf("create_task: invalid core id! Set id to %u!\n", core_id);
kprintf("Inavlid core id! Set id to %u!\n", core_id);
}
curr_task = per_core(current_task);
// search free entry in task table
for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) {
new_task = &task_table[i];
atomic_int32_set(&task_table[i].user_usage, 0);
ret = create_pgd(task_table+i, 0);
if (ret < 0) {
ret = -ENOMEM;
goto create_task_out;
}
task_table[i].id = i;
task_table[i].status = TASK_READY;
task_table[i].last_stack_pointer = NULL;
task_table[i].stack = create_stack();
task_table[i].flags = TASK_DEFAULT_FLAGS;
task_table[i].prio = prio;
task_table[i].last_core = 0;
spinlock_init(&task_table[i].vma_lock);
task_table[i].vma_list = NULL;
task_table[i].fildes_table = NULL;
mailbox_wait_msg_init(&task_table[i].inbox);
memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[i].outbox[curr_task->id] = &curr_task->inbox;
if (id)
*id = i;
ret = create_default_frame(task_table+i, ep, arg);
task_table[i].start_heap = 0;
task_table[i].end_heap = 0;
task_table[i].lwip_err = 0;
task_table[i].start_tick = get_clock_tick();
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[prio-1].first) {
task_table[i].next = task_table[i].prev = NULL;
runqueues[core_id].queue[prio-1].first = task_table+i;
runqueues[core_id].queue[prio-1].last = task_table+i;
} else {
task_table[i].prev = runqueues[core_id].queue[prio-1].last;
task_table[i].next = NULL;
runqueues[core_id].queue[prio-1].last->next = task_table+i;
runqueues[core_id].queue[prio-1].last = task_table+i;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
break;
}
}
if (BUILTIN_EXPECT(!new_task, 0)) {
ret = -ENOMEM;
goto out;
}
atomic_int32_set(&new_task->user_usage, 0);
ret = copy_page_map(new_task, 0);
if (ret < 0) {
ret = -ENOMEM;
goto out;
}
new_task->id = i;
new_task->status = TASK_READY;
new_task->last_stack_pointer = NULL;
new_task->flags = TASK_DEFAULT_FLAGS;
new_task->prio = prio;
new_task->last_core = 0;
spinlock_init(&new_task->vma_lock);
new_task->vma_list = NULL;
new_task->fildes_table = NULL;
new_task->stack = create_stack();
if (BUILTIN_EXPECT(!new_task->stack, 0)) {
ret = -ENOMEM;
goto out;
}
mailbox_wait_msg_init(&new_task->inbox);
memset(new_task->outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
new_task->outbox[curr_task->id] = &curr_task->inbox;
if (id)
*id = i;
ret = create_default_frame(new_task, ep, arg);
new_task->lwip_err = 0;
new_task->start_tick = get_clock_tick();
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[prio-1].first) {
new_task->next = new_task->prev = NULL;
runqueues[core_id].queue[prio-1].first = new_task;
runqueues[core_id].queue[prio-1].last = new_task;
}
else {
new_task->prev = runqueues[core_id].queue[prio-1].last;
new_task->next = NULL;
runqueues[core_id].queue[prio-1].last->next = new_task;
runqueues[core_id].queue[prio-1].last = new_task;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
out:
create_task_out:
spinlock_irqsave_unlock(&table_lock);
return ret;
@ -413,103 +387,109 @@ int sys_fork(void)
int ret = -ENOMEM;
unsigned int i, core_id, fd_i;
task_t* parent_task = per_core(current_task);
task_t* child_task = NULL;
vma_t** child;
vma_t* parent;
vma_t* tmp;
spinlock_lock(&parent_task->vma_lock);
spinlock_irqsave_lock(&table_lock);
core_id = CORE_ID;
// search free entry in task_table
for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) {
child_task = &task_table[i];
atomic_int32_set(&task_table[i].user_usage, 0);
ret = create_pgd(task_table+i, 1);
if (ret < 0) {
ret = -ENOMEM;
goto create_task_out;
}
task_table[i].id = i;
task_table[i].last_stack_pointer = NULL;
task_table[i].stack = create_stack();
spinlock_init(&task_table[i].vma_lock);
// copy VMA list
child = &task_table[i].vma_list;
parent = parent_task->vma_list;
tmp = NULL;
while(parent) {
*child = (vma_t*) kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!*child, 0)) // check the fresh allocation itself, not the link pointer
break;
(*child)->start = parent->start;
(*child)->end = parent->end;
(*child)->type = parent->type;
(*child)->prev = tmp;
(*child)->next = NULL;
parent = parent->next;
tmp = *child;
child = &((*child)->next);
}
/* init fildes_table */
task_table[i].fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
memcpy(task_table[i].fildes_table, parent_task->fildes_table, sizeof(filp_t)*NR_OPEN);
for (fd_i = 0; fd_i < NR_OPEN; fd_i++)
if ((task_table[i].fildes_table[fd_i]) != NULL)
task_table[i].fildes_table[fd_i]->count++;
mailbox_wait_msg_init(&task_table[i].inbox);
memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[i].outbox[parent_task->id] = &parent_task->inbox;
task_table[i].flags = parent_task->flags;
memcpy(&(task_table[i].fpu), &(parent_task->fpu), sizeof(union fpu_state));
task_table[i].start_tick = get_clock_tick();
task_table[i].start_heap = 0;
task_table[i].end_heap = 0;
task_table[i].lwip_err = 0;
task_table[i].prio = parent_task->prio;
task_table[i].last_core = parent_task->last_core;
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[parent_task->prio-1].first) {
task_table[i].next = task_table[i].prev = NULL;
runqueues[core_id].queue[parent_task->prio-1].first = task_table+i;
runqueues[core_id].queue[parent_task->prio-1].last = task_table+i;
} else {
task_table[i].prev = runqueues[core_id].queue[parent_task->prio-1].last;
task_table[i].next = NULL;
runqueues[core_id].queue[parent_task->prio-1].last->next = task_table+i;
runqueues[core_id].queue[parent_task->prio-1].last = task_table+i;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
ret = arch_fork(task_table+i);
if (parent_task != per_core(current_task)) {
// Oh, the current task is the new child task!
// Leave the function without releasing the locks
// because the locks are already released
// by the parent task!
return 0;
}
if (!ret) {
task_table[i].status = TASK_READY;
ret = i;
}
break;
}
}
if (BUILTIN_EXPECT(!child_task, 0)) {
ret = -ENOMEM;
goto out;
}
kprintf("sys_fork: parent id = %u, child id = %u\n", parent_task->id , child_task->id); // TODO: remove
atomic_int32_set(&child_task->user_usage, 0);
ret = copy_page_map(child_task, 1);
if (BUILTIN_EXPECT(ret < 0, 0)) {
ret = -ENOMEM;
goto out;
}
ret = copy_vma_list(parent_task, child_task);
if (BUILTIN_EXPECT(ret < 0, 0)) {
ret = -ENOMEM;
goto out;
}
child_task->id = i;
child_task->last_stack_pointer = NULL;
child_task->stack = create_stack();
if (BUILTIN_EXPECT(!child_task->stack, 0)) {
ret = -ENOMEM;
goto out;
}
// init fildes_table
child_task->fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
memcpy(child_task->fildes_table, parent_task->fildes_table, sizeof(filp_t)*NR_OPEN);
for (fd_i=0; fd_i<NR_OPEN; fd_i++) {
if ((child_task->fildes_table[fd_i]) != NULL)
child_task->fildes_table[fd_i]->count++;
}
// init mailbox
mailbox_wait_msg_init(&child_task->inbox);
memset(child_task->outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
child_task->outbox[parent_task->id] = &parent_task->inbox;
child_task->flags = parent_task->flags;
memcpy(&child_task->fpu, &parent_task->fpu, sizeof(union fpu_state));
child_task->start_tick = get_clock_tick();
child_task->lwip_err = 0;
child_task->prio = parent_task->prio;
child_task->last_core = parent_task->last_core;
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[parent_task->prio-1].first) {
child_task->next = child_task->prev = NULL;
runqueues[core_id].queue[parent_task->prio-1].first = child_task;
runqueues[core_id].queue[parent_task->prio-1].last = child_task;
}
else {
child_task->prev = runqueues[core_id].queue[parent_task->prio-1].last;
child_task->next = NULL;
runqueues[core_id].queue[parent_task->prio-1].last->next = child_task;
runqueues[core_id].queue[parent_task->prio-1].last = child_task;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
ret = arch_fork(child_task);
if (parent_task != per_core(current_task))
/*
* Oh, the current task is the new child task!
* Leave the function without releasing the locks
* because the locks are already released by the parent task!
*/
return 0;
if (!ret) {
child_task->status = TASK_READY;
ret = i;
}
out:
create_task_out:
spinlock_irqsave_unlock(&table_lock);
spinlock_unlock(&parent_task->vma_lock);
return ret;
}
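To make the return-value convention above explicit (0 in the child, the child's task_table slot in the parent), a hedged userspace sketch; the fork() wrapper is assumed to be the usual newlib binding:

/* Hypothetical userspace view of sys_fork(). */
int pid = fork();
if (pid == 0) {
	/* child: runs on a copied page directory with a duplicated
	 * fd table (each fildes count was incremented above) */
} else if (pid > 0) {
	/* parent: pid is the child's slot index in task_table */
} else {
	/* -ENOMEM: no free task slot, or an allocation failed */
}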
@ -535,7 +515,7 @@ static int kernel_entry(void* args)
ret = kernel_args->func(kernel_args->args);
kfree(kernel_args);
kfree(kernel_args, sizeof(kernel_args_t));
return ret;
}
@ -581,15 +561,16 @@ static int load_task(load_args_t* largs)
{
uint32_t i, offset, idx, fd_i;
uint32_t addr, npages, flags;
size_t stack = 0, heap = 0;
size_t stack = 0;
elf_header_t header;
elf_program_header_t prog_header;
//elf_section_header_t sec_header;
fildes_t *file = kmalloc(sizeof(fildes_t)); // TODO: kfree is missing!
// FIXME: kfree is missing!
fildes_t *file = kmalloc(sizeof(fildes_t));
file->offset = 0;
file->flags = 0;
// TODO: init the whole fildes_t struct!
task_t* curr_task = per_core(current_task);
int err;
@ -600,22 +581,22 @@ static int load_task(load_args_t* largs)
if (!file->node)
return -EINVAL;
// init fildes_table
/* init fildes_table */
spinlock_irqsave_lock(&table_lock);
if (!curr_task->fildes_table) {
curr_task->fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
if (BUILTIN_EXPECT(!curr_task->fildes_table, 0)) {
if (!task_table[curr_task->id].fildes_table) {
task_table[curr_task->id].fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
if (BUILTIN_EXPECT(!task_table[curr_task->id].fildes_table, 0)) {
spinlock_irqsave_unlock(&table_lock);
return -ENOMEM;
}
memset(curr_task->fildes_table, 0x00, sizeof(filp_t)*NR_OPEN);
memset(task_table[curr_task->id].fildes_table, 0x00, sizeof(filp_t)*NR_OPEN);
for (fd_i = 0; fd_i < 3; fd_i++) {
curr_task->fildes_table[fd_i] = kmalloc(sizeof(fildes_t));
curr_task->fildes_table[fd_i]->count = 1;
task_table[curr_task->id].fildes_table[fd_i] = kmalloc(sizeof(fildes_t));
task_table[curr_task->id].fildes_table[fd_i]->count = 1;
}
curr_task->fildes_table[0]->node = findnode_fs("/dev/stdin");
curr_task->fildes_table[1]->node = findnode_fs("/dev/stdout");
curr_task->fildes_table[2]->node = findnode_fs("/dev/stderr");
task_table[curr_task->id].fildes_table[0]->node = findnode_fs("/dev/stdin");
task_table[curr_task->id].fildes_table[1]->node = findnode_fs("/dev/stdout");
task_table[curr_task->id].fildes_table[2]->node = findnode_fs("/dev/stderr");
}
spinlock_irqsave_unlock(&table_lock);
@ -636,43 +617,43 @@ static int load_task(load_args_t* largs)
if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_32, 0))
goto invalid;
#elif defined(CONFIG_X86_64)
#else
if (BUILTIN_EXPECT(header.machine != ELF_EM_X86_64, 0))
goto invalid;
if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_64, 0))
goto invalid;
#else
#error "unknown arch"
#endif
if (BUILTIN_EXPECT(header.ident.data != ELF_DATA_2LSB, 0))
goto invalid;
if (header.entry < KERNEL_SPACE)
if (header.entry <= KERNEL_SPACE)
goto invalid;
// interpret program header table
for (i=0; i<header.ph_entry_count; i++) {
file->offset = header.ph_offset+i*header.ph_entry_size;
if (read_fs(file, (uint8_t*) &prog_header, sizeof(elf_program_header_t)) == 0) {
if (read_fs(file, (uint8_t*)&prog_header, sizeof(elf_program_header_t)) == 0) {
kprintf("Could not read programm header!\n");
continue;
}
switch(prog_header.type) {
switch(prog_header.type)
{
case ELF_PT_LOAD: // load program segment
if (!prog_header.virt_addr)
continue;
npages = PAGE_FLOOR(prog_header.mem_size) >> PAGE_BITS;
npages = (prog_header.mem_size >> PAGE_SHIFT);
if (prog_header.mem_size & (PAGE_SIZE-1))
npages++;
addr = get_pages(npages);
flags = MAP_USER_SPACE;
if (prog_header.flags & PF_X)
flags |= MAP_CODE;
if (!(prog_header.flags & PF_W))
flags |= MAP_READ_ONLY;
// map page frames in the address space of the current task
if (!map_region(prog_header.virt_addr, addr, npages, flags)) {
@ -681,30 +662,35 @@ static int load_task(load_args_t* largs)
}
// clear pages
memset((void*) prog_header.virt_addr, 0x00, npages * PAGE_SIZE);
memset((void*) prog_header.virt_addr, 0x00, npages*PAGE_SIZE);
// update heap location
if (heap < prog_header.virt_addr + prog_header.mem_size)
heap = prog_header.virt_addr+prog_header.mem_size;
// set starting point of the heap
if (curr_task->start_heap < prog_header.virt_addr+prog_header.mem_size)
curr_task->start_heap = curr_task->end_heap = prog_header.virt_addr+prog_header.mem_size;
// load program
file->offset = prog_header.offset;
read_fs(file, (uint8_t*) prog_header.virt_addr, prog_header.file_size);
read_fs(file, (uint8_t*)prog_header.virt_addr, prog_header.file_size);
flags = VMA_CACHEABLE | VMA_USER;
flags = VMA_CACHEABLE;
if (prog_header.flags & PF_R)
flags |= VMA_READ;
if (prog_header.flags & PF_W)
flags |= VMA_WRITE;
if (prog_header.flags & PF_X)
flags |= VMA_EXECUTE;
vma_add(curr_task, prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags);
vma_add(prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE, flags);
if (!(prog_header.flags & PF_W))
change_page_permissions(prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags);
break;
case ELF_PT_GNU_STACK: // Indicates stack executability
// create user-level stack
npages = PAGE_FLOOR(DEFAULT_STACK_SIZE) >> PAGE_BITS;
npages = DEFAULT_STACK_SIZE >> PAGE_SHIFT;
if (DEFAULT_STACK_SIZE & (PAGE_SIZE-1))
npages++;
addr = get_pages(npages);
stack = header.entry*2; // virtual address of the stack
@ -715,15 +701,14 @@ static int load_task(load_args_t* largs)
memset((void*) stack, 0x00, npages*PAGE_SIZE);
// create vma regions for the user-level stack
flags = VMA_CACHEABLE | VMA_USER;
flags = VMA_CACHEABLE;
if (prog_header.flags & PF_R)
flags |= VMA_READ;
if (prog_header.flags & PF_W)
flags |= VMA_WRITE;
if (prog_header.flags & PF_X)
flags |= VMA_EXECUTE;
vma_add(stack, stack+npages*PAGE_SIZE, flags);
vma_add(curr_task, stack, stack+npages*PAGE_SIZE-1, flags);
break;
}
}
@ -741,23 +726,8 @@ static int load_task(load_args_t* largs)
}
#endif
// setup heap
if (!curr_task->heap)
curr_task->heap = (vma_t*) kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!curr_task->heap || !heap, 0)) {
kprintf("load_task: heap is missing!\n");
return -ENOMEM;
}
curr_task->heap->flags = VMA_HEAP|VMA_USER;
curr_task->heap->start = heap;
curr_task->heap->end = heap;
// TODO: insert into list
if (BUILTIN_EXPECT(!stack, 0)) {
kprintf("load_task: stack is missing!\n");
kprintf("Stack is missing!\n");
return -ENOMEM;
}
@ -770,9 +740,9 @@ static int load_task(load_args_t* largs)
// push argv on the stack
offset -= largs->argc * sizeof(char*);
for (i=0; i<largs->argc; i++) {
for(i=0; i<largs->argc; i++) {
((char**) (stack+offset))[i] = (char*) (stack+idx);
while(((char*) stack)[idx] != '\0')
idx++;
idx++;
@ -780,7 +750,7 @@ static int load_task(load_args_t* largs)
// push env on the stack
offset -= (largs->envc+1) * sizeof(char*);
for (i=0; i<largs->envc; i++) {
for(i=0; i<largs->envc; i++) {
((char**) (stack+offset))[i] = (char*) (stack+idx);
while(((char*) stack)[idx] != '\0')
@ -801,10 +771,10 @@ static int load_task(load_args_t* largs)
*((char***) (stack+offset)) = (char**) (stack + offset + 2*sizeof(char**) + (largs->envc+1) * sizeof(char*));
// push argc on the stack
offset -= sizeof(size_t);
offset -= sizeof(int);
*((int*) (stack+offset)) = largs->argc;
kfree(largs);
kfree(largs, sizeof(load_args_t));
// clear fpu state
curr_task->flags &= ~(TASK_FPU_USED|TASK_FPU_INIT);
@ -815,12 +785,12 @@ static int load_task(load_args_t* largs)
invalid:
kprintf("Invalid executable!\n");
kprintf("Magic number: 0x%x\n", (uint32_t) header.ident.magic);
kprintf("Header type: 0x%x\n", (uint32_t) header.type);
kprintf("Machine type: 0x%x\n", (uint32_t) header.machine);
kprintf("ELF ident class: 0x%x\n", (uint32_t) header.ident._class);
kprintf("ELF ident data: 0x%x\n", header.ident.data);
kprintf("Program entry point: 0x%x\n", (size_t) header.entry);
kprintf("magic number 0x%x\n", (uint32_t) header.ident.magic);
kprintf("header type 0x%x\n", (uint32_t) header.type);
kprintf("machine type 0x%x\n", (uint32_t) header.machine);
kprintf("elf ident class 0x%x\n", (uint32_t) header.ident._class);
kprintf("elf identdata !0x%x\n", header.ident.data);
kprintf("program entry point 0x%x\n", (size_t) header.entry);
return -EINVAL;
}
@ -836,7 +806,7 @@ static int user_entry(void* arg)
ret = load_task((load_args_t*) arg);
kfree(arg);
kfree(arg, sizeof(load_args_t));
return ret;
}
@ -854,6 +824,7 @@ static int user_entry(void* arg)
*/
int create_user_task_on_core(tid_t* id, const char* fname, char** argv, uint32_t core_id)
{
#ifdef CONFIG_X86_32
vfs_node_t* node;
int argc = 0;
size_t i, buffer_size = 0;
@ -889,14 +860,18 @@ int create_user_task_on_core(tid_t* id, const char* fname, char** argv, uint32_t
while ((*dest++ = *src++) != 0);
}
// create new task
/* create new task */
return create_task(id, user_entry, load_args, NORMAL_PRIO, core_id);
#else
return -EINVAL;
#endif
}
/** @brief Used by the execve-Systemcall */
int sys_execve(const char* fname, char** argv, char** env)
{
vfs_node_t* node;
vma_t* tmp;
size_t i, buffer_size = 0;
load_args_t* load_args = NULL;
char *dest, *src;
@ -904,8 +879,6 @@ int sys_execve(const char* fname, char** argv, char** env)
int envc = 0;
task_t* curr_task = per_core(current_task);
kprintf("sys_execve: fname = %s, argv = %p, env = %p\n", fname, argv, env); // TODO: remove
node = findnode_fs((char*) fname);
if (!node || !(node->type == FS_FILE))
return -EINVAL;
@ -947,15 +920,14 @@ int sys_execve(const char* fname, char** argv, char** env)
while ((*dest++ = *src++) != 0);
}
spinlock_lock(&curr_task->vma_lock);
// remove old program
vma_t *vma;
for (vma=curr_task->vma_list; vma; vma = vma->next)
pfree((void*) vma->start, vma->end - vma->start);
// TODO: Heap?
while((tmp = curr_task->vma_list) != NULL) {
kfree((void*) tmp->start, tmp->end - tmp->start + 1);
curr_task->vma_list = tmp->next;
kfree((void*) tmp, sizeof(vma_t));
}
spinlock_unlock(&curr_task->vma_lock);
@ -968,7 +940,7 @@ int sys_execve(const char* fname, char** argv, char** env)
ret = load_task(load_args);
kfree(load_args);
kfree(load_args, sizeof(load_args_t));
return ret;
}

View file

@ -26,19 +26,21 @@
#include <asm/atomic.h>
#include <asm/processor.h>
#include <asm/io.h>
#ifdef CONFIG_UART
#include <asm/uart.h>
#endif
#ifdef CONFIG_VGA
#include <asm/vga.h>
#include <asm/vga.h>
#endif
#define NO_EARLY_PRINT 0
#define VGA_EARLY_PRINT 1
#define UART_EARLY_PRINT 2
#ifdef CONFIG_VGA
static uint32_t early_print = VGA_EARLY_PRINT;
#elif defined(CONFIG_UART)
static uint32_t early_print = UART_EARLY_PRINT;
#else
static uint32_t early_print = NO_EARLY_PRINT;
#endif
static spinlock_irqsave_t olock = SPINLOCK_IRQSAVE_INIT;
static atomic_int32_t kmsg_counter = ATOMIC_INIT(0);
static unsigned char kmessages[KMSG_SIZE] __attribute__ ((section(".kmsg"))) = {[0 ... KMSG_SIZE-1] = 0x00};
@ -134,7 +136,7 @@ int kmsg_init(vfs_node_t * node, const char *name)
}
} while (blist);
kfree(new_node);
kfree(new_node, sizeof(vfs_node_t));
return -ENOMEM;
}
@ -143,10 +145,6 @@ int koutput_init(void)
{
#ifdef CONFIG_VGA
vga_init();
early_print |= VGA_EARLY_PRINT;
#endif
#ifdef CONFIG_UART
early_print |= UART_EARLY_PRINT;
#endif
return 0;
@ -163,16 +161,12 @@ int kputchar(int c)
kmessages[pos % KMSG_SIZE] = (unsigned char) c;
#ifdef CONFIG_VGA
if (early_print & VGA_EARLY_PRINT)
if (early_print == VGA_EARLY_PRINT)
vga_putchar(c);
#endif
#ifdef CONFIG_UART
if (early_print & UART_EARLY_PRINT) {
if (c == '\n')
uart_putchar('\r');
if (early_print == UART_EARLY_PRINT)
uart_putchar(c);
}
#endif
if (early_print != NO_EARLY_PRINT)
@ -192,14 +186,11 @@ int kputs(const char *str)
pos = atomic_int32_inc(&kmsg_counter);
kmessages[pos % KMSG_SIZE] = str[i];
#ifdef CONFIG_VGA
if (early_print & VGA_EARLY_PRINT)
if (early_print == VGA_EARLY_PRINT)
vga_putchar(str[i]);
#endif
#ifdef CONFIG_UART
if (early_print & UART_EARLY_PRINT)
if (str[i] == '\n')
uart_putchar('\r');
if (early_print == UART_EARLY_PRINT)
uart_putchar(str[i]);
#endif
}

View file

@ -1,4 +1,4 @@
C_source := memory.c vma.c malloc.c
C_source := memory.c vma.c
MODULE := mm
include $(TOPDIR)/Makefile.inc

View file

@ -1,213 +0,0 @@
/*
* Copyright 2014 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
#include <metalsvm/malloc.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/stdio.h>
#include <metalsvm/memory.h>
/// A linked list for each binary size exponent
static buddy_t* buddy_lists[BUDDY_LISTS] = { NULL };
/// Lock for the buddy lists
static spinlock_t buddy_lock = SPINLOCK_INIT;
/** @brief Check if larger free buddies are available */
static inline int buddy_large_avail(uint8_t exp)
{
while (exp<BUDDY_MAX && !buddy_lists[exp-BUDDY_MIN])
exp++;
return exp != BUDDY_MAX;
}
/** @brief Calculate the required buddy size */
static inline int buddy_exp(size_t sz)
{
int exp;
for (exp=0; sz>(1<<exp); exp++);
if (exp < BUDDY_MIN)
exp = BUDDY_MIN;
if (exp > BUDDY_MAX) // too large for the largest buddy: signal failure with 0
exp = 0;
return exp;
}
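A few worked values for the exponent search above; the concrete BUDDY_MIN/BUDDY_MAX bounds live in metalsvm/malloc.h and are assumed here to be 5 and 32 purely for the example:

/* Assuming BUDDY_MIN = 5 and BUDDY_MAX = 32 (hypothetical values): */
buddy_exp(1);    /* loop stops at exp = 0, clamped up to BUDDY_MIN = 5 */
buddy_exp(100);  /* 2^6 = 64 < 100 <= 2^7 = 128, so exp = 7 */
buddy_exp(4096); /* exactly 2^12, so exp = 12 */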
/** @brief Get a free buddy by potentially splitting a larger one */
static buddy_t* buddy_get(int exp)
{
spinlock_lock(&buddy_lock);
buddy_t** list = &buddy_lists[exp-BUDDY_MIN];
buddy_t* buddy = *list;
buddy_t* split;
if (buddy)
// there is already a free buddy =>
// we remove it from the list
*list = buddy->next;
else if (exp >= BUDDY_ALLOC && !buddy_large_avail(exp))
// there's no free buddy larger than exp =>
// we can allocate new memory
buddy = (buddy_t*) palloc(1<<exp, 0);
else {
// we recursively request a larger buddy...
buddy = buddy_get(exp+1);
if (BUILTIN_EXPECT(!buddy, 0))
goto out;
// ... and split it by putting the second half back on the list
split = (buddy_t*) ((size_t) buddy + (1<<exp));
split->next = *list;
*list = split;
}
out:
spinlock_unlock(&buddy_lock);
return buddy;
}
/** @brief Put a buddy back to its free list
*
* TODO: merge adjacent buddies (memory compaction)
*/
static void buddy_put(buddy_t* buddy)
{
spinlock_lock(&buddy_lock);
buddy_t** list = &buddy_lists[buddy->prefix.exponent-BUDDY_MIN];
buddy->next = *list;
*list = buddy;
spinlock_unlock(&buddy_lock);
}
void buddy_dump()
{
size_t free = 0;
int i;
for (i=0; i<BUDDY_LISTS; i++) {
buddy_t* buddy;
int exp = i+BUDDY_MIN;
if (buddy_lists[i])
kprintf("buddy_list[%u] (exp=%u, size=%lu bytes):\n", i, exp, 1<<exp);
for (buddy=buddy_lists[i]; buddy; buddy=buddy->next) {
kprintf(" %p -> %p \n", buddy, buddy->next);
free += 1<<exp;
}
}
kprintf("free buddies: %lu bytes\n", free);
}
void* palloc(size_t sz, uint32_t flags)
{
size_t phyaddr, viraddr;
uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
kprintf("palloc(%lu) (%lu pages)\n", sz, npages); // TODO: remove
// get free virtual address space
viraddr = vma_alloc(npages*PAGE_SIZE, VMA_HEAP);
if (BUILTIN_EXPECT(!viraddr, 0))
return NULL;
// get contiguous physical pages
phyaddr = get_pages(npages);
if (BUILTIN_EXPECT(!phyaddr, 0)) {
vma_free(viraddr, viraddr+npages*PAGE_SIZE);
return NULL;
}
// map physical pages to VMA
viraddr = map_region(viraddr, phyaddr, npages, flags);
if (BUILTIN_EXPECT(!viraddr, 0)) {
vma_free(viraddr, viraddr+npages*PAGE_SIZE);
put_pages(phyaddr, npages);
return NULL;
}
return (void*) viraddr;
}
void pfree(void* addr, size_t sz)
{
if (BUILTIN_EXPECT(!addr || !sz, 0))
return;
size_t i;
size_t phyaddr;
size_t viraddr = (size_t) addr & PAGE_MASK;
uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
// memory is probably not contiguously mapped! (userspace heap)
for (i=0; i<npages; i++) {
phyaddr = virt_to_phys(viraddr+i*PAGE_SIZE);
put_page(phyaddr);
}
unmap_region(viraddr, npages);
vma_free(viraddr, viraddr+npages*PAGE_SIZE);
}
void* kmalloc(size_t sz)
{
if (BUILTIN_EXPECT(!sz, 0))
return NULL;
// add space for the prefix
sz += sizeof(buddy_t);
int exp = buddy_exp(sz);
if (BUILTIN_EXPECT(!exp, 0))
return NULL;
buddy_t* buddy = buddy_get(exp);
if (BUILTIN_EXPECT(!buddy, 0))
return NULL;
// setup buddy prefix
buddy->prefix.magic = BUDDY_MAGIC;
buddy->prefix.exponent = exp;
kprintf("kmalloc(%lu) = %p\n", sz, buddy+1); // TODO: remove
// pointer arithmetic: we hide the prefix
return buddy+1;
}
void kfree(void *addr)
{
if (BUILTIN_EXPECT(!addr, 0))
return;
kprintf("kfree(%lu)\n", addr); // TODO: remove
buddy_t* buddy = (buddy_t*) addr - 1; // get prefix
// check magic
if (BUILTIN_EXPECT(buddy->prefix.magic != BUDDY_MAGIC, 0))
return;
buddy_put(buddy);
}
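Since kmalloc() and kfree() above hide the buddy_t header with pointer arithmetic, a short sketch of the round trip:

/* Sketch of the header round trip used by the buddy allocator. */
void* p = kmalloc(42);           /* requests 42 + sizeof(buddy_t) bytes, rounded to 2^exp */
buddy_t* hdr = (buddy_t*) p - 1; /* exactly what kfree() computes */
/* hdr->prefix.magic == BUDDY_MAGIC, hdr->prefix.exponent >= BUDDY_MIN */
kfree(p);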

View file

@ -20,7 +20,7 @@
#include <metalsvm/stdio.h>
#include <metalsvm/string.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/memory.h>
#include <metalsvm/mmu.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/time.h>
#include <metalsvm/processor.h>
@ -37,15 +37,17 @@
#endif
/*
* Set whole address space as occupied:
* 0 => free, 1 => occupied
*/
static uint8_t bitmap[BITMAP_SIZE] = {[0 ... BITMAP_SIZE-1] = 0xFF};
static spinlock_t bitmap_lock = SPINLOCK_INIT;
atomic_int32_t total_pages = ATOMIC_INIT(0);
atomic_int32_t total_allocated_pages = ATOMIC_INIT(0);
atomic_int32_t total_available_pages = ATOMIC_INIT(0);
static uint8_t bitmap[BITMAP_SIZE]; // = {[0 ... BITMAP_SIZE-1] = 0xFF};
static spinlock_t bitmap_lock = SPINLOCK_INIT;
static size_t alloc_start;
atomic_int32_t total_pages = ATOMIC_INIT(0);
atomic_int32_t total_allocated_pages = ATOMIC_INIT(0);
atomic_int32_t total_available_pages = ATOMIC_INIT(0);
/*
* Note that linker symbols are not variables, they have no memory allocated for
@ -62,12 +64,20 @@ inline static int page_marked(size_t i)
return (bitmap[index] & (1 << mod));
}
inline static int page_unmarked(size_t i)
{
return !page_marked(i);
}
inline static void page_set_mark(size_t i)
{
size_t index = i >> 3;
size_t mod = i & 0x7;
//if (page_marked(i))
//	kprintf("page %u is already marked\n", i);
bitmap[index] = bitmap[index] | (1 << mod);
}
inline static void page_clear_mark(size_t i)
@ -75,214 +85,107 @@ inline static void page_clear_mark(size_t i)
size_t index = i / 8;
size_t mod = i % 8;
if (page_unmarked(i))
kprintf("page %u is already unmarked\n", i);
bitmap[index] = bitmap[index] & ~(1 << mod);
}
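The helpers above pack one page frame per bit; a worked example of the index arithmetic (pure arithmetic, no kernel state involved):

/* For page frame i = 19:
 *   index = 19 >> 3 = 2   (third byte of the bitmap)
 *   mod   = 19 & 0x7 = 3  (fourth bit inside that byte)
 * so page_set_mark(19) executes bitmap[2] |= (1 << 3),
 * and page_marked(19) tests exactly that bit. */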
size_t get_pages(size_t npages)
{
size_t cnt, off;
if (BUILTIN_EXPECT(!npages, 0))
return 0;
if (BUILTIN_EXPECT(npages > atomic_int32_read(&total_available_pages), 0))
return 0;
spinlock_lock(&bitmap_lock);
off = 1;
while (off <= BITMAP_SIZE*8 - npages) {
for (cnt=0; cnt<npages; cnt++) {
if (page_marked(off+cnt))
goto next;
}
for (cnt=0; cnt<npages; cnt++) {
page_set_mark(off+cnt);
}
spinlock_unlock(&bitmap_lock);
atomic_int32_add(&total_allocated_pages, npages);
atomic_int32_sub(&total_available_pages, npages);
return off << PAGE_BITS;
next: off += cnt+1;
}
spinlock_unlock(&bitmap_lock);
return 0;
}
int put_pages(size_t phyaddr, size_t npages)
{
size_t i, ret = 0;
size_t base = phyaddr >> PAGE_BITS;
if (BUILTIN_EXPECT(!phyaddr, 0))
return -EINVAL;
if (BUILTIN_EXPECT(!npages, 0))
return -EINVAL;
spinlock_lock(&bitmap_lock);
for (i=0; i<npages; i++) {
if (page_marked(base+i)) {
page_clear_mark(base+i);
ret++;
}
}
spinlock_unlock(&bitmap_lock);
atomic_int32_sub(&total_allocated_pages, ret);
atomic_int32_add(&total_available_pages, ret);
kprintf("put_pages: phyaddr=%#lx, npages = %d, ret = %d\n",
phyaddr, npages, ret); // TODO: remove
return ret;
}
int copy_page(size_t pdest, size_t psrc)
{
static size_t viraddr;
if (!viraddr) { // statically allocate virtual memory area
viraddr = vma_alloc(2 * PAGE_SIZE, VMA_HEAP);
if (BUILTIN_EXPECT(!viraddr, 0))
return -ENOMEM;
}
// map pages
size_t vsrc = map_region(viraddr, psrc, 1, MAP_KERNEL_SPACE);
size_t vdest = map_region(viraddr + PAGE_SIZE, pdest, 1, MAP_KERNEL_SPACE);
if (BUILTIN_EXPECT(!vsrc || !vdest, 0)) {
unmap_region(viraddr, 2);
return -ENOMEM;
}
kprintf("copy_page: copy page frame from: %#lx (%#lx) to %#lx (%#lx)\n", vsrc, psrc, vdest, pdest); // TODO remove
// copy the whole page
memcpy((void*) vdest, (void*) vsrc, PAGE_SIZE);
// householding
unmap_region(viraddr, 2);
return 0;
}
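A hypothetical caller of copy_page() above, e.g. while resolving a copy-on-write fault; shared_frame is an assumed variable naming the physical frame both tasks still reference:

size_t new_frame = get_pages(1); /* one fresh page frame, 0 on OOM */
if (new_frame && copy_page(new_frame, shared_frame) == 0) {
	/* remap the faulting virtual page onto new_frame with
	 * write permission and resume the task */
}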
int mmu_init(void)
{
size_t kernel_size;
unsigned int i;
size_t addr;
int ret = 0;
// at first, set default value of the bitmap
memset(bitmap, 0xFF, sizeof(uint8_t)*BITMAP_SIZE);
#ifdef CONFIG_MULTIBOOT
if (mb_info) {
if (mb_info->flags & MULTIBOOT_INFO_MEM_MAP) {
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) ((size_t) mb_info->mmap_addr);
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
size_t end_addr;
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) ((size_t) mb_info->mmap_addr);
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
// mark available memory as free
while (mmap < mmap_end) {
if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) {
for (addr=mmap->addr; addr < mmap->addr + mmap->len; addr += PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
while (mmap < mmap_end) {
if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) {
/* set the available memory as "unused" */
addr = mmap->addr;
end_addr = addr + mmap->len;
while (addr < end_addr) {
page_clear_mark(addr >> PAGE_SHIFT);
addr += PAGE_SIZE;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
mmap++;
}
mmap++;
}
else if (mb_info->flags & MULTIBOOT_INFO_MEM) {
size_t page;
size_t pages_lower = mb_info->mem_lower >> 2; /* KiB to page number */
size_t pages_upper = mb_info->mem_upper >> 2;
for (page=0; page<pages_lower; page++)
page_clear_mark(page);
for (page=0; page<pages_upper; page++)
page_clear_mark(page + 256); /* 1 MiB == 256 pages offset */
atomic_int32_add(&total_pages, pages_lower + pages_upper);
atomic_int32_add(&total_available_pages, pages_lower + pages_upper);
}
else {
kputs("Unable to initialize the memory management subsystem\n");
while (1) HALT;
}
// mark mb_info as used
page_set_mark((size_t) mb_info >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
// mark modules list as used
if (mb_info->flags & MULTIBOOT_INFO_MODS) {
for(addr=mb_info->mods_addr; addr<mb_info->mods_addr+mb_info->mods_count*sizeof(multiboot_module_t); addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
} else {
kputs("Unable to initialize the memory management subsystem\n");
while(1) {
HALT;
}
}
#elif defined(CONFIG_ROCKCREEK)
// of course, the first slots belong to the private memory
/* of course, the first slots belong to the private memory */
for(addr=0x00; addr<1*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
page_clear_mark(addr >> PAGE_SHIFT);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
atomic_int32_inc(&total_available_pages);
}
// Note: The last slot always belongs to the private memory.
for(addr=0xFF000000; addr<0xFFFFFFFF; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
page_clear_mark(addr >> PAGE_SHIFT);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
// mark the bootinfo as used.
page_set_mark((size_t)bootinfo >> PAGE_BITS);
/*
* Mark the bootinfo as used.
*/
page_set_mark((size_t)bootinfo >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
#else
#error Currently, MetalSVM supports only the Multiboot specification or the RockCreek processor!
#endif
// mark kernel as used
for(addr=(size_t) &kernel_start; addr<(size_t) &kernel_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
kernel_size = (size_t) &kernel_end - (size_t) &kernel_start;
if (kernel_size & (PAGE_SIZE-1))
kernel_size += PAGE_SIZE - (kernel_size & (PAGE_SIZE-1));
atomic_int32_add(&total_allocated_pages, kernel_size >> PAGE_SHIFT);
atomic_int32_sub(&total_available_pages, kernel_size >> PAGE_SHIFT);
/* set kernel space as used */
for(i=(size_t) &kernel_start >> PAGE_SHIFT; i < (size_t) &kernel_end >> PAGE_SHIFT; i++)
page_set_mark(i);
if ((size_t) &kernel_end & (PAGE_SIZE-1))
page_set_mark(i);
alloc_start = (size_t) &kernel_end >> PAGE_SHIFT;
if ((size_t) &kernel_end & (PAGE_SIZE-1))
alloc_start++;
#if MAX_CORES > 1
page_set_mark(SMP_SETUP_ADDR >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
// reserve physical page for SMP boot code
page_set_mark(SMP_SETUP_ADDR >> PAGE_SHIFT);
atomic_int32_add(&total_allocated_pages, 1);
atomic_int32_sub(&total_available_pages, 1);
#endif
// enable paging and map SMP, VGA, Multiboot modules etc.
ret = paging_init();
if (BUILTIN_EXPECT(ret, 0)) {
if (ret) {
kprintf("Failed to initialize paging: %d\n", ret);
return ret;
}
ret = vma_init();
if (BUILTIN_EXPECT(ret, 0)) {
kprintf("Failed to initialize VMA regions: %d\n", ret);
return ret;
}
#ifdef CONFIG_MULTIBOOT
/*
* Modules like the init ram disk are already loaded.
@ -290,9 +193,23 @@ int mmu_init(void)
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
for(i=0; i<mb_info->mods_count; i++) {
for(addr=mmodule[i].mod_start; addr<mmodule[i].mod_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_BITS);
/*
* Mark the mb_info as used.
*/
page_set_mark((size_t)mb_info >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
for(addr = mb_info->mods_addr; addr < mb_info->mods_addr + mb_info->mods_count * sizeof(multiboot_module_t); addr += PAGE_SIZE) {
page_set_mark(addr >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
for(i=0; i<mb_info->mods_count; i++, mmodule++) {
for(addr=mmodule->mod_start; addr<mmodule->mod_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
@ -311,7 +228,7 @@ int mmu_init(void)
// define the residual private slots as free
for(addr=1*0x1000000; addr<slots*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
page_clear_mark(addr >> PAGE_SHIFT);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
@ -322,9 +239,9 @@ int mmu_init(void)
* The init ram disk are already loaded.
* Therefore, we set these pages as used.
*/
for(addr=bootinfo->addr; addr<bootinfo->addr+bootinfo->size; addr+=PAGE_SIZE) {
// this area is already mapped, so we need to virt_to_phys() these addresses.
page_set_mark(virt_to_phys(addr) >> PAGE_BITS);
for(addr=bootinfo->addr; addr < bootinfo->addr+bootinfo->size; addr+=PAGE_SIZE) {
// This area is already mapped, so we need to virt_to_phys() these addresses.
page_set_mark(virt_to_phys(addr) >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
@ -333,3 +250,148 @@ int mmu_init(void)
return ret;
}
/*
* Use a first-fit algorithm to find a suitable physical memory region
*/
size_t get_pages(uint32_t npages)
{
uint32_t i, j, l;
uint32_t k = 0;
size_t ret = 0;
if (BUILTIN_EXPECT(!npages, 0))
return ret;
if (BUILTIN_EXPECT(npages > atomic_int32_read(&total_available_pages), 0))
return ret;
spinlock_lock(&bitmap_lock);
i = alloc_start;
next_try:
while((k < BITMAP_SIZE) && page_marked(i)) {
k++;
i = (i+1) & (BITMAP_SIZE-1);
}
if (k >= BITMAP_SIZE)
goto oom;
for(j=1; (j<npages) && (i+j < BITMAP_SIZE) && (k < BITMAP_SIZE); j++, k++) {
if (page_marked(i+j)) {
i = (i+j) & (BITMAP_SIZE-1);
goto next_try;
}
}
if (i+j >= BITMAP_SIZE) {
i = 0;
goto next_try;
}
if (k >= BITMAP_SIZE)
goto oom;
ret = i*PAGE_SIZE;
//kprintf("alloc: ret 0x%x, i = %d, j = %d, npages = %d\n", ret, i, j, npages);
for(l=i; l<i+j; l++)
page_set_mark(l);
alloc_start = i+j;
spinlock_unlock(&bitmap_lock);
atomic_int32_add(&total_allocated_pages, npages);
atomic_int32_sub(&total_available_pages, npages);
return ret;
oom:
spinlock_unlock(&bitmap_lock);
return ret;
}
int put_page(size_t phyaddr)
{
uint32_t index = phyaddr >> PAGE_SHIFT;
if (BUILTIN_EXPECT(!phyaddr, 0))
return -EINVAL;
spinlock_lock(&bitmap_lock);
page_clear_mark(index);
spinlock_unlock(&bitmap_lock);
atomic_int32_sub(&total_allocated_pages, 1);
atomic_int32_add(&total_available_pages, 1);
return 0;
}
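A minimal usage sketch for the two routines above (kernel context assumed):

/* Allocate four contiguous page frames, then release them again. */
size_t phy = get_pages(4); /* returns 0 on out-of-memory */
if (phy) {
	uint32_t i;
	/* ... map the frames with map_region() and use them ... */
	for (i = 0; i < 4; i++)
		put_page(phy + i*PAGE_SIZE); /* this variant frees one frame per call */
}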
void* mem_allocation(size_t sz, uint32_t flags)
{
size_t phyaddr, viraddr;
uint32_t npages = sz >> PAGE_SHIFT;
if (sz & (PAGE_SIZE-1))
npages++;
phyaddr = get_pages(npages);
if (BUILTIN_EXPECT(!phyaddr, 0))
return 0;
viraddr = map_region(0, phyaddr, npages, flags);
return (void*) viraddr;
}
void* kmalloc(size_t sz)
{
return mem_allocation(sz, MAP_KERNEL_SPACE);
}
void kfree(void* addr, size_t sz)
{
uint32_t index, npages, i;
size_t phyaddr;
if (BUILTIN_EXPECT(!addr || !sz, 0)) // reject a NULL pointer or a zero size
return;
npages = sz >> PAGE_SHIFT;
if (sz & (PAGE_SIZE-1))
npages++;
spinlock_lock(&bitmap_lock);
for(i=0; i<npages; i++) {
unmap_region((size_t) addr+i*PAGE_SIZE, 1);
phyaddr = virt_to_phys((size_t) addr+i*PAGE_SIZE);
if (!phyaddr)
continue;
index = phyaddr >> PAGE_SHIFT;
page_clear_mark(index);
}
spinlock_unlock(&bitmap_lock);
vm_free((size_t) addr, npages);
atomic_int32_sub(&total_allocated_pages, npages);
atomic_int32_add(&total_available_pages, npages);
}
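Unlike an allocator that keeps a per-block header, this kfree() relies on the caller to pass the original size back; a short sketch of the pattern used throughout this diff:

vma_t* v = (vma_t*) kmalloc(sizeof(vma_t));
if (v) {
	/* ... use v ... */
	kfree(v, sizeof(vma_t)); /* the size must match the allocation */
}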
void* create_stack(void)
{
return kmalloc(KERNEL_STACK_SIZE);
}
int destroy_stack(task_t* task)
{
if (BUILTIN_EXPECT(!task || !task->stack, 0))
return -EINVAL;
kfree(task->stack, KERNEL_STACK_SIZE);
return 0;
}

407
mm/vma.c
View file

@ -1,5 +1,5 @@
/*
* Copyright 2011 Steffen Vogel, Chair for Operating Systems,
* Copyright 2011 Stefan Lankes, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -17,392 +17,85 @@
* This file is part of MetalSVM.
*/
#include <metalsvm/vma.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/stdio.h>
#include <metalsvm/tasks_types.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/vma.h>
#include <metalsvm/errno.h>
#ifdef CONFIG_MULTIBOOT
#include <asm/multiboot.h>
#endif
/*
* Note that linker symbols are not variables, they have no memory allocated for
* maintaining a value, rather their address is their value.
*/
extern const void kernel_start;
extern const void kernel_end;
/*
* Kernel space VMA list and lock
*
* For bootstrapping we initialize the VMA list with one empty VMA
* (start == end) and expand this VMA by calls to vma_alloc()
*/

/*
* add a new virtual memory region to the list of VMAs
*/
static vma_t vma_boot = { VMA_KERN_MIN, VMA_KERN_MIN, VMA_HEAP };
static vma_t* vma_list = &vma_boot;
static spinlock_t vma_lock = SPINLOCK_INIT;
// TODO: we might move the architecture specific VMA regions to a
// separate function arch_vma_init()
int vma_init()
int vma_add(task_t* task, size_t start, size_t end, uint32_t type)
{
int ret;
// add Kernel
ret = vma_add(PAGE_CEIL((size_t) &kernel_start),
PAGE_FLOOR((size_t) &kernel_end),
VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
if (BUILTIN_EXPECT(ret, 0))
goto out;
#ifdef CONFIG_VGA
// add VGA video memory
ret = vma_add(VIDEO_MEM_ADDR, VIDEO_MEM_ADDR + PAGE_SIZE, VMA_READ|VMA_WRITE);
if (BUILTIN_EXPECT(ret, 0))
goto out;
#endif
#if MAX_CORES > 1
// add SMP boot page
ret = vma_add(SMP_SETUP_ADDR, SMP_SETUP_ADDR + PAGE_SIZE,
VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
if (BUILTIN_EXPECT(ret, 0))
goto out;
#endif
#ifdef CONFIG_MULTIBOOT
// add Multiboot structures as modules
if (mb_info) {
ret = vma_add(PAGE_CEIL((size_t) mb_info),
PAGE_FLOOR((size_t) mb_info + sizeof(multiboot_info_t)),
VMA_READ|VMA_CACHEABLE);
if (BUILTIN_EXPECT(ret, 0))
goto out;
if (mb_info->flags & MULTIBOOT_INFO_MEM_MAP) {
ret = vma_add(PAGE_CEIL((size_t) mb_info->mmap_addr),
PAGE_FLOOR((size_t) mb_info->mmap_addr + mb_info->mmap_length),
VMA_READ|VMA_CACHEABLE);
}
if (mb_info->flags & MULTIBOOT_INFO_MODS) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
ret = vma_add(PAGE_CEIL((size_t) mb_info->mods_addr),
PAGE_FLOOR((size_t) mb_info->mods_addr + mb_info->mods_count*sizeof(multiboot_module_t)),
VMA_READ|VMA_CACHEABLE);
int i;
for(i=0; i<mb_info->mods_count; i++) {
ret = vma_add(PAGE_CEIL(mmodule[i].mod_start),
PAGE_FLOOR(mmodule[i].mod_end),
VMA_READ|VMA_WRITE|VMA_CACHEABLE);
if (BUILTIN_EXPECT(ret, 0))
goto out;
}
}
}
#endif
out:
return ret;
}
size_t vma_alloc(size_t size, uint32_t flags)
{
task_t* task = per_core(current_task);
spinlock_t* lock;
vma_t** list;
kprintf("vma_alloc: size = %#lx, flags = %#x\n", size, flags); // TODO: remove
size_t base, limit; // boundaries for search
size_t start, end; // boundaries of free gaps
if (flags & VMA_USER) {
base = VMA_USER_MIN;
limit = VMA_USER_MAX;
list = &task->vma_list;
lock = &task->vma_lock;
}
else {
base = VMA_KERN_MIN;
limit = VMA_KERN_MAX;
list = &vma_list;
lock = &vma_lock;
}
spinlock_lock(lock);
// first fit search for free memory area
vma_t* pred = NULL; // vma before current gap
vma_t* succ = *list; // vma after current gap
do {
start = (pred) ? pred->end : base;
end = (succ) ? succ->start : limit;
if (start + size < end && start >= base && start + size < limit)
goto found; // we found a gap which is large enough and in the bounds
pred = succ;
succ = (pred) ? pred->next : NULL;
} while (pred || succ);
fail:
spinlock_unlock(lock); // we were unlucky to find a free gap
return 0;
found:
if (pred && pred->flags == flags)
pred->end = start + size; // resize VMA
else {
// insert new VMA
vma_t* new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0))
goto fail;
new->start = start;
new->end = start + size;
new->flags = flags;
new->next = succ;
new->prev = pred;
if (succ)
succ->prev = new;
if (pred)
pred->next = new;
else
*list = new;
}
spinlock_unlock(lock);
return start;
}
int vma_free(size_t start, size_t end)
{
task_t* task = per_core(current_task);
spinlock_t* lock;
vma_t* vma;
vma_t** list;
kprintf("vma_free: start = %#lx, end = %#lx\n", start, end); // TODO: remove
if (BUILTIN_EXPECT(start >= end, 0))
vma_t* new_vma;
if (BUILTIN_EXPECT(!task || start > end, 0))
return -EINVAL;
if (end < VMA_KERN_MAX) {
lock = &vma_lock;
list = &vma_list;
}
else if (start >= VMA_KERN_MAX) {
lock = &task->vma_lock;
list = &task->vma_list;
}
if (BUILTIN_EXPECT(!*list, 0))
return -EINVAL;
spinlock_lock(lock);
// search vma
vma = *list;
while (vma) {
if (start >= vma->start && end <= vma->end) break;
vma = vma->next;
}
if (BUILTIN_EXPECT(!vma, 0)) {
spinlock_unlock(lock);
return -EINVAL;
}
// free/resize vma
if (start == vma->start && end == vma->end) {
if (vma == *list)
*list = vma->next; // update list head
if (vma->prev)
vma->prev->next = vma->next;
if (vma->next)
vma->next->prev = vma->prev;
kfree(vma);
}
else if (start == vma->start)
vma->start = end;
else if (end == vma->end)
vma->end = start;
else {
vma_t* new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(lock);
return -ENOMEM;
}
new->end = vma->end;
vma->end = start;
new->start = end;
new->next = vma->next;
vma->next = new;
new->prev = vma;
}
spinlock_unlock(lock);
return 0;
}
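A minimal sketch of the allocate/free round trip over vma_alloc() and vma_free() above:

/* Reserve 16 KiB of kernel virtual address space, then release it. */
size_t va = vma_alloc(4*PAGE_SIZE, VMA_HEAP);
if (va)
	vma_free(va, va + 4*PAGE_SIZE);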
int vma_add(size_t start, size_t end, uint32_t flags)
{
task_t* task = per_core(current_task);
spinlock_t* lock;
vma_t** list;
if (BUILTIN_EXPECT(start >= end, 0))
return -EINVAL;
if (flags & VMA_USER) {
list = &task->vma_list;
lock = &task->vma_lock;
// check if address is in userspace
if (BUILTIN_EXPECT(start < VMA_KERN_MAX, 0))
return -EINVAL;
}
else {
list = &vma_list;
lock = &vma_lock;
// check if address is in kernelspace
if (BUILTIN_EXPECT(end >= VMA_KERN_MAX, 0))
return -EINVAL;
}
kprintf("vma_add: start = %#lx, end = %#lx, flags = %#x\n", start, end, flags); // TODO: remove
spinlock_lock(lock);
// search gap
vma_t* pred = NULL;
vma_t* succ = *list;
while (pred || succ) {
if ((!pred || pred->end <= start) &&
(!succ || succ->start >= end))
break;
pred = succ;
succ = (succ) ? succ->next : NULL;
}
if (BUILTIN_EXPECT(*list && !pred && !succ, 0)) {
spinlock_unlock(lock);
return -EINVAL;
}
// insert new VMA
vma_t* new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(lock);
new_vma = kmalloc(sizeof(vma_t)); // allocate the struct, not the size of the pointer
if (!new_vma)
return -ENOMEM;
spinlock_lock(&task->vma_lock);
new_vma->start = start;
new_vma->end = end;
new_vma->type = type;
if (!(task->vma_list)) {
new_vma->next = new_vma->prev = NULL;
task->vma_list = new_vma;
} else {
vma_t* tmp = task->vma_list;
while (tmp->next && tmp->next->start < start) // keep the list sorted by start address
tmp = tmp->next;
new_vma->next = tmp->next;
new_vma->prev = tmp;
tmp->next = new_vma;
}
new->start = start;
new->end = end;
new->flags = flags;
new->next = succ;
new->prev = pred;
if (succ)
succ->prev = new;
if (pred)
pred->next = new;
else
*list = new;
spinlock_unlock(lock);
spinlock_unlock(&task->vma_lock);
return 0;
}
int copy_vma_list(task_t* src, task_t* dest)
{
	spinlock_init(&dest->vma_lock);

	spinlock_lock(&src->vma_lock);
	spinlock_lock(&dest->vma_lock);

	vma_t* last = NULL;
	vma_t* old;
	for (old=src->vma_list; old; old=old->next) {
		vma_t *new = kmalloc(sizeof(vma_t));
		if (BUILTIN_EXPECT(!new, 0)) {
			spinlock_unlock(&dest->vma_lock);
			spinlock_unlock(&src->vma_lock);
			return -ENOMEM;
		}

		new->start = old->start;
		new->end = old->end;
		new->flags = old->flags;
		new->prev = last;
		new->next = NULL;	// the last element terminates the list

		if (last)
			last->next = new;
		else
			dest->vma_list = new;	// first element becomes the new list head

		last = new;
	}

	spinlock_unlock(&dest->vma_lock);
	spinlock_unlock(&src->vma_lock);

	return 0;
}
int drop_vma_list(task_t *task)
{
	vma_t* vma;

	spinlock_lock(&task->vma_lock);

	while ((vma = task->vma_list)) {
		task->vma_list = vma->next;
		kfree(vma);
	}

	spinlock_unlock(&task->vma_lock);

	return 0;
}
void vma_dump()
{
	void print_vma(vma_t *vma) {
		while (vma) {
			kprintf("0x%lx - 0x%lx: size=%x, flags=%c%c%c\n", vma->start, vma->end, vma->end - vma->start,
				(vma->flags & VMA_READ) ? 'r' : '-',
				(vma->flags & VMA_WRITE) ? 'w' : '-',
				(vma->flags & VMA_EXECUTE) ? 'x' : '-');
			vma = vma->next;
		}
	}

	task_t* task = per_core(current_task);

	kputs("Kernelspace VMAs:\n");
	spinlock_lock(&vma_lock);
	print_vma(vma_list);
	spinlock_unlock(&vma_lock);

	kputs("Userspace VMAs:\n");
	spinlock_lock(&task->vma_lock);
	print_vma(task->vma_list);
	spinlock_unlock(&task->vma_lock);
}
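Pointer surgery like the above is where list bugs hide, so a consistency check is cheap insurance. A minimal sketch, not part of the repository, assuming only the vma_t fields used above (start, end, next, prev) and that the caller holds the matching lock:

/* Hypothetical debug helper: verify the invariants vma_add() and
 * vma_free() are supposed to maintain (sorted, non-overlapping,
 * consistent back links). Returns 0 on success, -1 on a violation. */
static int vma_check(vma_t* list)
{
	vma_t* v;

	for (v = list; v; v = v->next) {
		if (v->start >= v->end)
			return -1;	// empty or inverted region
		if (v->next && v->next->prev != v)
			return -1;	// broken back link
		if (v->next && v->end > v->next->start)
			return -1;	// overlapping or unsorted regions
	}

	return 0;
}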

View file

@ -1,6 +1,6 @@
TOPDIR = $(shell pwd)
ARCH = x86
BIT = 32
BIT=32
ifeq ($(ARCH),x86)
ifeq ($(BIT),32)
@ -19,10 +19,10 @@ TMP = $(TOPDIR)/tmp
OPT = --disable-shared --disable-multilib --enable-newlib-hw-fp --disable-newlib-multithread --disable-newlib-reent-small
default: $(ARCH)
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) TARGET=$(TARGET) CFLAGS+="-ffreestanding -Wall -I$(NEWLIB)/include -I../../include -I../../arch/$(ARCH)/include" LDFLAGS+="-nostdlib -L$(NEWLIB)/lib" -C net depend
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) TARGET=$(TARGET) CFLAGS+="-ffreestanding -Wall -I$(NEWLIB)/include -I../../include -I../../arch/$(ARCH)/include" LDFLAGS+="-nostdlib -L$(NEWLIB)/lib" -C net
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) TARGET=$(TARGET) CFLAGS+="-ffreestanding -Wall -I$(NEWLIB)/include -I../../include -I../../arch/$(ARCH)/include" LDFLAGS+="-nostdlib -L$(NEWLIB)/lib" -C examples depend
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) TARGET=$(TARGET) CFLAGS+="-ffreestanding -Wall -I$(NEWLIB)/include -I../../include -I../../arch/$(ARCH)/include" LDFLAGS+="-nostdlib -L$(NEWLIB)/lib" -C examples
$(MAKE) ARCH=$(ARCH) TARGET=$(TARGET) CFLAGS+="-ffreestanding -Wall -I$(NEWLIB)/include -I../../include -I../../arch/$(ARCH)/include" LDFLAGS+="-nostdlib -L$(NEWLIB)/lib" -C net depend
$(MAKE) ARCH=$(ARCH) TARGET=$(TARGET) CFLAGS+="-ffreestanding -Wall -I$(NEWLIB)/include -I../../include -I../../arch/$(ARCH)/include" LDFLAGS+="-nostdlib -L$(NEWLIB)/lib" -C net
$(MAKE) ARCH=$(ARCH) TARGET=$(TARGET) CFLAGS+="-ffreestanding -Wall -I$(NEWLIB)/include -I../../include -I../../arch/$(ARCH)/include" LDFLAGS+="-nostdlib -L$(NEWLIB)/lib" -C examples depend
$(MAKE) ARCH=$(ARCH) TARGET=$(TARGET) CFLAGS+="-ffreestanding -Wall -I$(NEWLIB)/include -I../../include -I../../arch/$(ARCH)/include" LDFLAGS+="-nostdlib -L$(NEWLIB)/lib" -C examples
$(ARCH):
$(RM) $(TMP)

View file

@ -1,84 +1,68 @@
ARCH = x86
BIT = 32
NEWLIB = ../x86/i586-metalsvm-elf32
MAKE = make
STRIP_DEBUG = --strip-debug
KEEP_DEBUG = --only-keep-debug
override LDFLAGS += -T link.ld
ifeq ($(BIT),64)
# Default section offsets in x86-64 ELF files are aligned to the page size.
# For x86-64 the default page size is huge (2 MB), with the consequence of large, sparse
# ELF files (which lead to a huge initrd). To solve this, we manually set the page size to 4 KB.
override LDFLAGS += -Wl,-n,-z,max-page-size=0x1000
endif
LDFLAGS =
# other implicit rules
%.o : %.c
$(CC_FOR_TARGET) -c $(CFLAGS) -o $@ $<
default: all
all: memtest hello tests jacobi mshell server client rlogind fork
all: memtest hello tests jacobi mshell server client rlogind
jacobi: jacobi.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< -lm
$(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $< -lm
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
memtest: memtest.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $<
$(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $<
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
fork: fork.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $<
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
tests: tests.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $<
$(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $<
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
hello: hello.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $<
$(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $<
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
mshell: mshell.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $<
$(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $<
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
rlogind: rlogind.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< -lsocket
$(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $< -lsocket
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
server: server.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< -lsocket
$(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $< -lsocket
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
client: client.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< -lsocket
$(CC_FOR_TARGET) -T link.ld -o $@ $(LDFLAGS) $< -lsocket
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
clean:
$(RM) hello tests server client rlogind memtest fork mshell jacobi hello *.sym *.o *~
$(RM) hello tests server client rlogind memtest mshell jacobi hello *.sym *.o *~
depend:
$(CC_FOR_TARGET) -MM $(CFLAGS) *.c > Makefile.dep

View file

@ -1,59 +0,0 @@
/*
* Copyright 2013 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <errno.h>
int main(int argc, char** argv)
{
printf("======== USER: test forking...\n");
int id = fork();
int ret = 0;
if (id < 0) {
perror(" PARENT: fork");
exit(-1);
}
else if (id == 0) {
printf(" CHILD: This is the child. My pid is %u\n", getpid());
printf(" CHILD: Running memtest...\n");
const char *argv[] = {"/bin/memtest", "512", "kb", "10", NULL};
execve(argv[0], argv, NULL);
perror(" CHILD: exec() returned: ");
ret = -1;
}
else {
printf(" PARENT: Here is the parent. My pid is %u\n", getpid());
wait(&ret);
printf(" PARENT: My child returned with code %i...\n", ret);
}
return ret;
}

View file

@ -68,7 +68,7 @@ int main(int argc, char** argv)
exit(1);
}
testdirent = readdir(testdir);
printf("1. Dirent: %s\n", testdirent->d_name);
printf("1. Dirent: %s", testdirent->d_name);
closedir(testdir);
return errno;

View file

@ -17,90 +17,44 @@
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <sys/times.h>

void sleep(int sec) {
	struct tms tms;
	clock_t t, s = times(&tms);

	do {
		t = times(&tms);
	} while (t - s <= 1000 * sec);
}

int print_usage() {
	printf("usage: size mb/kb/b [chunks]\n");
	exit(-1);
}

int main(int argc, char** argv)
{
	int multp = 0;
	int size = 0;
	int chunks = 1;
	void **test;

	if (argc <= 2 || argc > 4)
		print_usage();

	size = atoi(argv[1]);
	if (size <= 0)
		print_usage();

	if (!strcasecmp(argv[2], "mb"))
		multp = (1 << 20);
	else if (!strcasecmp(argv[2], "kb"))
		multp = (1 << 10);
	else if (!strcasecmp(argv[2], "b"))
		multp = (1 << 0);
	else
		print_usage();
	size *= multp;

	if (argc == 4)
		chunks = atoi(argv[3]);

	test = malloc(chunks * sizeof(void *));
	printf("malloc(%lu)\n", chunks * sizeof(void *));
	if (!test) {
		printf("malloc(%lu) - FAILED!\n", chunks * sizeof(void *));
		exit(-1);
	}

	// allocate...
	int i;
	for (i = 0; i < chunks; i++) {
		test[i] = malloc(size);
		if (test[i])
			printf("malloc(%d)\tCHUNK: %d START: %p END: %p\n", size, i, test[i], test[i] + size);
		else
			printf("malloc(%d)\tFAILED! Abort allocation, start with freeing memory\n", size);
		sleep(1);
	}

	// and release again
	for (i = 0; i < chunks; i++) {
		if (test[i]) {
			free(test[i]);
			printf("free(%p)\tCHUNK: %d\n", test[i], i);
		}
		sleep(1);
	}

	free(test);
	printf("free(%p)\n", test);

	return 0;
}
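The sleep() above busy-waits on times() and hard-codes 1000 ticks per second. Below is a sketch of a rate-independent variant; sysconf(_SC_CLK_TCK) is the POSIX way to obtain the tick rate, and it is an assumption here that this newlib port provides it.

/* Hypothetical variant of the busy-wait that derives the tick rate at
 * run time instead of assuming 1000 Hz; needs <unistd.h> for sysconf(). */
static void sleep_ticks(int sec)
{
	struct tms tms;
	long hz = sysconf(_SC_CLK_TCK);	// ticks per second
	clock_t s = times(&tms);

	while (times(&tms) - s <= (clock_t)(hz * sec))
		;	// spin; no timer or scheduler support assumed
}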

12603
newlib/src/Makefile Normal file

File diff suppressed because it is too large

1070
newlib/src/config.status Executable file

File diff suppressed because it is too large

248
newlib/src/etc/Makefile Normal file
View file

@ -0,0 +1,248 @@
#
# Makefile.in for etc
#
DESTDIR =
prefix = /tmp
exec_prefix = ${prefix}
srcdir = .
bindir = ${exec_prefix}/bin
libdir = ${exec_prefix}/lib
tooldir = $(libdir)
datadir = ${datarootdir}
mandir = ${datarootdir}/man
man1dir = $(mandir)/man1
man2dir = $(mandir)/man2
man3dir = $(mandir)/man3
man4dir = $(mandir)/man4
man5dir = $(mandir)/man5
man6dir = $(mandir)/man6
man7dir = $(mandir)/man7
man8dir = $(mandir)/man8
man9dir = $(mandir)/man9
datarootdir = ${prefix}/share
docdir = ${datarootdir}/doc/${PACKAGE}
infodir = ${datarootdir}/info
pdfdir = ${datarootdir}/doc/${PACKAGE}
htmldir = ${docdir}
SHELL = /bin/sh
INSTALL = /usr/bin/install -c
INSTALL_PROGRAM = /usr/bin/install -c
INSTALL_DATA = /usr/bin/install -c -m 644
MAKEINFO = `if [ -f ../texinfo/makeinfo/makeinfo ]; \
then echo ../texinfo/makeinfo/makeinfo; \
else echo makeinfo; fi`
TEXI2DVI = `if [ -f ../texinfo/util/texi2dvi ]; \
then echo ../texinfo/util/texi2dvi; \
else echo texi2dvi; fi`
TEXI2PDF = `if [ -f ../texinfo/util/texi2dvi ]; \
then echo "../texinfo/util/texi2dvi --pdf"; \
else echo "texi2dvi --pdf"; fi`
TEXI2HTML = `if [ -f ../texinfo/makeinfo/makeinfo ]; \
then echo "../texinfo/makeinfo/makeinfo --html"; \
else echo "makeinfo --html"; fi`
DVIPS = dvips
# Where to find texinfo.tex to format documentation with TeX.
TEXIDIR = $(srcdir)/../texinfo
#### Host, target, and site specific Makefile fragments come in here.
###
INFOFILES = standards.info configure.info
DVIFILES = standards.dvi configure.dvi
PDFFILES = standards.pdf configure.pdf
HTMLFILES = standards.html configure.html
all: info
install install-strip: install-info
uninstall:
info:
for f in $(INFOFILES); do \
if test -f $(srcdir)/`echo $$f | sed -e 's/.info$$/.texi/'`; then \
if $(MAKE) "MAKEINFO=$(MAKEINFO)" $$f; then \
true; \
else \
exit 1; \
fi; \
fi; \
done
install-info: info
$(SHELL) $(srcdir)/../mkinstalldirs $(DESTDIR)$(infodir)
if test ! -f standards.info; then cd $(srcdir); fi; \
if test -f standards.info; then \
for i in standards.info*; do \
$(INSTALL_DATA) $$i $(DESTDIR)$(infodir)/$$i; \
done; \
fi
if test ! -f configure.info; then cd $(srcdir); fi; \
if test -f configure.info; then \
for i in configure.info*; do \
$(INSTALL_DATA) $$i $(DESTDIR)$(infodir)/$$i; \
done; \
fi
html:
for f in $(HTMLFILES); do \
if test -f $(srcdir)/`echo $$f | sed -e 's/.html$$/.texi/'`; then \
if $(MAKE) "TEXI2HTML=$(TEXI2HTML)" $$f; then \
true; \
else \
exit 1; \
fi; \
fi; \
done
install-html: html
$(SHELL) $(srcdir)/../mkinstalldirs $(DESTDIR)$(htmldir)
if test ! -f standards.html; then cd $(srcdir); fi; \
if test -f standards.html; then \
for i in standards.html*; do \
$(INSTALL_DATA) $$i $(DESTDIR)$(htmldir)/$$i; \
done; \
fi
if test ! -f configure.html; then cd $(srcdir); fi; \
if test -f configure.html; then \
for i in configure.html*; do \
$(INSTALL_DATA) $$i $(DESTDIR)$(htmldir)/$$i; \
done; \
fi
dvi:
for f in $(DVIFILES); do \
if test -f $(srcdir)/`echo $$f | sed -e 's/.dvi$$/.texi/'`; then \
if $(MAKE) "TEXI2DVI=$(TEXI2DVI)" $$f; then \
true; \
else \
exit 1; \
fi; \
fi; \
done
pdf:
for f in $(PDFFILES); do \
if test -f $(srcdir)/`echo $$f | sed -e 's/.pdf$$/.texi/'`; then \
if $(MAKE) "TEXI2PDF=$(TEXI2PDF)" $$f; then \
true; \
else \
exit 1; \
fi; \
fi; \
done
install-pdf: pdf
$(SHELL) $(srcdir)/../mkinstalldirs $(DESTDIR)$(pdfdir)/etc
if test ! -f standards.pdf; then cd $(srcdir); fi; \
if test -f standards.pdf; then \
for i in standards.pdf*; do \
$(INSTALL_DATA) $$i $(DESTDIR)$(pdfdir)/etc/$$i; \
done; \
fi
if test ! -f configure.pdf; then cd $(srcdir); fi; \
if test -f configure.pdf; then \
for i in configure.pdf*; do \
$(INSTALL_DATA) $$i $(DESTDIR)$(pdfdir)/etc/$$i; \
done; \
fi
standards.info: $(srcdir)/standards.texi $(srcdir)/make-stds.texi
$(MAKEINFO) --no-split -I$(srcdir) -o standards.info $(srcdir)/standards.texi
standards.html: $(srcdir)/standards.texi $(srcdir)/make-stds.texi
$(TEXI2HTML) --no-split -I$(srcdir) -o standards.html $(srcdir)/standards.texi
standards.dvi: $(srcdir)/standards.texi
TEXINPUTS=$(TEXIDIR):$$TEXINPUTS $(TEXI2DVI) $(srcdir)/standards.texi
standards.ps: standards.dvi
$(DVIPS) standards.dvi -o standards.ps
standards.pdf: $(srcdir)/standards.texi
TEXINPUTS=$(TEXIDIR):$$TEXINPUTS $(TEXI2PDF) $(srcdir)/standards.texi
# makeinfo requires images to be in the current directory.
configure.info: $(srcdir)/configure.texi $(srcdir)/configdev.tin $(srcdir)/configbuild.tin
rm -f configdev.txt configbuild.txt
cp $(srcdir)/configdev.tin configdev.txt
cp $(srcdir)/configbuild.tin configbuild.txt
$(MAKEINFO) -I$(srcdir) -o configure.info $(srcdir)/configure.texi
rm -f configdev.txt configbuild.txt
# texi2dvi wants both the .txt and the .eps files.
configure.dvi: $(srcdir)/configure.texi $(srcdir)/configdev.tin $(srcdir)/configbuild.tin $(srcdir)/configdev.ein $(srcdir)/configbuild.ein
rm -f configdev.txt configbuild.txt
cp $(srcdir)/configdev.tin configdev.txt
cp $(srcdir)/configbuild.tin configbuild.txt
rm -f configdev.eps configbuild.eps
cp $(srcdir)/configdev.ein configdev.eps
cp $(srcdir)/configbuild.ein configbuild.eps
TEXINPUTS=$(TEXIDIR):$$TEXINPUTS $(TEXI2DVI) $(srcdir)/configure.texi
rm -f configdev.txt configbuild.txt
rm -f configdev.eps configbuild.eps
# dvips requires images to be in the current directory
configure.ps: configure.dvi $(srcdir)/configdev.ein $(srcdir)/configbuild.ein
rm -f configdev.eps configbuild.eps
cp $(srcdir)/configdev.ein configdev.eps
cp $(srcdir)/configbuild.ein configbuild.eps
$(DVIPS) configure.dvi -o configure.ps
rm -f configdev.eps configbuild.eps
configure.pdf: $(srcdir)/configure.texi $(srcdir)/configdev.tin $(srcdir)/configbuild.tin $(srcdir)/configdev.ein $(srcdir)/configbuild.ein
rm -f configdev.pdf configbuild.pdf
epstopdf $(srcdir)/configdev.ein -outfile=configdev.pdf
epstopdf $(srcdir)/configbuild.ein -outfile=configbuild.pdf
TEXINPUTS=$(TEXIDIR):$$TEXINPUTS $(TEXI2PDF) $(srcdir)/configure.texi
rm -f configdev.pdf configbuild.pdf
configure.html: $(srcdir)/configure.texi
cp $(srcdir)/configdev.jin configdev.jpg
cp $(srcdir)/configbuild.jin configbuild.jpg
$(TEXI2HTML) --no-split -I$(srcdir) -o configure.html $(srcdir)/configure.texi
clean:
rm -f *.aux *.cp *.cps *.dvi *.fn *.fns *.ky *.kys *.log
rm -f *.pg *.pgs *.toc *.tp *.tps *.vr *.vrs
rm -f configdev.txt configbuild.txt
rm -f configdev.eps configbuild.eps
rm -f configdev.jpg configbuild.jpg
mostlyclean: clean
distclean: clean
rm -f Makefile config.status config.cache
maintainer-clean realclean: distclean
rm -f *.html*
rm -f *.info*
Makefile: $(srcdir)/Makefile.in $(host_makefile_frag) $(target_makefile_frag) \
config.status
$(SHELL) ./config.status
AUTOCONF = autoconf
configure_deps = $(srcdir)/configure.in
$(srcdir)/configure: # $(configure_deps)
cd $(srcdir) && $(AUTOCONF)
config.status: $(srcdir)/configure
$(SHELL) ./config.status --recheck
## these last targets are for standards.texi conformance
dist:
check:
installcheck:
TAGS:

View file

@ -56,8 +56,9 @@ L1:
call rax
L2:
; register a function to be called at normal process termination
mov rdi, __do_global_dtors
push __do_global_dtors
call atexit
pop rax
; call init function
call __do_global_ctors
@ -75,17 +76,13 @@ L4:
; arguments are already on the stack
; call the user's function
pop rdi ; argc
pop rsi ; argv pointer
pop rdx ; env pointer
call main
; call exit from the C library so atexit gets called, and the
; C++ destructors get run. This calls our exit routine below
; when it's done.
; call "exit"
mov rdi, rax
push rax
call exit
; endless loop
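For readers who do not speak x86-64 assembly, here is a rough C rendering of the startup sequence in this hunk. The symbol names come from the assembly above; the wrapper name and types are assumptions. The only real difference between the two sides of the hunk is whether atexit's argument and main's result travel in registers (RDI/RAX, as the SysV x86-64 ABI mandates) or over the stack.

/* Hypothetical C equivalent of the crt0.asm call sequence above. */
extern void __do_global_dtors(void);
extern void __do_global_ctors(void);
extern int atexit(void (*func)(void));
extern int main(int argc, char** argv, char** env);
extern void exit(int status);

static void start_sketch(int argc, char** argv, char** env)
{
	atexit(__do_global_dtors);	// run static destructors at process exit
	__do_global_ctors();		// run static constructors now
	exit(main(argc, argv, env));	// main's result becomes the exit code
}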

View file

@ -85,7 +85,7 @@ syscall(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2,
asm volatile (_SYSCALLSTR(INT_SYSCALL)
: "=a" (res)
: "D" (nr), "S" (arg0), "d" (arg1), "c" (arg2), "b" (arg3), "a" (arg4)
: "0" (nr), "b" (arg0), "c" (arg1), "d" (arg2), "S" (arg3), "D" (arg4)
: "memory", "cc");
return res;

1
newlib/src/serdep.tmp Normal file
View file

@ -0,0 +1 @@
# serdep.tmp

View file

@ -2,21 +2,6 @@
symbol-file metalsvm.sym
target remote localhost:1234
# Debugging 32bit code
#set architecture i386
#break stublet
#continue
# Debugging 64bit code
set architecture i386:x86-64
#break main
# Set memory watchpoint
#rwatch apic_mp
# Debugging userspace
#add-symbol-file newlib/examples/memtest.sym 0x40200000
#break main
#continue # skip kernel main()
# Configure breakpoints and everything as you wish here.
break main
continue

View file

@ -1,15 +0,0 @@
#!ipxe
# iPXE is an open-source network boot firmware.
# It provides a full PXE implementation enhanced with additional features such as
# booting from HTTP, FTP, iSCSI SAN, Fibre Channel SAN, Wireless, WAN or Infiniband
#
# http://ipxe.org/
#
# We use it to rapidly compile & debug metalsvm on real hardware.
# This script is fetched and executed by iPXE, which enables easy changes to the boot
# procedure without recompiling iPXE and reflashing your USB thumb drive or network boot ROM.
kernel http://134.130.62.174:8080/metalsvm.elf
module http://134.130.62.174:8080/tools/initrd.img
boot