Compare commits


96 commits

Author SHA1 Message Date
da54e953f9 added note to userspace debugging with gdb (dont forget to add debug flags for newlib!) 2014-01-09 14:10:02 +01:00
594a454b1f updated userspace test application 2014-01-09 14:08:33 +01:00
98197ac076 fixed little bug in stdout handler (output appearing twice) 2014-01-09 14:06:55 +01:00
11977e40bc implemented userspace task loading and userpsace heap management with the new vma_list 2014-01-09 14:06:09 +01:00
a00177ec09 adapted c runtime and syscalls to x86-64 ABI calling convention 2014-01-09 14:04:02 +01:00
d7644300a8 code cleanup of task creation and some additions to the previous commit 2014-01-09 13:59:01 +01:00
6699886358 changed vma_list order 2014-01-09 13:45:20 +01:00
ab67350783 thats a huge commit: reimplemented all remaining page map functions according to the new page_iterate() 2014-01-09 13:44:20 +01:00
269bffc208 unified comment fontcase 2014-01-09 13:41:22 +01:00
88a2e573c6 replaced old 32bit only page_copy by new generic one 2014-01-09 13:33:21 +01:00
494ee1299a unified and extended pagefault exception handler
some cleanup
2014-01-09 13:32:00 +01:00
d5ac6e6092 added macros for x86 control registers & some MSRs 2014-01-09 13:13:59 +01:00
066e29fde9 added NX bit support
renamed some CPP macros
2014-01-09 12:49:04 +01:00
c21ea42058 added page_dump() and page_stats() for testing and demonstration of page_iterate() 2013-12-03 16:40:14 +01:00
8fe165c162 added page_iterate(): a recursive page tree walker 2013-12-03 16:34:34 +01:00
4514080014 enable global pages for the kernelspace 2013-12-03 15:54:58 +01:00
fff2708c5a move page map setup to arch_paging_init() 2013-12-03 15:52:16 +01:00
443ffdf012 replaced PAGE_ALIGN macro by PAGE_CEIL, PAGE_FLOOR pair 2013-12-03 15:29:05 +01:00
2923b1a7ed cleanup of macros and comments, idention etc... 2013-12-03 15:26:21 +01:00
edf178f39a Merge branch 'vogel' into x64_paging 2013-11-26 17:25:53 +01:00
949500fe6c moved mm subsystem tests to apps/memory.c 2013-11-26 17:24:03 +01:00
3f63d80b9c fixed some compiler warnings 2013-11-26 17:18:47 +01:00
60f8f53169 added test for new buddy malloc implementation 2013-11-20 14:12:57 +01:00
7a3e77c82d palloc()/pfree() replace our old kmalloc()/kfree() with PAGE_SIZE granularity 2013-11-20 14:11:19 +01:00
9018781eee replaced old kfree() calls with new signature 2013-11-20 14:11:19 +01:00
0153fb538d removed old kmalloc() 2013-11-20 14:11:19 +01:00
954ccf1379 added malloc.c to Makefile 2013-11-20 14:11:19 +01:00
1e98d0e410 added first implementation of a buddy system kmalloc() allocator 2013-11-20 14:11:18 +01:00
a972efe288 Merge branch 'vma_kernel' into vogel
Conflicts:
	kernel/tasks.c
2013-11-20 14:00:04 +01:00
1fea8eb13b Merge branch 'qemu_debug' into vogel 2013-11-20 13:54:23 +01:00
acc6e2124e disable VGA output and kill remaining processes 2013-11-20 13:51:03 +01:00
9db28ec380 using PAGE_ALIGN macro to calc pages 2013-11-20 13:43:18 +01:00
de33962e9d removed old vm_alloc()/vm_free() which have been replaced by vma_alloc()/vma_free() 2013-11-20 13:22:09 +01:00
71f55f0a89 ported userspace tasks to new VMA implementation (untested!) 2013-11-20 13:19:58 +01:00
76e52aa473 time is precious... 2013-11-20 12:06:53 +01:00
af5fa15d8d fixed possible wrap-around in tlb_flush functions 2013-11-20 12:04:55 +01:00
aa1730919e standardized comment format and code cleanup 2013-11-20 12:03:24 +01:00
707d7132c8 added test for the new VMA implementation 2013-11-20 11:30:04 +01:00
79c4f2703e setup kernelspace VMA list 2013-11-20 11:27:49 +01:00
3cd5a5853b added VMA list implementation 2013-11-20 11:26:55 +01:00
421e7ec66e added missing multiboot #defines 2013-11-20 11:20:52 +01:00
df99b4dfff some rewrite of mmu_init concerning the initialization of the memory bitmap 2013-11-20 11:18:10 +01:00
0d7aa3d0ca allow memory initialization without multiboot mmap 2013-11-20 11:15:10 +01:00
06877ff108 bitmap cleanup 2013-11-18 15:47:26 +01:00
fa07bdee53 improved tests for the paging system 2013-11-14 13:17:14 +01:00
ec171dfcce implemented map_region() (more testing needed; will propably replaced by a iterative solution) 2013-11-14 13:12:35 +01:00
892154c9f1 implemented drop_page_map() (more testing needed) 2013-11-14 13:09:56 +01:00
bbb8c5c186 implemented copy_page_frame() (more testing needed) 2013-11-14 13:09:31 +01:00
92b2badf71 implemented copy_page_map() (more testing needed) 2013-11-14 13:08:56 +01:00
cdcd9e7d20 implemented create_page_map() 2013-11-14 12:25:52 +01:00
14938ef7e1 added some helper functions to for the recursive mapping structures 2013-11-14 12:25:07 +01:00
4b485f5733 implemented virt_to_phys() 2013-11-14 12:23:42 +01:00
9441d21d89 more cleanup of old 32bit code relicts, typos and indention 2013-11-14 12:22:52 +01:00
90d884ec8d changed naming of 64bit boot paging tables according to 32bit boot tables
calculate required boot page tables as a function of KERNEL_SPACE
2013-10-25 12:02:04 +02:00
fca96e9851 unified paging preprocessor #defines and naming scheme 2013-10-24 12:36:05 +02:00
143de82f3d added test case for vm_alloc() and MAP_REMAP flag 2013-10-22 21:49:03 +02:00
cd57f5ec28 added kernel app to test the paging and mm subsystem 2013-10-22 21:30:30 +02:00
d59676dbf5 more work to make our assembly initialization more beautiful 2013-10-17 21:35:13 +02:00
3e73d6384e fixed regression 2013-10-17 13:09:20 +02:00
3be25b99d2 reorganized 64bit paging initialization 2013-10-17 11:36:02 +02:00
403c529e8b fixed copy&paste bug and moved cpu initialization in cpu_init() 2013-10-16 17:50:37 +02:00
Steffen Vogel 9b47b3ef45 refactored paging code to be more universial and suitable for 64bit paging 2013-10-16 17:50:37 +02:00
e290d41149 Merge branch 'memtest' into vogel 2013-10-16 17:36:13 +02:00
f361783f4b Merge branch 'cleanup' into vogel 2013-10-16 17:34:18 +02:00
6826e0374d replace awk script for NASM's config.inc by univerial sed scripts 2013-10-16 16:50:04 +02:00
3ee658d008 Merge branch 'qemu_debug' into vogel
Conflicts:
	Makefile.example
2013-10-16 15:15:31 +02:00
5ab075df9b added an example to debug 64bit code in gdb (QEmu is buggy when switching from 32 to 64bit code) 2013-10-16 15:13:04 +02:00
db21f7cf05 simplified Makefile for building 64bit code
and added new debug target (see L²P: Howto QEmu and GDB)
2013-10-16 14:58:05 +02:00
dac9b20c18 some comments cleaned up 2013-10-16 13:42:58 +02:00
40e5d83217 Merge branch 'cleanup' into vogel 2013-10-10 11:51:31 +02:00
Steffen Vogel 2e230a609e added more output to vma_dump() 2013-10-10 11:50:38 +02:00
d275c0a00a added more output to vma_dump() 2013-10-10 11:50:38 +02:00
Steffen Vogel b0749fc448 added some comments 2013-10-10 11:48:00 +02:00
2f2dd1d3c7 added some comments 2013-10-10 11:48:00 +02:00
Steffen Vogel 2f02db8dc0 fixed typo 2013-10-10 11:46:04 +02:00
9621509e78 fixed typo 2013-10-10 11:46:04 +02:00
Steffen Vogel 6b7b70903e removed non-existant header asm/mmu.h 2013-10-10 11:45:03 +02:00
2e62ee2966 removed non-existant header asm/mmu.h 2013-10-10 11:45:03 +02:00
Steffen Vogel 7ffaec04f2 added missing newline at end of printf() 2013-10-10 11:44:31 +02:00
38eb3d5167 added missing newline at end of printf() 2013-10-10 11:44:31 +02:00
ce66d261b5 Merge branch 'memtest' into vogel 2013-10-10 11:42:36 +02:00
e757ac5c08 Merge branch 'qemu_uart' into vogel 2013-10-10 11:42:27 +02:00
Steffen Vogel e731d60256 debug kernel messages over virtual uart port in qemu
use 'telnet localhost 12346' to listen
2013-10-10 11:39:41 +02:00
5424397b47 debug kernel messages over virtual uart port in qemu
use 'telnet localhost 12346' to listen
2013-10-10 11:39:41 +02:00
Steffen Vogel 3c8de24349 fixed some typos, added comments and some code cleanup 2013-10-10 11:09:36 +02:00
1fc3e40c4e fixed some typos, added comments and some code cleanup 2013-10-10 11:09:36 +02:00
Steffen Vogel ae1af7a053 fixed segmention fault
abort allocation after first malloc fail
2013-10-07 17:22:53 +02:00
16c65de934 fixed segmention fault
abort allocation after first malloc fail
2013-10-07 17:22:53 +02:00
Steffen Vogel b3fa94b0e0 free chunks before terminate 2013-08-19 00:44:24 +02:00
feec2b7de8 free chunks before terminate 2013-08-19 00:44:24 +02:00
Steffen Vogel e766295d68 sequential allocation of multiple chunks 2013-08-19 00:43:34 +02:00
9c85f88333 sequential allocation of multiple chunks 2013-08-19 00:43:34 +02:00
Steffen Vogel 264146a7e1 bugfix: invalid multp for single byte allocation 2013-08-19 00:37:05 +02:00
0376d06594 bugfix: invalid multp for single byte allocation 2013-08-19 00:37:05 +02:00
Steffen Vogel 030ba0d75f code cleanup 2013-08-19 00:35:30 +02:00
8159ad78d7 code cleanup 2013-08-19 00:35:30 +02:00
47 changed files with 2820 additions and 1697 deletions


@ -1,8 +1,12 @@
TOPDIR = $(shell pwd)
ARCH = x86
# For 64bit support, you have to define BIT as 64
BIT=32
NAME = metalsvm
# For 64bit support, you have to define BIT as 64
# Note: do not forget to 'make veryclean' after changing BIT!!!
BIT=64
ARCH = x86
SMP=1
TOPDIR = $(shell pwd)
LWIPDIRS = lwip/src/arch lwip/src/api lwip/src/core lwip/src/core/ipv4 lwip/src/netif
DRIVERDIRS = drivers/net drivers/char
KERNDIRS = libkern kernel mm fs apps arch/$(ARCH)/kernel arch/$(ARCH)/mm arch/$(ARCH)/scc $(LWIPDIRS) $(DRIVERDIRS)
@ -30,35 +34,56 @@ RANLIB_FOR_TARGET = $(CROSSCOMPREFIX)ranlib
STRIP_FOR_TARGET = $(CROSSCOMPREFIX)strip
READELF_FOR_TARGET = $(CROSSCOMPREFIX)readelf
# Tools
MAKE = make
RM = rm -rf
NASM = nasm
# For 64bit code, you have to use qemu-system-x86_64
QEMU = qemu-system-i386
GDB = gdb
# For 64bit support, you have to define -felf64 instead of -felf32
NASMFLAGS = -felf32 -g -i$(TOPDIR)/include/metalsvm/
INCLUDE = -I$(TOPDIR)/include -I$(TOPDIR)/arch/$(ARCH)/include -I$(TOPDIR)/lwip/src/include -I$(TOPDIR)/lwip/src/include/ipv4 -I$(TOPDIR)/drivers
# For 64bit support, you have to define "-m64 -mno-red-zone" instead of "-m32 -march=i586"
ifeq ($(BIT), 32)
QEMU = qemu-system-i386
else ifeq ($(BIT), 64)
QEMU = qemu-system-x86_64
endif
INCLUDE = -I$(TOPDIR)/include \
-I$(TOPDIR)/arch/$(ARCH)/include \
-I$(TOPDIR)/lwip/src/include \
-I$(TOPDIR)/lwip/src/include/ipv4 \
-I$(TOPDIR)/drivers
# Compiler options for final code
CFLAGS = -g -m32 -march=i586 -Wall -O2 -fstrength-reduce -fomit-frame-pointer -finline-functions -ffreestanding $(INCLUDE) $(STACKPROT)
CFLAGS = -g -O2 -m$(BIT) -Wall -fomit-frame-pointer -ffreestanding -fstrength-reduce -finline-functions $(INCLUDE) $(STACKPROT)
# Compiler options for debugging
#CFLAGS = -g -O -m32 -march=i586 -Wall -fomit-frame-pointer -ffreestanding $(INCLUDE) $(STACKPROT)
#CFLAGS = -g -O -m$(BIT) -Wall -fomit-frame-pointer -ffreestanding $(INCLUDE) $(STACKPROT)
NASMFLAGS = -felf$(BIT) -g -i$(TOPDIR)/include/metalsvm/
ARFLAGS = rsv
LDFLAGS = -T link$(BIT).ld -z max-page-size=4096 --defsym __BUILD_DATE=$(shell date +'%Y%m%d') --defsym __BUILD_TIME=$(shell date +'%H%M%S')
STRIP_DEBUG = --strip-debug
KEEP_DEBUG = --only-keep-debug
# Do not change to elf64!
# The Multiboot spec can only boot elf32 binaries
OUTPUT_FORMAT = -O elf32-i386
# For 64bit support, you have to define -m64 instead of "-m32 -march=i586"
CFLAGS_FOR_NEWLIB = -m32 -march=i586 -O2 $(STACKPROT)
# For 64bit support, you have to define -m64 instead of "-m32 -march=i586"
LDFLAGS_FOR_NEWLIB = -m32 -march=i586
# For 64bit support, you have to define -m64 instead of "-m32"
CFLAGS_FOR_TOOLS = -m32 -O2 -Wall
CFLAGS_FOR_NEWLIB = -m$(BIT) -O2 $(STACKPROT)
LDFLAGS_FOR_NEWLIB = -m$(BIT)
CFLAGS_FOR_TOOLS = -m$(BIT) -O2 -Wall
LDFLAGS_FOR_TOOLS =
# For 64bit support, you have to define -felf64 instead of -felf32
NASMFLAGS_FOR_NEWLIB = -felf32
NASMFLAGS_FOR_NEWLIB = -felf$(BIT)
ifeq ($(BIT), 32)
CFLAGS += -march=i586
CFLAGS_FOR_NEWLIB += -march=i586
LDFLAGS_FOR_NEWLIB += -march=i586
else ifeq ($(BIT), 64)
CFLAGS += -mno-red-zone
endif
# Prettify output
V = 0
@ -68,11 +93,15 @@ ifeq ($V,0)
endif
default: all
all: newlib tools $(NAME).elf
newlib:
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) LDFLAGS="$(LDFLAGS_FOR_NEWLIB)" CFLAGS="$(CFLAGS_FOR_NEWLIB)" NASMFLAGS="$(NASMFLAGS_FOR_NEWLIB)" CC_FOR_TARGET=$(CC_FOR_TARGET) \
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) \
LDFLAGS="$(LDFLAGS_FOR_NEWLIB)" \
CFLAGS="$(CFLAGS_FOR_NEWLIB)" \
NASMFLAGS="$(NASMFLAGS_FOR_NEWLIB)" \
CC_FOR_TARGET=$(CC_FOR_TARGET) \
CXX_FOR_TARGET=$(CXX_FOR_TARGET) \
GCC_FOR_TARGET=$(GCC_FOR_TARGET) \
AR_FOR_TARGET=$(AR_FOR_TARGET) \
@ -96,14 +125,23 @@ $(NAME).elf:
$Q$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $(OUTPUT_FORMAT) $(NAME).elf
qemu: newlib tools $(NAME).elf
$(QEMU) -monitor stdio -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img
$(QEMU) -monitor stdio -serial tcp::12346,server,nowait -smp $(SMP) -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -kernel metalsvm.elf -initrd tools/initrd.img
qemudbg: newlib tools $(NAME).elf
$(QEMU) -s -S -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img
$(QEMU) -s -S -nographic -monitor stdio -serial tcp::12346,server -smp $(SMP) -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -kernel metalsvm.elf -initrd tools/initrd.img
gdb: $(NAME).elf
make qemudbg > /dev/null &
$(GDB) -x script.gdb
$(GDB) -q -x script.gdb
debug: newlib tools $(NAME).elf
killall $(QEMU) || true
killall $(GDB) || true
sleep 1
gnome-terminal --working-directory=$(TOPDIR) \
--tab --title=Shell --command="bash -c 'sleep 1 && telnet localhost 12345'" \
--tab --title=QEmu --command="make qemudbg" \
--tab --title=GDB --command="make gdb" \
--tab --title=Debug --command="bash -c 'sleep 1 && telnet localhost 12346'"
clean:
$Q$(RM) $(NAME).elf $(NAME).sym *~
@ -112,7 +150,7 @@ clean:
veryclean: clean
$Q$(MAKE) -C newlib veryclean
@echo Very cleaned
@echo Very cleaned.
#depend:
# for i in $(SUBDIRS); do $(MAKE) -k -C $$i depend; done
@ -124,16 +162,15 @@ veryclean: clean
$Q$(CPP_FOR_TARGET) -MF $*.dep -MT $*.o -MM -D__KERNEL__ $(CFLAGS) $<
include/metalsvm/config.inc: include/metalsvm/config.h
@echo "; This file is generated automatically from the config.h file." > include/metalsvm/config.inc
@echo "; Before editing this, you should consider editing config.h." >> include/metalsvm/config.inc
@awk '/^#define MAX_CORES/{ print "%define MAX_CORES", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc
@awk '/^#define KERNEL_STACK_SIZE/{ print "%define KERNEL_STACK_SIZE", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc
@awk '/^#define CONFIG_VGA/{ print "%define CONFIG_VGA", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc
@echo "; This file is generated automatically from the config.h file." > $@
@echo "; Before editing this, you should consider editing config.h." >> $@
@sed -nre 's/^[\t ]*#define[\t ]+([a-z_0-9]+)([\t ]+.*)*/%define \1/ip' $< >> $@
@sed -nre 's/^[\t ]*#define[\t ]+([a-z_0-9]+)[\t ]+([a-z_0-9.]+)([\t ]+.*)*/%define \1 \2/ip' $< >> $@
%.o : %.asm include/metalsvm/config.inc
@echo [ASM] $@
$Q$(NASM) $(NASMFLAGS) -o $@ $<
.PHONY: default all clean emu gdb newlib tools
.PHONY: default all clean qemu qemudbg gdb debug newlib tools
include $(addsuffix /Makefile,$(SUBDIRS))
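(Illustration, not part of the diff: the two sed passes above convert each #define from config.h into a NASM %define. A valued line such as "#define MAX_CORES 2" matches both expressions, so the generated config.inc would contain

	%define MAX_CORES
	%define MAX_CORES 2

where the later, valued definition wins, since NASM allows single-line macros to be redefined; a bare flag define would only match the first pass.)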


@ -1,4 +1,4 @@
C_source := tests.c echo.c netio.c jacobi.c laplace.c gfx_client.c gfx_generic.c
C_source := tests.c echo.c netio.c jacobi.c laplace.c gfx_client.c gfx_generic.c memory.c
MODULE := apps
include $(TOPDIR)/Makefile.inc

apps/memory.c (new file, 299 lines)

@ -0,0 +1,299 @@
/*
* Copyright 2011 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <metalsvm/stdlib.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdarg.h>
#include <metalsvm/mmu.h>
#include <metalsvm/time.h>
#include <metalsvm/tasks.h>
#include <metalsvm/vma.h>
#include <metalsvm/malloc.h>
#include <asm/page.h>
#include <asm/processor.h>
#define PAGE_COUNT 10
#define SIZE (PAGE_COUNT*PAGE_SIZE)
#define VIRT_FROM_ADDR 0x100000000000
#define VIRT_TO_ADDR 0x200000000000
/** @brief Simple helper to format our test results */
static void test(size_t expr, char *fmt, ...)
{
void _putchar(int c, void *arg) { kputchar(c); } // for kvprintf
static int c = 1;
va_list ap;
va_start(ap, fmt);
kprintf("%s #%u:\t", (expr) ? "PASSED" : "FAILED", c++);
kvprintf(fmt, _putchar, NULL, 10, ap);
kputs("\n");
va_end(ap);
if (!expr)
abort();
}
/** @brief Linear feedback shift register PRNG */
static uint16_t rand()
{
static uint16_t lfsr = 0xACE1u;
static uint16_t bit;
bit = ((lfsr >> 0) ^ (lfsr >> 2) ^ (lfsr >> 3) ^ (lfsr >> 5) ) & 1;
return lfsr = (lfsr >> 1) | (bit << 15);
}
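/* Note (added for illustration): this is the classic 16-bit Fibonacci LFSR
 * (taps 16, 14, 13 and 11, seed 0xACE1); it cycles through all 65535
 * non-zero states before repeating, and being deterministic it makes the
 * allocation stress tests below reproducible across runs. */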
/** @brief BSD sum algorithm (the Unix 'sum' command), as used by QEmu */
uint16_t checksum(size_t start, size_t end) {
size_t addr;
uint16_t sum;
for(addr = start, sum = 0; addr < end; addr++) {
uint8_t val = *((uint8_t *) addr);
sum = (sum >> 1) | (sum << 15);
sum += val;
}
return sum;
}
static int paging_stage2(void *arg) {
size_t old, new;
kprintf("PAGING: entering stage 2...\n");
old = *((size_t *) arg);
kprintf("old sum: %lu\n", old);
new = checksum(VIRT_FROM_ADDR, VIRT_FROM_ADDR + PAGE_COUNT*PAGE_SIZE);
test(old == new, "checksum(%p, %p) = %lu", VIRT_FROM_ADDR, VIRT_FROM_ADDR + PAGE_COUNT*PAGE_SIZE, new);
size_t cr3 = read_cr3();
kprintf("cr3 new = %x\n", cr3);
return 0;
}
/** @brief Test of the paging subsystem
*
* We will map a single physical memory region to two virtual regions.
* When writing to the first one, we should be able to read the same contents
* from the second one.
*/
static void paging(void)
{
size_t c, sum;
size_t *p1, *p2;
size_t virt_from, virt_to, virt_alloc;
size_t phys;
// show original page maps
page_dump(0, 0);
page_stats(0, 0, 1); // reset accessed and dirty bits
// allocate physical page frames
phys = get_pages(PAGE_COUNT);
test(phys, "get_pages(%lu) = 0x%lx", PAGE_COUNT, phys);
// create first mapping
virt_from = map_region(VIRT_FROM_ADDR, phys, PAGE_COUNT, MAP_USER_SPACE);
test(virt_from, "map_region(0x%lx, 0x%lx, %lu, 0x%x) = 0x%lx", VIRT_FROM_ADDR, phys, PAGE_COUNT, 0, virt_from);
// check address translation
phys = virt_to_phys(virt_from);
test(phys, "virt_to_phys(0x%lx) = 0x%lx", virt_from, phys);
// write test data
p1 = (size_t *) virt_from;
for (c = 0; c < PAGE_COUNT*PAGE_SIZE/sizeof(size_t); c++) {
p1[c] = c;
}
// create second mapping pointing to the same page frames
virt_to = map_region(VIRT_TO_ADDR, phys, PAGE_COUNT, MAP_USER_SPACE);
test(virt_to, "map_region(0x%lx, 0x%lx, %lu, 0x%x) = 0x%lx", VIRT_TO_ADDR, phys, PAGE_COUNT, 0, virt_to);
// show paging info again
page_dump(0, 0);
page_stats(0, 0, 0);
// check address translation
phys = virt_to_phys(virt_to);
test(phys, "virt_to_phys(0x%lx) = 0x%lx", virt_to, phys);
// check if both mapped areas are equal
p2 = (size_t *) virt_to;
for (c = 0; c < PAGE_COUNT*PAGE_SIZE/sizeof(size_t); c++) {
if (p1[c] != p2[c])
test(0, "data mismatch: *(%p) != *(%p)", &p1[c], &p2[c]);
}
test(1, "data is equal");
// try to remap without MAP_REMAP
virt_to = map_region(VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, MAP_USER_SPACE);
test(!virt_to, "map_region(0x%lx, 0x%lx, %lu, 0x%x) = 0x%lx (without MAP_REMAP flag)", VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, 0, virt_to);
// try to remap with MAP_REMAP
virt_to = map_region(VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, MAP_REMAP|MAP_USER_SPACE);
test(virt_to, "map_region(0x%lx, 0x%lx, %lu, 0x%x) = 0x%lx (with MAP_REMAP flag)", VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, MAP_REMAP, virt_to);
// check if data is not equal anymore (we remapped with 1 page offset)
p2 = (size_t *) virt_to;
for (c = 0; c < PAGE_COUNT*PAGE_SIZE/sizeof(size_t); c++) {
if (p1[c] == p2[c])
test(0, "data match at *(%p) != *(%p)", &p1[c], &p2[c]);
}
test(1, "data is unequal");
// test vma_alloc
virt_alloc = map_region(0, phys, PAGE_COUNT, 0);
test(virt_alloc, "map_region(0x%lx, 0x%lx, %lu, 0x%x) = 0x%lx", 0, phys, PAGE_COUNT, 0, virt_alloc);
// data should match against new vm addr
p2 = (size_t *) virt_alloc;
for (c = 0; c < PAGE_COUNT*PAGE_SIZE/sizeof(size_t); c++) {
if (p1[c] != p2[c])
test(0, "data mismatch at *(%p) != *(%p)", &p1[c], &p2[c]);
}
test(1, "data is equal");
// calc checksum
sum = checksum(virt_alloc, virt_alloc + PAGE_COUNT*PAGE_SIZE);
test(sum, "checksum(%p, %p) = %lu", virt_alloc, virt_alloc + PAGE_COUNT*PAGE_SIZE, sum);
size_t cr3 = read_cr3();
kprintf("cr3 old = %x\n", cr3);
//create_kernel_task(0, paging_stage2, &sum, NORMAL_PRIO);
//sleep(3);
}
/** @brief Test of the VMA allocator */
static void vma(void)
{
int ret;
// vma_alloc
size_t a1 = vma_alloc(SIZE, VMA_HEAP);
test(a1, "vma_alloc(0x%x, 0x%x) = 0x%lx", SIZE, VMA_HEAP, a1);
vma_dump();
size_t a2 = vma_alloc(SIZE, VMA_HEAP|VMA_USER);
test(a2 != 0, "vma_alloc(0x%x, 0x%x) = 0x%lx", SIZE, VMA_HEAP|VMA_USER, a2);
vma_dump();
// vma_add
ret = vma_add(VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, VMA_HEAP|VMA_USER);
test(ret >= 0, "vma_add(0x%lx, 0x%lx, 0x%x) = %u", VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, VMA_HEAP|VMA_USER, ret);
vma_dump();
ret = vma_add(VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, VMA_HEAP|VMA_USER);
test(ret >= 0, "vma_add(0x%lx, 0x%lx, 0x%x) = %u", VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, VMA_HEAP|VMA_USER, ret);
vma_dump();
ret = vma_add(VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, VMA_HEAP|VMA_USER);
test(ret >= 0, "vma_add(0x%lx, 0x%lx, 0x%x) = %u", VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, VMA_HEAP|VMA_USER, ret);
vma_dump();
// vma_free
ret = vma_free(VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR);
test(ret >= 0, "vma_free(0x%lx, 0x%lx) = %u", VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, ret);
vma_dump();
ret = vma_free(VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE);
test(ret >= 0, "vma_free(0x%lx, 0x%lx) = %u", VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, ret);
vma_dump();
ret = vma_free(VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE);
test(ret >= 0, "vma_free(0x%lx, 0x%lx) = %u", VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, ret);
vma_dump();
}
/** @brief Test of the kernel malloc allocator */
static void malloc(void)
{
int i;
int* p[20];
int* a;
// kmalloc() test
buddy_dump();
a = kmalloc(SIZE);
test(a != NULL, "kmalloc(%lu) = %p", SIZE, a);
buddy_dump();
// simple write/read test
for (i=0; i<SIZE/sizeof(int); i++)
a[i] = i;
for (i=0; i<SIZE/sizeof(int); i++) {
if (a[i] != i)
test(0, "data mismatch: *(%p) != %lu", &a[i], i);
}
test(1, "data is equal");
// kfree() test
kfree(a);
test(1, "kfree(%p)", a);
buddy_dump();
// some random malloc/free patterns to stress the buddy system
for (i=0; i<20; i++) {
uint16_t sz = rand();
p[i] = kmalloc(sz);
test(p[i] != NULL, "kmalloc(%u) = %p", sz, p[i]);
}
buddy_dump();
for (i=0; i<20; i++) {
kfree(p[i]);
test(1, "kfree(%p)", p[i]);
}
buddy_dump();
}
/** @brief This is a simple procedure to test the memory management subsystem */
int memory(void* arg)
{
tid_t id;
kprintf("======== PAGING: test started...\n");
paging();
kprintf("======== VMA: test started...\n");
vma();
kprintf("======== MALLOC: test started...\n");
malloc();
kprintf("======== USER: test userspace...\n");
char* argv[] = {"/bin/memtest", "17", "kb", "5", NULL};
create_user_task(&id, argv[0], argv);
kprintf(" calling %s %s %s %s with id = %i:\n", argv[0], argv[1], argv[2], argv[3], id);
kprintf("======== All tests finished successfull...\n");
return 0;
}


@ -43,6 +43,7 @@
int laplace(void* arg);
int jacobi(void* arg);
int memory(void* arg);
void echo_init(void);
void netio_init(void);
@ -744,8 +745,7 @@ int test_init(void)
create_user_task(NULL, "/bin/jacobi", jacobi_argv);
//create_user_task_on_core(NULL, "/bin/jacobi", jacobi_argv, 1);
#endif
#ifdef START_MMNIF_TEST
#if defined(CONFIG_LWIP) && LWIP_SOCKET
#if defined(START_MMNIF_TEST) && defined(CONFIG_LWIP) && LWIP_SOCKET
if (RCCE_IAM == 0) {
kprintf("Start /bin/server...\n");
create_user_task(NULL, "/bin/server", server_argv);
@ -755,6 +755,8 @@ int test_init(void)
create_user_task(NULL, "/bin/client", client_argv);
}
#endif
#ifdef START_MEMORY
create_kernel_task(NULL, memory, NULL, NORMAL_PRIO);
#endif
return 0;


@ -46,6 +46,7 @@
//#define START_HELLO
//#define START_TESTS
//#define START_JACOBI
//#define START_MEMORY
//#define START_CHIEFTEST


@ -34,7 +34,7 @@
// ____ _ _
// / ___| _ _ _ __ ___ | |__ ___ | |___
// \___ \| | | | '_ ` _ \| '_ \ / _ \| / __|
// ___) | |_| | | | | | | |_) | (_) | \__ \
// ___) | |_| | | | | | | |_) | (_) | \__
// |____/ \__, |_| |_| |_|_.__/ \___/|_|___/
// |___/
//
@ -253,7 +253,7 @@
// _____ _ _
// | ___| _ _ __ ___| |_(_) ___ _ __ ___
// | |_ | | | | '_ \ / __| __| |/ _ \| '_ \/ __|
// | _|| |_| | | | | (__| |_| | (_) | | | \__ \
// | _|| |_| | | | | (__| |_| | (_) | | | \__
// |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
//
// #########################################################################################


@ -102,7 +102,7 @@ inline static void outportl(unsigned short _port, unsigned int _data)
inline static void uart_putchar(unsigned char _data)
{
outportb(0x2F8, _data);
outportb(UART_PORT, _data);
}
/**


@ -35,9 +35,11 @@
#ifdef CONFIG_MULTIBOOT
/* are there modules to do something with? */
/// Does the bootloader provide mem_* fields?
#define MULTIBOOT_INFO_MEM 0x00000001
/// Does the bootloader provide a list of modules?
#define MULTIBOOT_INFO_MODS 0x00000008
/* is there a full memory map? */
/// Does the bootloader provide a full memory map?
#define MULTIBOOT_INFO_MEM_MAP 0x00000040
typedef uint16_t multiboot_uint16_t;
@ -114,7 +116,6 @@ struct multiboot_info
multiboot_uint16_t vbe_interface_off;
multiboot_uint16_t vbe_interface_len;
};
typedef struct multiboot_info multiboot_info_t;
struct multiboot_mmap_entry


@ -21,6 +21,7 @@
* @file arch/x86/include/asm/page.h
* @brief Definitions and functions related to paging
* @author Stefan Lankes
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*
* This file defines the interface for paging as well as the structures related to paging.
*/
@ -31,86 +32,129 @@
#include <metalsvm/stddef.h>
#include <metalsvm/stdlib.h>
#define _PAGE_BIT_PRESENT 0 /* is present */
#define _PAGE_BIT_RW 1 /* writeable */
#define _PAGE_BIT_USER 2 /* userspace addressable */
#define _PAGE_BIT_PWT 3 /* page write through */
#define _PAGE_BIT_PCD 4 /* page cache disabled */
#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
#define _PAGE_BIT_PAT 7 /* on 4KB pages */
#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
#define _PAGE_BIT_SVM_STRONG 9 /* mark a virtual address range as used by the SVM system */
#define _PAGE_BIT_SVM_LAZYRELEASE 10 /* mark a virtual address range as used by the SVM system */
#define _PAGE_BIT_SVM_INIT 11 /* mark if the MBP proxy is used */
/// Page offset bits
#define PAGE_BITS 12
#ifdef CONFIG_X86_32
/// Number of page map indirections
#define PAGE_MAP_LEVELS 2
/// Page map bits
#define PAGE_MAP_BITS 10
/// Total operand width in bits
#define BITS 32
/// Linear/virtual address width
#define VIRT_BITS BITS
/// Physical address width (we don't support PAE)
#define PHYS_BITS BITS
#elif defined(CONFIG_X86_64)
/// Number of page map indirections
#define PAGE_MAP_LEVELS 4
/// Page map bits
#define PAGE_MAP_BITS 9
/// Total operand width in bits
#define BITS 64
/// Linear/virtual address width
#define VIRT_BITS 48
/// Physical address width (maximum value)
#define PHYS_BITS 52
#endif
/// The size of a single page in bytes
#define PAGE_SIZE ( 1L << PAGE_BITS)
/// The number of entries in a page map table
#define PAGE_MAP_ENTRIES ( 1L << PAGE_MAP_BITS)
/// Mask the page address
#define PAGE_MASK (-1L << PAGE_BITS)
/// Mask the entry in a page table
#define PAGE_ENTRY_MASK (-1L << (PAGE_BITS-PAGE_MAP_BITS))
/// Mask for all flag bits in a page map entry (including ignored bits)
#define PAGE_FLAGS_MASK (~(-1L << PAGE_BITS) | (-1L << VIRT_BITS))
/// Align to next page
#define PAGE_FLOOR(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
/// Align to page
#define PAGE_CEIL(addr) ( (addr) & PAGE_MASK)
/// Sign extension to get a valid canonical address (hack: by using arithmetic shifts)
#define VIRT_SEXT(addr) ((ssize_t) addr << (BITS-VIRT_BITS) >> (BITS-VIRT_BITS))
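/* Worked example (added for illustration): with BITS = 64 and VIRT_BITS = 48,
 * VIRT_SEXT(0x0000FF8000000000) first shifts left by 16, then arithmetically
 * back right by 16, which replicates bit 47 into the upper bits and yields
 * the canonical address 0xFFFFFF8000000000; an address with bit 47 clear
 * passes through unchanged. */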
// base addresses of page map tables
#ifdef CONFIG_X86_32
#define PAGE_MAP_PGD 0xFFFFF000
#define PAGE_MAP_PGT 0xFFC00000
#elif defined(CONFIG_X86_64)
#define PAGE_MAP_PML4 0xFFFFFFFFFFFFF000
#define PAGE_MAP_PDPT 0xFFFFFFFFFFE00000
#define PAGE_MAP_PGD 0xFFFFFFFFC0000000
#define PAGE_MAP_PGT 0xFFFFFF8000000000
#endif
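/* Derivation (added for illustration): these bases follow from a recursive
 * (self-referencing) entry in the last slot (index 511) of the PML4,
 * combined with the sign extension performed by VIRT_SEXT:
 *   PAGE_MAP_PGT  = sext(511 << 39)                               = 0xFFFFFF8000000000
 *   PAGE_MAP_PGD  = sext((511 << 39) | (511 << 30))               = 0xFFFFFFFFC0000000
 *   PAGE_MAP_PDPT = sext((511 << 39) | (511 << 30) | (511 << 21)) = 0xFFFFFFFFFFE00000
 *   PAGE_MAP_PML4 = sext(previous | (511 << 12))                  = 0xFFFFFFFFFFFFF000 */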
/// Page is present
#define PG_PRESENT (1 << _PAGE_BIT_PRESENT)
#define PG_PRESENT (1 << 0)
/// Page is read- and writable
#define PG_RW (1 << _PAGE_BIT_RW)
#define PG_RW (1 << 1)
/// Page is addressable from userspace
#define PG_USER (1 << _PAGE_BIT_USER)
#define PG_USER (1 << 2)
/// Page write through is activated
#define PG_PWT (1 << _PAGE_BIT_PWT)
#define PG_PWT (1 << 3)
/// Page cache is disabled
#define PG_PCD (1 << _PAGE_BIT_PCD)
#define PG_PCD (1 << 4)
/// Page was recently accessed (set by CPU)
#define PG_ACCESSED (1 << _PAGE_BIT_ACCESSED)
#define PG_ACCESSED (1 << 5)
/// Page is dirty due to recent write access (set by CPU)
#define PG_DIRTY (1 << _PAGE_BIT_DIRTY)
/// Big page: 4MB (or 2MB)
#define PG_PSE (1 << _PAGE_BIT_PSE)
#define PG_DIRTY (1 << 6)
/// Huge page: 4MB (or 2MB, 1GB)
#define PG_PSE (1 << 7)
/// Page is part of the MPB (SCC specific entry)
#define PG_MPE PG_PSE
#define PG_MPE PG_PSE
/// Page attribute table
#define PG_PAT PG_PSE
/// Global TLB entry (Pentium Pro and later)
#define PG_GLOBAL (1 << _PAGE_BIT_GLOBAL)
/// Pattern flag
#define PG_PAT (1 << _PAGE_BIT_PAT)
#define PG_GLOBAL (1 << 8)
/// This virtual address range is used by SVM system as marked
#define PG_SVM PG_SVM_STRONG
#define PG_SVM_STRONG (1 << _PAGE_BIT_SVM_STRONG)
#define PG_SVM (1 << 9)
#define PG_SVM_STRONG PG_SVM
/// This virtual address range is used by SVM system as marked
#define PG_SVM_LAZYRELEASE (1 << _PAGE_BIT_SVM_LAZYRELEASE)
#define PG_SVM_LAZYRELEASE (1 << 10)
/// Currently, no page frame is behind this page (only the MBP proxy)
#define PG_SVM_INIT (1 << _PAGE_BIT_SVM_INIT)
#define PG_SVM_INIT (1 << 11)
/// Disable execution for this page
#define PG_XD (1L << 63)
/// This is a whole set of flags (PRESENT,RW,ACCESSED,DIRTY) for kernelspace tables
#define KERN_TABLE (PG_PRESENT|PG_RW|PG_ACCESSED|PG_DIRTY)
/// This is a whole set of flags (PRESENT,RW,ACCESSED,DIRTY,USER) for userspace tables
#define USER_TABLE (PG_PRESENT|PG_RW|PG_ACCESSED|PG_DIRTY|PG_USER)
#define PG_TABLE (PG_PRESENT|PG_RW|PG_XD)
/// This is a whole set of flags (PRESENT,RW,GLOBAL) for kernelspace pages
#define KERN_PAGE (PG_PRESENT|PG_RW|PG_GLOBAL)
/// This is a whole set of flags (PRESENT,RW,USER) for userspace pages
#define USER_PAGE (PG_PRESENT|PG_RW|PG_USER)
#define PG_PAGE (PG_PRESENT|PG_RW|PG_GLOBAL|PG_XD)
#if __SIZEOF_POINTER__ == 4
#define PGT_ENTRIES 1024
#elif __SIZEOF_POINTER__ == 8
#define PGT_ENTRIES 512
#endif
/** @brief Page table structure
/** @brief A single entry in a page map */
typedef size_t page_entry_t;
/** @brief General page map structure
*
* This structure keeps page table entries.\n
* On a 32bit system, a page table consists normally of 1024 entries.
* This page map structure is a general type for all indirection levels,
* as all page map levels contain the same number of entries.
* All page maps must be page aligned!
*/
typedef struct page_table
{
/// Page table entries are unsigned 32bit integers.
size_t entries[PGT_ENTRIES];
} page_table_t __attribute__ ((aligned (4096)));
typedef struct page_map {
page_entry_t entries[PAGE_MAP_ENTRIES];
} __attribute__ ((aligned (PAGE_SIZE))) page_map_t;
/** @brief Page directory structure
/** @brief A callback type for the page map iterator
*
* This structure keeps page directory entries.\
* On a 32bit system, a page directory consists normally of 1024 entries.
* @param entry A pointer to the current page map entry
* @return
* - 0 if we want to skip underlying page tables
* - >0 if we want to recurse into underlying page tables
*/
typedef struct page_dir
{
/// Page dir entries are unsigned 32bit integers.
size_t entries[PGT_ENTRIES];
} page_dir_t __attribute__ ((aligned (4096)));
typedef int (*page_cb_t)(page_entry_t* entry, int level);
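/* Minimal sketch (hypothetical, not from this diff): a callback matching
 * page_cb_t that could be handed to the page_iterate() walker added in this
 * branch; it counts present entries and only recurses above the PGT level. */
static size_t present_pages = 0;
static int count_present(page_entry_t* entry, int level)
{
	if (*entry & PG_PRESENT)
		present_pages++;
	return (*entry & PG_PRESENT) && (level > 0); // >0 recurses into the underlying table
}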
/** @brief Get the corresponding page map entry to a given virtual address */
static inline page_entry_t* virt_to_entry(size_t addr, int level) {
return (page_entry_t*) ((((ssize_t) addr | (-1L << VIRT_BITS)) >> ((level+1) * PAGE_MAP_BITS)) & ~0x7);
}
/** @brief Get the corresponding virtual address to a page map entry */
static inline size_t entry_to_virt(page_entry_t* entry, int level) {
return VIRT_SEXT((size_t) entry << ((level+1) * PAGE_MAP_BITS));
}
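/* Example (added for illustration): with the recursive mapping above,
 * virt_to_entry(addr, 0) returns the address of the PGT entry mapping addr
 * (it lies within PAGE_MAP_PGT), and entry_to_virt() inverts this, so
 * entry_to_virt(virt_to_entry(addr, 0), 0) == PAGE_CEIL(addr). */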
/** @brief Converts a virtual address to a physical
*
@ -192,7 +236,7 @@ int arch_paging_init(void);
*
* @return Returns the address of the boot task's page dir array.
*/
page_dir_t* get_boot_pgd(void);
page_map_t* get_boot_page_map(void);
/** @brief Setup a new page directory for a new user-level task
*
@ -203,18 +247,18 @@ page_dir_t* get_boot_pgd(void);
* - counter of allocated page tables
* - -ENOMEM (-12) on failure
*/
int create_pgd(task_t* task, int copy);
int create_page_map(struct task* task, int copy);
/** @brief Delete page directory and its page tables
/** @brief Delete all page map structures of the current task
*
* Puts page tables and page directory back to buffer and
* sets the task's page directory pointer to NULL
* Puts PML4, PDPT, PGD, PGT tables back to buffer and
* sets the task's page map pointer to NULL
*
* @return
* - 0 on success
* - -EINVAL (-22) on failure (in case PGD is still the boot-pgd).
*/
int drop_pgd(void);
int drop_page_map(void);
/** @brief Change the page permission in the page tables of the current task
*
@ -231,4 +275,13 @@ int drop_pgd(void);
*/
int change_page_permissions(size_t start, size_t end, uint32_t flags);
/** @brief Dump mapped memory */
void page_dump(size_t start, size_t end);
/** @brief Print stats about page flags
*
* @param reset Reset accessed and dirty bits in page tables
*/
void page_stats(size_t start, size_t end, int reset);
#endif


@ -53,6 +53,97 @@ extern "C" {
#define CPU_FEATURE_AVX (1 << 28)
#define CPU_FEATURE_HYPERVISOR (1 << 31)
// x86 control registers
/// Protected Mode Enable
#define CR0_PE (1 << 0)
/// Monitor coprocessor
#define CR0_MP (1 << 1)
/// Enable FPU emulation
#define CR0_EM (1 << 2)
/// Task switched
#define CR0_TS (1 << 3)
/// Extension type of coprocessor
#define CR0_ET (1 << 4)
/// Enable FPU error reporting
#define CR0_NE (1 << 5)
/// Enable write protected pages
#define CR0_WP (1 << 16)
/// Enable alignment checks
#define CR0_AM (1 << 18)
/// Globally enables/disables write-back caching
#define CR0_NW (1 << 29)
/// Globally disable memory caching
#define CR0_CD (1 << 30)
/// Enable paging
#define CR0_PG (1 << 31)
/// Virtual 8086 Mode Extensions
#define CR4_VME (1 << 0)
/// Protected-mode Virtual Interrupts
#define CR4_PVI (1 << 1)
/// Disable Time Stamp Counter register (rdtsc instruction)
#define CR4_TSD (1 << 2)
/// Enable debug extensions
#define CR4_DE (1 << 3)
/// Enable hugepage support
#define CR4_PSE (1 << 4)
/// Enable physical address extension
#define CR4_PAE (1 << 5)
/// Enable machine check exceptions
#define CR4_MCE (1 << 6)
/// Enable global pages
#define CR4_PGE (1 << 7)
/// Enable Performance-Monitoring Counter
#define CR4_PCE (1 << 8)
/// Enable Operating system support for FXSAVE and FXRSTOR instructions
#define CR4_OSFXSR (1 << 9)
/// Enable Operating System Support for Unmasked SIMD Floating-Point Exceptions
#define CR4_OSXMMEXCPT (1 << 10)
/// Enable Virtual Machine Extensions, see Intel VT-x
#define CR4_VMXE (1 << 13)
/// Enable Safer Mode Extensions, see Trusted Execution Technology (TXT)
#define CR4_SMXE (1 << 14)
/// Enables the instructions RDFSBASE, RDGSBASE, WRFSBASE, and WRGSBASE
#define CR4_FSGSBASE (1 << 16)
/// Enables process-context identifiers
#define CR4_PCIDE (1 << 17)
/// Enable XSAVE and Processor Extended States
#define CR4_OSXSAVE (1 << 18)
/// Enable Supervisor Mode Execution Protection
#define CR4_SMEP (1 << 20)
/// Enable Supervisor Mode Access Protection
#define CR4_SMAP (1 << 21)
// x86-64 specific MSRs
/// Extended Feature Enable Register (EFER)
#define MSR_EFER 0xc0000080
/// legacy mode SYSCALL target
#define MSR_STAR 0xc0000081
/// long mode SYSCALL target
#define MSR_LSTAR 0xc0000082
/// compat mode SYSCALL target
#define MSR_CSTAR 0xc0000083
/// EFLAGS mask for syscall
#define MSR_SYSCALL_MASK 0xc0000084
/// 64bit FS base
#define MSR_FS_BASE 0xc0000100
/// 64bit GS base
#define MSR_GS_BASE 0xc0000101
/// SwapGS GS shadow
#define MSR_KERNEL_GS_BASE 0xc0000102
// MSR EFER bits
#define EFER_SCE (1 << 0)
#define EFER_LME (1 << 8)
#define EFER_LMA (1 << 10)
#define EFER_NXE (1 << 11)
#define EFER_SVME (1 << 12)
#define EFER_LMSLE (1 << 13)
#define EFER_FFXSR (1 << 14)
#define EFER_TCE (1 << 15)
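// Usage sketch (illustration only; the rdmsr()/wrmsr() helper names are
// assumed, not taken from this diff): the NX support added in this branch
// boils down to setting EFER.NXE during early boot,
//   wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);
// after which page entries tagged with PG_XD fault on instruction fetch.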
typedef struct {
uint32_t feature1, feature2;
} cpu_info_t;
@ -273,7 +364,7 @@ int ipi_tlb_flush(void);
/** @brief Flush a specific page entry in TLB
* @param addr The (virtual) address of the page to flush
*/
static inline void tlb_flush_one_page(uint32_t addr)
static inline void tlb_flush_one_page(size_t addr)
{
asm volatile("invlpg (%0)" : : "r"(addr) : "memory");
#if MAX_CORES > 1
@ -282,7 +373,7 @@ static inline void tlb_flush_one_page(uint32_t addr)
* => User-level applications run only on one
* and we didn't flush the TLB of the other cores
*/
if (addr <= KERNEL_SPACE)
if (addr < KERNEL_SPACE)
ipi_tlb_flush();
#endif
}
@ -293,7 +384,7 @@ static inline void tlb_flush_one_page(uint32_t addr)
*/
static inline void tlb_flush(void)
{
uint32_t val = read_cr3();
size_t val = read_cr3();
if (val)
write_cr3(val);


@ -26,13 +26,6 @@
extern "C" {
#endif
/** @brief Copy a physical page to another physical destination
*
* @param dest Destination address
* @param src Source address
*/
void copy_page_physical(void* dest, const void * src);
#ifdef HAVE_ARCH_MEMCPY
#ifdef CONFIG_ROCKCREEK


@ -97,17 +97,19 @@ static inline int register_task(void)
*
* @return 0 in any case
*/
static inline int jump_to_user_code(uint32_t ep, uint32_t stack)
static inline int jump_to_user_code(size_t ep, size_t stack)
{
asm volatile ("push $0x23; push %0; push $0x1B; push %1" :: "r"(stack), "r"(ep)); // fake stack, see Intel Reference Manual, Vol 1, 6.3.6
#ifdef CONFIG_X86_32
asm volatile ("mov %0, %%ds; mov %0, %%fs; mov %0, %%gs; mov %0, %%es" :: "r"(0x23));
asm volatile ("push $0x23; push %0; push $0x1B; push %1" :: "r"(stack), "r"(ep));
asm volatile ("lret" ::: "cc");
asm volatile ("mov %0, %%ds; mov %0, %%fs; mov %0, %%gs; mov %0, %%es" :: "r"(0x23)); // update segment registers
asm volatile ("lret" ::: "cc"); // far return to user level code
#elif defined (CONFIG_X86_64)
asm volatile ("lretq" ::: "cc"); // far return to user level code
#endif
return 0;
#else
return -22;
#endif
}
#ifdef __cplusplus


@ -387,12 +387,14 @@ void smp_start(uint32_t id)
kprintf("Application processor %d is entering its idle task\n", apic_cpu_id());
// initialize default cpu features
#ifdef CONFIG_X86_32
// initialization for x86_64 is done in smp_entry()
cpu_init();
#endif
// use the same gdt like the boot processors
gdt_flush();
// install IDT
idt_install();


@ -29,7 +29,6 @@
SECTION .mboot
global start
start:
mov byte [msg], 'H'
jmp stublet
; This part MUST be 4byte aligned, so we solve that issue using 'ALIGN 4'
@ -38,10 +37,10 @@ mboot:
; Multiboot macros to make a few lines more readable later
MULTIBOOT_PAGE_ALIGN equ 1<<0
MULTIBOOT_MEMORY_INFO equ 1<<1
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
MULTIBOOT_HEADER_MAGIC equ 0x1BADB002
MULTIBOOT_HEADER_FLAGS equ MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO ; | MULTIBOOT_AOUT_KLUDGE
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
EXTERN code, bss, end
; This is the GRUB Multiboot header. A boot signature
@ -49,8 +48,6 @@ mboot:
dd MULTIBOOT_HEADER_FLAGS
dd MULTIBOOT_CHECKSUM
msg db "?ello from MetalSVM kernel!!", 0
SECTION .text
ALIGN 4
stublet:
@ -70,7 +67,7 @@ stublet:
; jump to the boot processors's C code
extern main
call main
jmp $
jmp $ ; infinite loop
global cpu_init
cpu_init:
@ -112,7 +109,7 @@ global read_ip
read_ip:
mov eax, [esp+4]
pop DWORD [eax] ; Get the return address
add esp, 4 ; Dirty Hack! read_ip cleanup the stacl
add esp, 4 ; Dirty Hack! read_ip cleanup the stack
jmp [eax] ; Return. Can't use RET because return
; address popped off the stack.


@ -30,7 +30,7 @@ extern kernel_end
extern apic_mp
; We use a special name to map this section at the begin of our kernel
; => Multiboot needs its magic number at the begin of the kernel
; => Multiboot needs its magic number at the beginning of the kernel
SECTION .mboot
global start
start:
@ -42,19 +42,19 @@ mboot:
; Multiboot macros to make a few lines more readable later
MULTIBOOT_PAGE_ALIGN equ 1<<0
MULTIBOOT_MEMORY_INFO equ 1<<1
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
MULTIBOOT_HEADER_MAGIC equ 0x1BADB002
MULTIBOOT_HEADER_FLAGS equ MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO ; | MULTIBOOT_AOUT_KLUDGE
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
EXTERN code, bss, end
; This is the GRUB Multiboot header. A boot signature
dd MULTIBOOT_HEADER_MAGIC
dd MULTIBOOT_HEADER_FLAGS
dd MULTIBOOT_CHECKSUM
ALIGN 4
; we need already a valid GDT to switch in the 64bit modus
; we already need a valid GDT to switch into 64bit mode
GDT64: ; Global Descriptor Table (64-bit).
.Null: equ $ - GDT64 ; The null descriptor.
dw 0 ; Limit (low).
@ -81,112 +81,90 @@ GDT64: ; Global Descriptor Table (64-bit).
dw $ - GDT64 - 1 ; Limit.
dq GDT64 ; Base.
times 256 DD 0
times 256 DD 0 ; Stack for booting
startup_stack:
SECTION .data
; create default page tables for the 64bit kernel
global boot_pgd ; aka PML4
; Create default page tables for the 64bit kernel
global boot_pml4
ALIGN 4096 ; of course, the page tables have to be page aligned
NOPTS equ 512
boot_pgd times 512 DQ 0
boot_pdpt times 512 DQ 0
boot_pd times 512 DQ 0
boot_pt times (NOPTS*512) DQ 0
PAGE_MAP_ENTRIES equ (1<<9)
PAGE_SIZE equ (1<<12)
boot_pml4 times PAGE_MAP_ENTRIES DQ 0
boot_pdpt times PAGE_MAP_ENTRIES DQ 0
boot_pgd times PAGE_MAP_ENTRIES DQ 0
boot_pgt times (KERNEL_SPACE/PAGE_SIZE) DQ 0
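; Sizing note (added for illustration): with 4 KiB pages every quadword in
; boot_pgt maps one page, so KERNEL_SPACE/PAGE_SIZE entries cover the whole
; kernel space, i.e. KERNEL_SPACE/(512*4096) page tables of 512 entries each.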
SECTION .text
ALIGN 8
%if MAX_CORES > 1
global smp_entry
smp_entry:
; enable caching, disable paging and fpu emulation
and eax, 0x1ffffffb
; ...and turn on FPU exceptions
or eax, 0x22
mov cr0, eax
; clears the current pgd entry
xor eax, eax
mov cr3, eax
; at this stage, we disable the SSE support
mov eax, cr4
and eax, 0xfffbf9ff
mov cr4, eax
; initialize page table
mov edi, boot_pgd
; Initialize cpu features
call cpu_init
; Initialize cr3 register
mov edi, boot_pml4
mov cr3, edi
; we need to enable PAE modus
; Enable PAE
mov eax, cr4
or eax, 1 << 5
mov cr4, eax
; switch to the compatibility mode (which is part of long mode)
; Enable longmode (compatibility mode)
mov ecx, 0xC0000080
rdmsr
or eax, 1 << 8
wrmsr
; enable paging
; Enable paging
mov eax, cr0
or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31st bit, and the PM-bit, which is the 0th bit.
mov cr0, eax
or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31st bit, and the PE-bit, which is the 0th bit.
mov cr0, eax ; According to the multiboot spec the PE-bit has to be set by bootloader already!
mov edi, [esp+4] ; set argument for smp_start
; Jump to 64-bit longmode
mov edi, [esp+4] ; Set argument for smp_start
lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
jmp GDT64.Code:smp_start64 ; Set the code segment and enter 64-bit long mode.
jmp $ ; endless loop
%endif
search_apic:
; Search MP Floating Pointer Structure
search_mps:
push ebp
mov ebp, esp
push ecx
xor eax, eax
mov ecx, [ebp+8]
L1:
.l1:
cmp [ecx], DWORD 0x5f504d5f ; MP_FLT_SIGNATURE
jne L2
jne .l2
mov al, BYTE [ecx+9]
cmp eax, 4
ja L2
ja .l2
mov al, BYTE [ecx+11]
cmp eax, 0
jne L2
jne .l2
mov eax, ecx
jmp L3
jmp .l3
L2:
.l2:
add ecx, 4
cmp ecx, [ebp+12]
jb L1
jb .l1
xor eax, eax
L3:
.l3:
pop ecx
pop ebp
ret
ALIGN 4
stublet:
mov esp, startup_stack-4
push ebx ; save pointer to the multiboot structure
mov eax, cr0
; enable caching, disable paging and fpu emulation
and eax, 0x1ffffffb
; ...and turn on FPU exceptions
or eax, 0x22
mov cr0, eax
; clears the current pgd entry
xor eax, eax
mov cr3, eax
; at this stage, we disable the SSE support
mov eax, cr4
and eax, 0xfffbf9ff
mov cr4, eax
; do we have the instruction cpuid?
check_longmode:
; Check for cpuid instruction
pushfd
pop eax
mov ecx, eax
@ -198,59 +176,22 @@ stublet:
push ecx
popfd
xor eax, ecx
jz Linvalid
; cpuid > 0x80000000?
jz .unsupported
; Check for extended cpu features (cpuid > 0x80000000)
mov eax, 0x80000000
cpuid
cmp eax, 0x80000001
jb Linvalid ; It is less, there is no long mode.
; do we have a long mode?
jb .unsupported ; It is less, there is no long mode.
; Check if longmode is supported
mov eax, 0x80000001
cpuid
test edx, 1 << 29 ; Test if the LM-bit, which is bit 29, is set in the D-register.
jz Linvalid ; They aren't, there is no long mode.
jz .unsupported ; They aren't, there is no long mode.
ret
.unsupported:
jmp $
; initialize page table
mov edi, boot_pgd
mov cr3, edi
; So lets make PML4T[0] point to the PDPT and so on:
mov DWORD [edi], boot_pdpt ; Set the double word at the destination index to pdpt.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
mov edi, boot_pdpt
mov DWORD [edi], boot_pd ; Set the double word at the destination index to pd.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
mov edi, boot_pd
mov ebx, boot_pt
mov ecx, NOPTS
L0:
mov DWORD [edi], ebx ; Set the double word at the destination index to pt.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
add edi, 8
add ebx, 0x1000
loop L0
%ifdef CONFIG_VGA
; map the VGA address into the virtual address space
mov edi, 0xB8000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, 0xB8000
or ebx, 0x00000013
mov DWORD [edi], ebx
%endif
; map multiboot structure into the virtual address space
mov edi, [esp]
and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, [esp]
and ebx, 0xFFFFF000
or ebx, 0x00000003
mov DWORD [edi], ebx
; check if lapic is available
check_lapic:
push eax
push ebx
push ecx
@ -259,123 +200,186 @@ L0:
cpuid
and edx, 0x200
cmp edx, 0
je no_lapic
; map lapic at 0xFEE00000 below the kernel
je .unsupported
; Map lapic at 0xFEE00000 below the kernel
mov edi, kernel_start - 0x1000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, 0xFEE00000
add edi, boot_pgt
mov ebx, 0xFEE00000 ; LAPIC base address
or ebx, 0x00000013
mov DWORD [edi], ebx
no_lapic:
.unsupported:
pop edx
pop ecx
pop ebx
pop eax
ret
; search APIC
cpu_init:
mov eax, cr0
; Enable caching, disable paging and fpu emulation
and eax, 0x1ffffffb
; ...and turn on FPU exceptions
or eax, 0x22
mov cr0, eax
; Clear the current pgd entry
xor eax, eax
mov cr3, eax
; at this stage, we disable the SSE support
mov eax, cr4
and eax, 0xfffbf9ff
mov cr4, eax
ret
; Identity map a single page at address eax
identity_page:
push edi
push ebx
mov edi, eax
and edi, 0xFFFFF000 ; page align in lower half
shr edi, 9 ; (edi >> 12) * 8 (index for boot_pgt)
add edi, boot_pgt
mov ebx, eax
and ebx, 0xFFFFF000 ; page align lower half
or ebx, 0x113 ; set present, global, writable and cache disable bits
mov DWORD [edi], ebx
mov DWORD [edi+4], 0x80000000 ; set execution disable bit in higher half
pop ebx
pop edi
ret
ALIGN 4
stublet:
mov esp, startup_stack-4
; Save pointer to the Multiboot structure
push ebx
; Initialize cpu features
call cpu_init
; Check if longmode is supported
call check_longmode
; Check if lapic is available
call check_lapic
; Find MP Floating Pointer structure
push DWORD 0x100000
push DWORD 0xF0000
call search_apic
call search_mps
add esp, 8
cmp eax, 0
jne La
jne map_mps
push DWORD 0xA0000
push DWORD 0x9F000
call search_apic
call search_mps
add esp, 8
cmp eax, 0
je Lb
je map_kernel
La:
; map MP Floating Pointer Structure
map_mps:
; Map MP Floating Pointer structure
mov DWORD [apic_mp], eax
mov edi, eax
and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, eax
and ebx, 0xFFFFF000
or ebx, 0x00000013
mov DWORD [edi], ebx
call identity_page
; map mp_config
mov edi, [eax+4]
and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, [eax+4]
and ebx, 0xFFFFF000
or ebx, 0x00000013
mov DWORD [edi], ebx
; Map MP Configuration table
mov eax, [eax+4] ; Offset for physical address of MP table
call identity_page
Lb:
%ifdef CONFIG_VGA
; Map VGA textmode plane
mov eax, 0xB8000
call identity_page
%endif
; Map Multiboot structure
mov eax, [esp] ; Pointer is still on the stack
call identity_page
map_kernel:
mov edi, kernel_start
shr edi, 9 ; (kernel_start >> 12) * 8
add edi, boot_pt
shr edi, 9 ; (edi >> 12) * 8 (index for boot_pgt)
add edi, boot_pgt
mov ebx, kernel_start
or ebx, 0x00000003
or ebx, 0x103 ; set present, global and writable flags
mov ecx, kernel_end ; determine kernel size in number of pages
sub ecx, kernel_start
shr ecx, 12
inc ecx
Lc:
mov DWORD [edi], ebx ; Set the double word at the destination index to the B-register.
.l1:
mov DWORD [edi], ebx
add edi, 8
add ebx, 0x1000
loop Lc
loop .l1
; we need to enable PAE modus
init_paging:
mov edi, boot_pml4
mov cr3, edi
mov DWORD [edi], boot_pdpt
or DWORD [edi], 0x03 ; Set present and writable flags
mov edi, boot_pdpt
mov DWORD [edi], boot_pgd
or DWORD [edi], 0x03 ; Set present and writable flags
mov edi, boot_pgd
mov ebx, boot_pgt
mov ecx, PAGE_MAP_ENTRIES ; Map all boot_pgt to the kernel space
.l1:
mov DWORD [edi], ebx
or DWORD [edi], 0x03 ; Set present and writable flags
add edi, 8
add ebx, 0x1000
loop .l1
; Enable PAE
mov eax, cr4
or eax, 1 << 5
mov cr4, eax
; switch to the compatibility mode (which is part of long mode)
; Enable longmode (compatibility mode)
mov ecx, 0xC0000080
rdmsr
or eax, 1 << 8
or eax, 1 << 8 ; IA32_EFER.LME = 1
wrmsr
; enable paging
; Enable paging
mov eax, cr0
or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31st bit, and the PM-bit, which is the 0th bit.
or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31st bit, and the PE-bit, which is the 0th bit.
mov cr0, eax
pop ebx ; restore pointer to multiboot structure
lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode.
Linvalid:
jmp $
; Jump to 64-bit longmode
pop ebx ; Restore pointer to multiboot structure
lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode.
[BITS 64]
start64:
; initialize segment registers
; Initialize segment registers
mov ax, GDT64.Data
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
mov ss, ax
; set default stack pointer
; Set default stack pointer
mov rsp, boot_stack
add rsp, KERNEL_STACK_SIZE-16
; interpret multiboot information
; Interpret multiboot information
extern multiboot_init
mov rdi, rbx
call multiboot_init
; jump to the boot processors's C code
; Jump to the boot processors's C code
extern main
call main
jmp $
%if MAX_CORES > 1
smp_start64:
; initialize segment registers
; Initialize segment registers
mov ax, GDT64.Data
mov ds, ax
mov es, ax
@ -383,29 +387,12 @@ smp_start64:
mov gs, ax
mov ss, ax
; jump to the boot processors's C code
; Jump to the boot processors's C code
extern smp_start
call smp_start
jmp $
%endif
global cpu_init
cpu_init:
; mov eax, cr0
; enable caching, disable paging and fpu emulation
; and eax, 0x1ffffffb
; ...and turn on FPU exceptions
; or eax, 0x22
; mov cr0, eax
; clears the current pgd entry
; xor eax, eax
; mov cr3, eax
; at this stage, we disable the SSE support
; mov eax, cr4
; and eax, 0xfffbf9ff
; mov cr4, eax
; ret
; This will set up our new segment registers and is declared in
; C as 'extern void gdt_flush();'
global gdt_flush
@ -742,41 +729,41 @@ extern syscall_handler
; used to realize system calls
isrsyscall:
push r15
push r14
push r13
push r12
cli ; disable interrupts during prologue
; save caller saved registers
push r11
push r10
push r9
push r8
push rdi
push rsi
push rbp
push rsp
push rbx
push rdx
push rcx
push rax
mov rdi, rsp
; set kernel data segments
mov ax, 0x10
mov ds, ax
; x86-64 ABI calling convention
mov r8, rbx
mov r9, rax
mov rax, 0 ; we've not used vector registers for this va_arg call
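; Background (added for illustration): the x86-64 System V ABI passes the
; first six integer arguments in rdi, rsi, rdx, rcx, r8 and r9, and for a
; variadic callee al must hold the number of vector registers used, which
; is why rax is zeroed before the call.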
sti ; enable interrupts during syscall
call syscall_handler
cli ; disable interrupts during epilogue
pop rax
; restore caller saved registers
pop rcx
pop rdx
pop rbx
add rsp, 8
pop rbp
pop rsi
pop rdi
pop r8
pop r9
pop r10
pop r11
pop r12
pop r13
pop r14
iretq
global irq0


@ -50,7 +50,7 @@ size_t* get_current_stack(void)
#endif
// use new page table
write_cr3(virt_to_phys((size_t)curr_task->pgd));
write_cr3(virt_to_phys((size_t)curr_task->page_map));
return curr_task->last_stack_pointer;
}


@ -208,13 +208,18 @@ static const char *exception_messages[] = {
static void fault_handler(struct state *s)
{
if (s->int_no < 32) {
kputs(exception_messages[s->int_no]);
task_t* task = per_core(current_task);
#ifdef CONFIG_X86_32
kprintf(" Exception (%d) at 0x%x:0x%x on core %u, error code 0x%x, eflags 0x%x\n",
s->int_no, s->cs, s->eip, CORE_ID, s->error, s->eflags);
kprintf("%s Exception (%d) at cs:eip = %#x:%#lx, core = %u, task = %u, error = %#x\n",
"Register state: eflags = %#lx, eax = %#lx, ebx = %#lx, ecx = %#lx, edx = %#lx, edi = %#lx, esi = %#lx, ebp = %#llx, esp = %#lx\n",
exception_messages[s->int_no], s->int_no, s->cs, s->eip, CORE_ID, task->id, s->error,
s->eflags, s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
#elif defined(CONFIG_X86_64)
kprintf(" Exception (%d) at 0x%llx:0x%llx on core %u, error code 0x%llx, rflags 0x%llx\n",
s->int_no, s->cs, s->rip, CORE_ID, s->error, s->rflags);
kprintf("%s Exception (%d) at cs:rip = %#x:%#lx, core = %u, task = %u, error = %#lx\n"
"Register state: rflags = %#lx, rax = %#lx, rbx = %#lx, rcx = %#lx, rdx = %#lx, rdi = %#lx, rsi = %#lx, rbp = %#llx, rsp = %#lx\n",
exception_messages[s->int_no], s->int_no, s->cs, s->rip, CORE_ID, task->id, s->error,
s->rflags, s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp);
#endif
/* Now, we signalize that we have handled the interrupt */


@ -37,7 +37,7 @@ void kb_init(size_t size, tid_t tid) {
}
void kb_finish(void) {
kfree(kb_buffer.buffer, (kb_buffer.maxsize * sizeof(char)));
kfree(kb_buffer.buffer);
kb_buffer.buffer = NULL;
kb_buffer.size = 0;
kb_buffer.maxsize = 0;


@ -42,38 +42,4 @@ L3:
pop rax
ret
%if 0
; The following function is derived from JamesM's kernel development tutorials
; (http://www.jamesmolloy.co.uk/tutorial_html/)
global copy_page_physical
copy_page_physical:
push esi ; According to __cdecl, we must preserve the contents of ESI
push edi ; and EDI.
pushf ; push EFLAGS, so we can pop it and reenable interrupts
; later, if they were enabled anyway.
cli ; Disable interrupts, so we aren't interrupted.
; Load these in BEFORE we disable paging!
mov edi, [esp+12+4] ; Destination address
mov esi, [esp+12+8] ; Source address
mov edx, cr0 ; Get the control register...
and edx, 0x7fffffff ; and...
mov cr0, edx ; Disable paging.
cld
mov ecx, 0x400 ; 1024*4bytes = 4096 bytes = page size
rep movsd ; copy page
mov edx, cr0 ; Get the control register again
or edx, 0x80000000 ; and...
mov cr0, edx ; Enable paging.
popf ; Pop EFLAGS back.
pop edi ; Get the original value of EDI
pop esi ; and ESI back.
ret
%endif
SECTION .note.GNU-stack noalloc noexec nowrite progbits


@ -46,7 +46,7 @@
* 0x00100000 - 0x0DEADFFF: Kernel (size depends on the configuration) (221MB)
* 0x0DEAE000 - 0x3FFFEFFF: Kernel heap (801MB)
* 0x3FFFF000 - 0x3FFFFFFF: Page Tables are mapped in this region (4KB)
* (The last 256 entries belongs to kernel space)
* (The first 256 entries belong to kernel space)
*/
/*
@ -57,13 +57,14 @@ extern const void kernel_start;
extern const void kernel_end;
// boot task's page directory and page directory lock
static page_dir_t boot_pgd = {{[0 ... PGT_ENTRIES-1] = 0}};
static page_table_t pgt_container = {{[0 ... PGT_ENTRIES-1] = 0}};
static page_table_t boot_pgt[KERNEL_SPACE/(1024*PAGE_SIZE)];
static page_map_t boot_pgd = {{[0 ... MAP_ENTRIES-1] = 0}};
static page_map_t boot_pgt[KERNEL_SPACE/(MAP_ENTRIES*PAGE_SIZE)];
static page_map_t pgt_container = {{[0 ... MAP_ENTRIES-1] = 0}};
static spinlock_t kslock = SPINLOCK_INIT;
static int paging_enabled = 0;
page_dir_t* get_boot_pgd(void)
page_map_t* get_boot_page_map(void)
{
return &boot_pgd;
}
@ -71,26 +72,26 @@ page_dir_t* get_boot_pgd(void)
/*
* TODO: We create a full copy of the current task. Copy-On-Access will be the better solution.
*
* No PGD locking is needed because onls create_pgd use this function and holds already the
* No PGD locking is needed because only create_page_map() uses this function and already holds the
* PGD lock.
*/
inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_table_t* pgt, int* counter)
inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_map_t* pgt, int* counter)
{
uint32_t i;
page_table_t* new_pgt;
page_map_t* new_pgt;
size_t phyaddr;
if (BUILTIN_EXPECT(!pgt, 0))
return 0;
new_pgt = kmalloc(sizeof(page_table_t));
new_pgt = kmalloc(sizeof(page_map_t));
if (!new_pgt)
return 0;
memset(new_pgt, 0x00, sizeof(page_table_t));
memset(new_pgt, 0x00, sizeof(page_map_t));
if (counter)
(*counter)++;
for(i=0; i<PGT_ENTRIES; i++) {
for(i=0; i<MAP_ENTRIES; i++) {
if (pgt->entries[i] & PAGE_MASK) {
if (!(pgt->entries[i] & PG_USER)) {
// Kernel page => copy only page entries
@ -117,11 +118,11 @@ inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_tabl
return phyaddr;
}
int create_pgd(task_t* task, int copy)
int create_page_map(task_t* task, int copy)
{
page_dir_t* pgd;
page_table_t* pgt;
page_table_t* pgt_container;
page_map_t* pgd;
page_map_t* pgt;
page_map_t* pgt_container;
uint32_t i;
uint32_t index1, index2;
size_t viraddr, phyaddr;
@ -133,25 +134,26 @@ int create_pgd(task_t* task, int copy)
// we already know the virtual address of the "page table container"
// (see file header)
pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
pgt_container = (page_map_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
// create new page directory for the new task
pgd = kmalloc(sizeof(page_dir_t));
pgd = kmalloc(sizeof(page_map_t));
if (!pgd)
return -ENOMEM;
memset(pgd, 0x00, sizeof(page_dir_t));
memset(pgd, 0x00, sizeof(page_map_t));
// create a new "page table container" for the new task
pgt = kmalloc(sizeof(page_table_t));
pgt = kmalloc(sizeof(page_map_t));
if (!pgt) {
kfree(pgd, sizeof(page_dir_t));
kfree(pgd, sizeof(page_map_t));
return -ENOMEM;
}
memset(pgt, 0x00, sizeof(page_table_t));
memset(pgt, 0x00, sizeof(page_map_t));
// copy kernel tables
spinlock_lock(&kslock);
for(i=0; i<PGT_ENTRIES; i++) {
for(i=0; i<MAP_ENTRIES; i++) {
pgd->entries[i] = boot_pgd.entries[i];
// only kernel entries will be copied
if (pgd->entries[i] && !(pgd->entries[i] & PG_USER))
@ -169,36 +171,33 @@ int create_pgd(task_t* task, int copy)
pgd->entries[index1] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
pgt->entries[index2] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_PAGE;
task->pgd = pgd;
task->page_map = pgd;
if (copy) {
spinlock_irqsave_lock(&curr_task->pgd_lock);
spinlock_irqsave_lock(&curr_task->page_lock);
for (i=KERNEL_SPACE/(1024*PAGE_SIZE); i<1024; i++) {
if (!(curr_task->pgd->entries[i]))
if (!(curr_task->page_map->entries[i]))
continue;
if (!(curr_task->pgd->entries[i] & PG_USER))
if (!(curr_task->page_map->entries[i] & PG_USER))
continue;
phyaddr = copy_page_table(task, i, (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + i*PAGE_SIZE) & PAGE_MASK), &counter);
phyaddr = copy_page_table(task, i, (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + i*PAGE_SIZE) & PAGE_MASK), &counter);
if (phyaddr) {
pgd->entries[i] = (phyaddr & PAGE_MASK) | (curr_task->pgd->entries[i] & 0xFFF);
pgd->entries[i] = (phyaddr & PAGE_MASK) | (curr_task->page_map->entries[i] & 0xFFF);
pgt->entries[i] = (phyaddr & PAGE_MASK) | KERN_PAGE;
}
}
spinlock_irqsave_unlock(&curr_task->pgd_lock);
spinlock_irqsave_unlock(&curr_task->page_lock);
}
return counter;
}
/*
* drops all page frames and the PGD of a user task
*/
int drop_pgd(void)
int drop_page_map(void)
{
page_dir_t* pgd = per_core(current_task)->pgd;
page_map_t* pgd = per_core(current_task)->page_map;
size_t phy_pgd = virt_to_phys((size_t) pgd);
task_t* task = per_core(current_task);
uint32_t i;
@ -206,9 +205,9 @@ int drop_pgd(void)
if (BUILTIN_EXPECT(pgd == &boot_pgd, 0))
return -EINVAL;
spinlock_irqsave_lock(&task->pgd_lock);
spinlock_irqsave_lock(&task->page_lock);
for(i=0; i<PGT_ENTRIES; i++) {
for(i=0; i<MAP_ENTRIES; i++) {
if (pgd->entries[i] & PG_USER) {
put_page(pgd->entries[i] & PAGE_MASK);
pgd->entries[i] = 0;
@ -218,9 +217,9 @@ int drop_pgd(void)
// freeing the page directory
put_page(phy_pgd);
task->pgd = NULL;
task->page_map = NULL;
spinlock_irqsave_unlock(&task->pgd_lock);
spinlock_irqsave_unlock(&task->page_lock);
return 0;
}
@ -229,24 +228,24 @@ size_t virt_to_phys(size_t viraddr)
{
task_t* task = per_core(current_task);
uint32_t index1, index2;
page_table_t* pgt;
page_map_t* pgt;
size_t ret = 0;
if (!paging_enabled)
return viraddr;
if (BUILTIN_EXPECT(!task || !task->pgd, 0))
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return 0;
spinlock_irqsave_lock(&task->pgd_lock);
spinlock_irqsave_lock(&task->page_lock);
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
if (!(task->pgd->entries[index1] & PAGE_MASK))
if (!(task->page_map->entries[index1] & PAGE_MASK))
goto out;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2]))
goto out;
@ -255,7 +254,7 @@ size_t virt_to_phys(size_t viraddr)
out:
//kprintf("vir %p to phy %p\n", viraddr, ret);
spinlock_irqsave_unlock(&task->pgd_lock);
spinlock_irqsave_unlock(&task->page_lock);
return ret;
}
@ -263,11 +262,11 @@ out:
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{
task_t* task = per_core(current_task);
page_table_t* pgt;
page_map_t* pgt;
size_t index, i;
size_t ret;
if (BUILTIN_EXPECT(!task || !task->pgd, 0))
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return 0;
if (BUILTIN_EXPECT(!paging_enabled && (viraddr != phyaddr), 0))
@ -276,7 +275,7 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
if (flags & MAP_KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
spinlock_irqsave_lock(&task->page_lock);
if (!viraddr) {
viraddr = vm_alloc(npages, flags);
@ -292,10 +291,10 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE, phyaddr+=PAGE_SIZE) {
index = viraddr >> 22;
if (!(task->pgd->entries[index])) {
page_table_t* pgt_container;
if (!(task->page_map->entries[index])) {
page_map_t* pgt_container;
pgt = (page_table_t*) get_pages(1);
pgt = (page_map_t*) get_page();
if (BUILTIN_EXPECT(!pgt, 0)) {
kputs("map_address: out of memory\n");
ret = 0;
@ -304,17 +303,17 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
// set the new page table into the directory
if (flags & MAP_USER_SPACE)
task->pgd->entries[index] = (uint32_t)pgt|USER_TABLE;
task->page_map->entries[index] = (uint32_t)pgt|USER_TABLE;
else
task->pgd->entries[index] = (uint32_t)pgt|KERN_TABLE;
task->page_map->entries[index] = (uint32_t)pgt|KERN_TABLE;
// if paging is already enabled, we need to use the virtual address
if (paging_enabled)
// we already know the virtual address of the "page table container"
// (see file header)
pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
pgt_container = (page_map_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
else
pgt_container = (page_table_t*) (task->pgd->entries[(KERNEL_SPACE - PAGE_SIZE) >> 22] & PAGE_MASK);
pgt_container = (page_map_t*) (task->page_map->entries[(KERNEL_SPACE - PAGE_SIZE) >> 22] & PAGE_MASK);
if (BUILTIN_EXPECT(!pgt_container, 0)) {
kputs("map_address: internal error\n");
@ -330,11 +329,11 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag
memset((void*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK), 0x00, PAGE_SIZE);
else
memset(pgt, 0x00, PAGE_SIZE);
} else pgt = (page_table_t*) (task->pgd->entries[index] & PAGE_MASK);
} else pgt = (page_map_t*) (task->page_map->entries[index] & PAGE_MASK);
/* convert physical address to virtual */
if (paging_enabled)
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK);
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK);
index = (viraddr >> 12) & 0x3FF;
if (pgt->entries[index] && !(flags & MAP_REMAP)) {
@ -382,7 +381,7 @@ out:
if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
spinlock_irqsave_unlock(&task->page_lock);
return ret;
}
@ -392,18 +391,18 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
uint32_t index1, index2, newflags;
size_t viraddr = start & 0xFFFFF000;
size_t phyaddr;
page_table_t* pgt;
page_dir_t* pgd;
page_map_t* pgt;
page_map_t* pgd;
task_t* task = per_core(current_task);
if (BUILTIN_EXPECT(!paging_enabled, 0))
return -EINVAL;
pgd = per_core(current_task)->pgd;
pgd = per_core(current_task)->page_map;
if (BUILTIN_EXPECT(!pgd, 0))
return -EINVAL;
spinlock_irqsave_lock(&task->pgd_lock);
spinlock_irqsave_lock(&task->page_lock);
while (viraddr < end)
{
@ -411,7 +410,7 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
index2 = (viraddr >> 12) & 0x3FF;
while ((viraddr < end) && (index2 < 1024)) {
pgt = (page_table_t*) (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
pgt = (page_map_t*) (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (pgt && pgt->entries[index2]) {
phyaddr = pgt->entries[index2] & PAGE_MASK;
newflags = pgt->entries[index2] & 0xFFF; // get old flags
@ -448,7 +447,7 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags)
}
}
spinlock_irqsave_unlock(&task->pgd_lock);
spinlock_irqsave_unlock(&task->page_lock);
return 0;
}
@ -464,9 +463,9 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
uint32_t index1, index2, j;
size_t viraddr, i, ret = 0;
size_t start, end;
page_table_t* pgt;
page_map_t* pgt;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
if (BUILTIN_EXPECT(!task || !task->page_map || !paging_enabled, 0))
return 0;
if (flags & MAP_KERNEL_SPACE) {
@ -483,7 +482,7 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
if (flags & MAP_KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
spinlock_irqsave_lock(&task->page_lock);
viraddr = i = start;
j = 0;
@ -491,7 +490,7 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
index1 = i >> 22;
index2 = (i >> 12) & 0x3FF;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2])) {
i+=PAGE_SIZE;
j++;
@ -509,7 +508,7 @@ size_t vm_alloc(uint32_t npages, uint32_t flags)
if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
spinlock_irqsave_unlock(&task->page_lock);
return ret;
}
@ -519,22 +518,22 @@ int unmap_region(size_t viraddr, uint32_t npages)
task_t* task = per_core(current_task);
uint32_t i;
uint32_t index1, index2;
page_table_t* pgt;
page_map_t* pgt;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
if (BUILTIN_EXPECT(!task || !task->page_map || !paging_enabled, 0))
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
spinlock_irqsave_lock(&task->page_lock);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt)
continue;
pgt->entries[index2] &= ~PG_PRESENT;
@ -548,7 +547,7 @@ int unmap_region(size_t viraddr, uint32_t npages)
if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
spinlock_irqsave_unlock(&task->page_lock);
return 0;
}
@ -558,22 +557,22 @@ int vm_free(size_t viraddr, uint32_t npages)
task_t* task = per_core(current_task);
uint32_t i;
uint32_t index1, index2;
page_table_t* pgt;
page_map_t* pgt;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
if (BUILTIN_EXPECT(!task || !task->page_map || !paging_enabled, 0))
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
spinlock_irqsave_lock(&task->page_lock);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt)
continue;
pgt->entries[index2] = 0;
@ -584,7 +583,7 @@ int vm_free(size_t viraddr, uint32_t npages)
if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
spinlock_irqsave_unlock(&task->page_lock);
return 0;
}
@ -593,8 +592,8 @@ int print_paging_tree(size_t viraddr)
{
task_t* task = per_core(current_task);
uint32_t index1, index2;
page_dir_t* pgd = NULL;
page_table_t* pgt = NULL;
page_map_t* pgd = NULL;
page_map_t* pgt = NULL;
if (BUILTIN_EXPECT(!viraddr, 0))
return -EINVAL;
@ -602,20 +601,20 @@ int print_paging_tree(size_t viraddr)
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
spinlock_irqsave_lock(&task->pgd_lock);
spinlock_irqsave_lock(&task->page_lock);
kprintf("Paging dump of address 0x%x\n", viraddr);
pgd = task->pgd;
pgd = task->page_map;
kprintf("\tPage directory entry %u: ", index1);
if (pgd) {
kprintf("0x%0x\n", pgd->entries[index1]);
pgt = (page_table_t*) (pgd->entries[index1] & PAGE_MASK);
pgt = (page_map_t*) (pgd->entries[index1] & PAGE_MASK);
} else
kputs("invalid page directory\n");
/* convert physical address to virtual */
// convert physical address to virtual
if (paging_enabled && pgt)
pgt = (page_table_t*) (KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE);
pgt = (page_map_t*) (KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE);
kprintf("\tPage table entry %u: ", index2);
if (pgt)
@ -623,7 +622,7 @@ int print_paging_tree(size_t viraddr)
else
kputs("invalid page table\n");
spinlock_irqsave_unlock(&task->pgd_lock);
spinlock_irqsave_unlock(&task->page_lock);
return 0;
}
@ -631,12 +630,12 @@ int print_paging_tree(size_t viraddr)
static void pagefault_handler(struct state *s)
{
task_t* task = per_core(current_task);
page_dir_t* pgd = task->pgd;
page_table_t* pgt = NULL;
size_t viraddr = read_cr2();
size_t phyaddr;
#ifdef CONFIG_ROCKCREEK
uint32_t index1, index2;
page_map_t* pgd = task->page_map;
page_map_t* pgt = NULL;
#endif
if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
@ -650,7 +649,7 @@ static void pagefault_handler(struct state *s)
memset((void*) viraddr, 0x00, PAGE_SIZE);
return;
}
kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr);
put_page(phyaddr);
}
@ -661,7 +660,7 @@ static void pagefault_handler(struct state *s)
index2 = (viraddr >> 12) & 0x3FF;
if (!pgd || !(pgd->entries[index1] & PAGE_MASK))
goto default_handler;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
pgt = (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2]))
goto default_handler;
if (pgt->entries[index2] & PG_SVM_INIT) {
@ -687,14 +686,14 @@ default_handler:
int arch_paging_init(void)
{
uint32_t i, npages, index1, index2;
page_table_t* pgt;
page_map_t* pgt;
size_t viraddr;
// uninstall default handler and install our own
// replace default pagefault handler
irq_uninstall_handler(14);
irq_install_handler(14, pagefault_handler);
// Create a page table to reference to the other page tables
// create a page table to reference to the other page tables
pgt = &pgt_container;
// map this table at the end of the kernel space
@ -703,21 +702,21 @@ int arch_paging_init(void)
index2 = (viraddr >> 12) & 0x3FF;
// now, we create a self reference
per_core(current_task)->pgd->entries[index1] = (((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
pgt->entries[index2] = ((size_t) pgt & 0xFFFFF000)|KERN_PAGE;
per_core(current_task)->page_map->entries[index1] = (((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
pgt->entries[index2] = ((size_t) pgt & PAGE_MASK)|KERN_PAGE;
// create the other PGTs for the kernel space
for(i=0; i<KERNEL_SPACE/(1024*PAGE_SIZE)-1; i++) {
size_t phyaddr = boot_pgt+i;
memset((void*) phyaddr, 0x00, sizeof(page_table_t));
per_core(current_task)->pgd->entries[i] = (phyaddr & PAGE_MASK)|KERN_TABLE;
memset((void*) phyaddr, 0x00, sizeof(page_map_t));
per_core(current_task)->page_map->entries[i] = (phyaddr & PAGE_MASK)|KERN_TABLE;
pgt->entries[i] = (phyaddr & PAGE_MASK)|KERN_PAGE;
}
/*
* Set the page table and page directory entries for the kernel. We map the kernel's physical address
* to the same virtual address.
* Set the page table and page directory entries for the kernel.
* We map the kernel's physical address to the same virtual address.
*/
npages = ((size_t) &kernel_end - (size_t) &kernel_start) >> PAGE_SHIFT;
if ((size_t)&kernel_end & (PAGE_SIZE-1))
@ -725,7 +724,7 @@ int arch_paging_init(void)
map_region((size_t)&kernel_start, (size_t)&kernel_start, npages, MAP_KERNEL_SPACE);
#if MAX_CORES > 1
// Reserve page for smp boot code
// reserve page for smp boot code
if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE)) {
kputs("could not reserve page for smp boot code\n");
return -ENOMEM;
@ -738,16 +737,12 @@ int arch_paging_init(void)
#endif
#ifdef CONFIG_MULTIBOOT
/*
* of course, mb_info has to map into the kernel space
*/
// map mb_info into the kernel space
if (mb_info)
map_region((size_t) mb_info & PAGE_MASK, (size_t) mb_info & PAGE_MASK, 1, MAP_KERNEL_SPACE);
#if 0
/*
* Map reserved memory regions into the kernel space
*/
// map reserved memory regions into the kernel space
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr;
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
@ -805,7 +800,7 @@ int arch_paging_init(void)
kprintf("Map FPGA regsiters at 0x%x\n", viraddr);
#endif
/* enable paging */
// enable paging
write_cr3((uint32_t) &boot_pgd);
i = read_cr0();
i = i | (1 << 31);
@ -822,10 +817,7 @@ int arch_paging_init(void)
bootinfo->addr = viraddr;
#endif
/*
* we turned on paging
* => now, we are able to register our task
*/
// we turned on paging => now, we are able to register our task
register_task();
// APIC registers into the kernel address space
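The "page table container" trick used throughout this file maps every page table at a fixed spot in the last 4 MB of kernel space, so the kernel can edit any table without knowing its physical location. A minimal helper sketch built from the address arithmetic above (the helper name pgt_of() is made up for illustration):

// Sketch: virtual address at which the page table covering
// viraddr is visible through the container mapping (32 bit).
static inline page_map_t* pgt_of(size_t viraddr)
{
	uint32_t index1 = viraddr >> 22;   // page directory index
	return (page_map_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE
		+ index1*PAGE_SIZE) & PAGE_MASK);
}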

File diff suppressed because it is too large.


@ -70,7 +70,7 @@ static ssize_t socket_write(fildes_t* file, uint8_t* buffer, size_t size)
return -ENOMEM;
memcpy(tmp, buffer, size);
ret = lwip_write(file->offset, tmp, size);
kfree(tmp, size);
kfree(tmp);
#endif
if (ret < 0)
ret = -errno;
@ -147,7 +147,7 @@ int socket_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node, sizeof(vfs_node_t));
kfree(new_node);
return -ENOMEM;
}


@ -83,15 +83,8 @@ static ssize_t stdio_read(fildes_t* file, uint8_t* buffer, size_t size)
static ssize_t stdio_write(fildes_t* file, uint8_t* buffer, size_t size)
{
int i;
for (i = 0; i<size; i++, buffer++) {
#ifdef CONFIG_VGA
vga_putchar(*buffer);
#elif defined(CONFIG_UART)
uart_putchar(*buffer);
#else
for (i = 0; i<size; i++, buffer++)
kputchar(*buffer);
#endif
}
file->offset += size;
return size;
@ -152,7 +145,7 @@ int null_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node, sizeof(vfs_node_t));
kfree(new_node);
return -ENOMEM;
}
@ -211,7 +204,7 @@ int stdin_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node, sizeof(vfs_node_t));
kfree(new_node);
return -ENOMEM;
}
@ -270,7 +263,7 @@ int stdout_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node, sizeof(vfs_node_t));
kfree(new_node);
return -ENOMEM;
}
@ -329,7 +322,7 @@ int stderr_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node, sizeof(vfs_node_t));
kfree(new_node);
return -ENOMEM;
}


@ -210,7 +210,7 @@ static int initrd_open(fildes_t* file, const char* name)
if (file->node->type == FS_FILE) {
if ((file->flags & O_CREAT) && (file->flags & O_EXCL))
return -EEXIST;
/* in the case of O_TRUNC kfree all the nodes */
if (file->flags & O_TRUNC) {
uint32_t i;
@ -221,8 +221,7 @@ static int initrd_open(fildes_t* file, const char* name)
/* the first blist pointer has to remain valid. */
for(i=0; i<MAX_DATABLOCKS && !data; i++) {
if (blist->data[i]) {
kfree(blist->data[i],
sizeof(data_block_t));
kfree(blist->data[i]);
}
}
if (blist->next) {
@ -234,12 +233,12 @@ static int initrd_open(fildes_t* file, const char* name)
do {
for(i=0; i<MAX_DATABLOCKS && !data; i++) {
if (blist->data[i]) {
kfree(blist->data[i], sizeof(data_block_t));
kfree(blist->data[i]);
}
}
lastblist = blist;
blist = blist->next;
kfree(lastblist, sizeof(block_list_t));
kfree(lastblist);
} while(blist);
}
@ -253,7 +252,7 @@ static int initrd_open(fildes_t* file, const char* name)
/* opendir was called: */
if (name[0] == '\0')
return 0;
/* open file was called: */
if (!(file->flags & O_CREAT))
return -ENOENT;
@ -264,11 +263,11 @@ static int initrd_open(fildes_t* file, const char* name)
vfs_node_t* new_node = kmalloc(sizeof(vfs_node_t));
if (BUILTIN_EXPECT(!new_node, 0))
return -EINVAL;
blist = &file->node->block_list;
dir_block_t* dir_block;
dirent_t* dirent;
memset(new_node, 0x00, sizeof(vfs_node_t));
new_node->type = FS_FILE;
new_node->read = &initrd_read;
@ -286,7 +285,7 @@ static int initrd_open(fildes_t* file, const char* name)
if (!dirent->vfs_node) {
dirent->vfs_node = new_node;
strncpy(dirent->name, (char*) name, MAX_FNAME);
goto exit_create_file; // there might be a better Solution ***************
goto exit_create_file; // TODO: there might be a better solution
}
}
}
@ -425,9 +424,9 @@ static vfs_node_t* initrd_mkdir(vfs_node_t* node, const char* name)
blist = blist->next;
} while(blist);
kfree(dir_block, sizeof(dir_block_t));
kfree(dir_block);
out:
kfree(new_node, sizeof(vfs_node_t));
kfree(new_node);
return NULL;
}


@ -34,14 +34,14 @@ extern "C" {
#define PAGE_SHIFT 12
#define CACHE_LINE 64
#define MAILBOX_SIZE 32
#define TIMER_FREQ 100 /* in HZ */
#define CLOCK_TICK_RATE 1193182 /* 8254 chip's internal oscillator frequency */
#define TIMER_FREQ 100 // in HZ
#define CLOCK_TICK_RATE 1193182 // 8254 chip's internal oscillator frequency
#define INT_SYSCALL 0x80
#define KERNEL_SPACE (1*1024*1024*1024)
#define VIDEO_MEM_ADDR 0xB8000 // the video memora address
#define VIDEO_MEM_ADDR 0xB8000 // the video memory address
#define SMP_SETUP_ADDR 0x07000
#define BYTE_ORDER LITTLE_ENDIAN
#define UART_PORT 0x3F8 // 0x2F8 for SCC
#define BYTE_ORDER LITTLE_ENDIAN
/*
* address space / (page_size * sizeof(uint8_t))
@ -52,7 +52,7 @@ extern "C" {
#define CONFIG_PCI
#define CONFIG_LWIP
#define CONFIG_VGA
//#define CONFIG_UART
#define CONFIG_UART
#define CONFIG_KEYBOARD
#define CONFIG_MULTIBOOT
//#define CONFIG_ROCKCREEK
@ -72,7 +72,7 @@ extern "C" {
//#define SHMADD
#define SHMDBG
//#define SHMADD_CACHEABLE
#define SCC_BOOTINFO 0x80000
#define SCC_BOOTINFO 0x80000
#define BUILTIN_EXPECT(exp, b) __builtin_expect((exp), (b))
//#define BUILTIN_EXPECT(exp, b) (exp)
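TIMER_FREQ and CLOCK_TICK_RATE together fix the PIT reload value: the 8254 decrements at CLOCK_TICK_RATE and raises an interrupt each time the counter hits zero. A quick worked example (standard 8254 arithmetic, not code from this tree):

// 1193182 Hz / 100 Hz = 11931 (integer division), so the timer
// interrupt fires every 11931 input ticks, i.e. about every 10 ms.
uint16_t divisor = CLOCK_TICK_RATE / TIMER_FREQ;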

include/metalsvm/malloc.h Normal file

@ -0,0 +1,72 @@
/*
* Copyright 2010 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#ifndef __MALLOC_H__
#define __MALLOC_H__
#include <metalsvm/stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/// Binary exponent of maximal size for kmalloc()
#define BUDDY_MAX 32 // 4 GB
/// Binary exponent of minimal buddy size
#define BUDDY_MIN 3 // 8 Byte >= sizeof(buddy_t)
/// Binary exponent of the size which we allocate with buddy_fill()
#define BUDDY_ALLOC 15 // 32 KByte >= PAGE_SIZE
#define BUDDY_LISTS (BUDDY_MAX-BUDDY_MIN+1)
#define BUDDY_MAGIC 0xBABE
union buddy;
/** @brief Buddy
*
* Every free memory block is stored in a linked list according to its size.
* We can use this free memory to store this buddy_t union, which represents
* this block (the buddy_t union is aligned to the front).
* Therefore the address of the buddy_t union is equal to the address
* of the underlying free memory block.
*
* Every allocated memory block is prefixed with its binary size exponent and
* a known magic number. This prefix is hidden from the user because it's located
* before the actual memory address returned by kmalloc()
*/
typedef union buddy {
/// Pointer to the next buddy in the linked list.
union buddy* next;
struct {
/// The binary exponent of the block size
uint8_t exponent;
/// Must be equal to BUDDY_MAGIC for a valid memory block
uint16_t magic;
} prefix;
} buddy_t;
/** @brief Dump free buddies */
void buddy_dump(void);
#ifdef __cplusplus
}
#endif
#endif
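Finding the right free list for a request means rounding the needed size (payload plus prefix) up to the next power of two and clamping against BUDDY_MIN. A sketch of that computation, assuming the msb() helper used elsewhere in the kernel returns the index of the highest set bit; whether kmalloc() computes it exactly this way is an assumption:

// Sketch: map a request of sz bytes to a buddy exponent.
static inline uint32_t buddy_exp(size_t sz)
{
	size_t need = sz + sizeof(buddy_t);  // payload plus hidden prefix
	uint32_t exp = msb(need);            // index of the highest set bit
	if (need > ((size_t) 1 << exp))
		exp++;                           // round up to the next power of two
	if (exp < BUDDY_MIN)
		exp = BUDDY_MIN;                 // never hand out blocks below the minimum
	return exp;
}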


@ -31,7 +31,6 @@
#include <metalsvm/stddef.h>
#include <asm/atomic.h>
//#include <asm/mmu.h>
#ifdef __cplusplus
extern "C" {
@ -50,33 +49,54 @@ extern atomic_int32_t total_available_pages;
*/
int mmu_init(void);
/** @brief get continuous pages
/** @brief Get continuous pages
*
* This function finds a continuous page region (first fit algorithm)
*
* @param no_pages Desired number of pages
* Use a first-fit algorithm to find a suitable, continuous physical memory region
*
* @param npages Desired number of pages
* @return
* - physical address on success
* - 0 on failure
*/
size_t get_pages(uint32_t no_pages);
size_t get_pages(uint32_t npages);
/** @brief get a single page
/** @brief Get a single page
*
* Convenience function: uses get_pages(1);
*/
static inline size_t get_page(void) { return get_pages(1); }
/** @brief Put back a page after use
/** @brief Put back a sequence of continuous pages
*
* @param phyaddr Physical address to put back
* @param phyaddr Physical address of the first page
* @param npages Number of pages
*
* @return
* - 0 on success
* - -EINVAL (-22) on failure
* @return Number of pages which were marked as used before the call
*/
int put_page(size_t phyaddr);
int put_pages(size_t phyaddr, size_t npages);
/** @brief Put a single page
*
* Convenience function: uses put_pages(phyaddr, 1);
*/
static inline int put_page(size_t phyaddr) { return put_pages(phyaddr, 1); }
/** @brief Copy physical page frames
*
* @param psrc physical address of the source page frames
* @param pdest physical address of the destination page frames
* @param npages number of pages
* @return
* - 0 on success
* - -1 on failure
*/
int copy_pages(size_t pdest, size_t psrc, size_t npages);
/** @brief Copy a single page
*
* Convenience function: uses copy_pages(pdest, psrc, 1);
*/
static inline int copy_page(size_t pdest, size_t psrc) { return copy_pages(pdest, psrc, 1); }
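With get_pages()/put_pages() and copy_pages() plus their single-page shorthands, the physical frame interface is now symmetric. A short usage sketch (error handling omitted; the sequence is illustrative, not taken from the tree):

size_t src = get_page();   // allocate one physical page frame
size_t dst = get_page();
copy_page(dst, src);       // duplicate the frame's contents
put_page(src);             // hand both frames back
put_page(dst);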
#ifdef __cplusplus
}


@ -29,10 +29,7 @@
#include <metalsvm/stddef.h>
#include <asm/page.h>
/**
* Sets up the environment, page directories etc and
* enables paging.
*/
/** @brief Sets up the environment, page directories etc. and enables paging. */
static inline int paging_init(void) { return arch_paging_init(); }
#endif


@ -28,14 +28,10 @@
extern "C" {
#endif
#define NULL ((void*) 0)
#define NULL ((void*) 0)
typedef unsigned int tid_t;
#define PAGE_SIZE (1 << PAGE_SHIFT)
#define PAGE_MASK ~(PAGE_SIZE - 1)
#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
#if MAX_CORES == 1
#define per_core(name) name
#define DECLARE_PER_CORE(type, name) extern type name;
@ -66,10 +62,10 @@ typedef unsigned int tid_t;
irq_nested_enable(flags);\
return ret; \
}
#define CORE_ID smp_id()
#define CORE_ID smp_id()
#endif
/* needed to find the task, which is currently running on this core */
// needed to find the task, which is currently running on this core
struct task;
DECLARE_PER_CORE(struct task*, current_task);
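Thanks to the two per_core() definitions, the same access compiles to a plain global on uniprocessor builds and to a per-core slot on SMP. The usage pattern found throughout the kernel:

// Works unchanged for MAX_CORES == 1 and on SMP:
task_t* curr_task = per_core(current_task);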


@ -29,72 +29,66 @@
#ifndef __STDLIB_H__
#define __STDLIB_H__
#include <metalsvm/config.h>
#include <metalsvm/tasks_types.h>
#include <asm/stddef.h>
#include <metalsvm/stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
#define MAP_KERNEL_SPACE (1 << 0)
#define MAP_USER_SPACE (1 << 1)
#define MAP_PAGE_TABLE (1 << 2)
#define MAP_NO_CACHE (1 << 3)
#define MAP_WT (1 << 5)
#define MAP_CODE (1 << 6)
#define MAP_READONLY (1 << 7)
#ifdef CONFIG_ROCKCREEK
#define MAP_MPE (1 << 8)
#endif
#define MAP_SVM_STRONG (1 << 9)
#define MAP_SVM_LAZYRELEASE (1 << 10)
#define MAP_SVM_INIT (1 << 11)
#define MAP_NO_ACCESS (1 << 12)
#define MAP_REMAP (1 << 13)
#define MAP_NO_ACCESS (1 << 0)
#define MAP_READ_ONLY (1 << 1)
#define MAP_USER_SPACE (1 << 2)
#define MAP_CODE (1 << 3)
#define MAP_WT (1 << 4)
#define MAP_NO_CACHE (1 << 5)
#define MAP_MPE (1 << 6)
#define MAP_SVM_STRONG (1 << 7)
#define MAP_SVM_LAZYRELEASE (1 << 8)
#define MAP_SVM_INIT (1 << 9)
#define MAP_KERNEL_SPACE (0 << 2) // legacy compatibility
#define MAP_REMAP (1 << 12)
//#define MAP_NON_CONTINUOUS (1 << 13) // TODO
void NORETURN abort(void);
/** @brief Kernel's memory allocator function.
/** @brief General page allocator function
*
* This will just call mem_allocation with
* the flags MAP_KERNEL_SPACE and MAP_HEAP.
*
* @return Pointer to the new memory range
*/
void* kmalloc(size_t);
/** @brief Kernel's more general memory allocator function.
*
* This function lets you choose flags for the newly allocated memory.
* This function allocates and maps whole pages.
* To avoid fragmentation you should use kmalloc() and kfree()!
*
* @param sz Desired size of the new memory
* @param flags Flags to specify
* @param flags Flags for map_region(), vma_add()
*
* @return Pointer to the new memory range
*/
void* mem_allocation(size_t sz, uint32_t flags);
void* palloc(size_t sz, uint32_t flags);
/** @brief Free memory
/** @brief Free general kernel memory
*
* The kernel malloc doesn't track how
* much memory was allocated for which pointer,
* palloc() doesn't track how much memory was allocated for which pointer,
* so you have to specify how much memory shall be freed.
*/
void kfree(void*, size_t);
/** @brief Create a new stack for a new task
*
* @return start address of the new stack
* @param sz The size which should be freed
*/
void* create_stack(void);
void pfree(void* addr, size_t sz);
/** @brief Delete stack of a finished task
/** @brief The memory allocator function
*
* @param addr Pointer to the stack
* @return 0 on success
* This allocator uses a buddy system to manage free memory.
*
* @return Pointer to the new memory range
*/
int destroy_stack(task_t* addr);
void* kmalloc(size_t sz);
/** @brief The memory free function
*
* Releases memory allocated by kmalloc()
*
* @param addr The address of the memory block allocated by kmalloc()
*/
void kfree(void* addr);
/** @brief String to long
*
@ -113,7 +107,7 @@ unsigned long strtoul(const char* nptr, char** endptr, int base);
*/
static inline int atoi(const char *str)
{
return (int)strtol(str, (char **)NULL, 10);
return (int)strtol(str, (char **) NULL, 10);
}
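The new allocator split is deliberate: palloc()/pfree() hand out whole mapped pages and make the caller track the size, while kmalloc()/kfree() sit on top of the buddy system and track sizes themselves. A usage sketch (sizes and flags are arbitrary examples):

// Page granularity: caller remembers the size for pfree().
void* frame_buf = palloc(2*PAGE_SIZE, MAP_KERNEL_SPACE);
pfree(frame_buf, 2*PAGE_SIZE);

// Byte granularity: the buddy prefix remembers the size.
char* msg = kmalloc(64);
kfree(msg);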
#ifdef __cplusplus


@ -147,9 +147,7 @@ tid_t wait(int32_t* result);
*/
void update_load(void);
/** @brief Print the current cpu load
*
*/
/** @brief Print the current cpu load */
void dump_load(void);
#if MAX_CORES > 1
@ -201,9 +199,7 @@ int block_current_task(void);
*/
int set_timer(uint64_t deadline);
/** @brief check is a timer is expired
*
*/
/** @brief Check if a timer is expired */
void check_timers(void);
/** @brief Abort current task */


@ -62,7 +62,7 @@ extern "C" {
#define TASK_L2 (1 << 3)
typedef int (*entry_point_t)(void*);
struct page_dir;
typedef struct page_map page_map_t;
/** @brief The task_t structure */
typedef struct task {
@ -86,12 +86,12 @@ typedef struct task {
struct task* prev;
/// last core id on which the task was running
uint32_t last_core;
/// usage in number of pages
/// usage in number of pages (including page map tables)
atomic_int32_t user_usage;
/// avoids concurrent access to the page directory
spinlock_irqsave_t pgd_lock;
/// pointer to the page directory
struct page_dir* pgd;
/// locks access to all page maps with PG_USER flag set
spinlock_irqsave_t page_lock;
/// pointer to page directory (32bit) or page map level 4 (64bit) table respectively
page_map_t* page_map;
/// lock for the VMA_list
spinlock_t vma_lock;
/// list of VMAs
@ -104,10 +104,12 @@ typedef struct task {
size_t start_heap;
/// end address of the heap
size_t end_heap;
/// the userspace heap
vma_t* heap;
/// LwIP error code
int lwip_err;
/// mail inbox
mailbox_wait_msg_t inbox;
mailbox_wait_msg_t inbox;
/// mail outbox array
mailbox_wait_msg_t* outbox[MAX_TASKS];
/// FPU state


@ -27,56 +27,101 @@
#define __VMA_H__
#include <metalsvm/stddef.h>
#include <asm/page.h>
#ifdef __cplusplus
extern "C" {
#endif
/// Read access to this VMA is allowed
#define VMA_READ (1 << 0)
/// Write access to this VMA is allowed
#define VMA_WRITE (1 << 1)
/// Instructions fetches in this VMA are allowed
#define VMA_EXECUTE (1 << 2)
/// This VMA is cacheable
#define VMA_CACHEABLE (1 << 3)
#define VMA_NOACCESS (1 << 4)
/// This VMA is not accessible
#define VMA_NO_ACCESS (1 << 4)
/// This VMA should be part of the userspace
#define VMA_USER (1 << 5)
/// A collection of flags used for the kernel heap (kmalloc)
#define VMA_HEAP (VMA_READ|VMA_WRITE|VMA_CACHEABLE)
// boundaries for VAS allocation
#define VMA_KERN_MIN PAGE_SIZE // we skip the first page
#define VMA_KERN_MAX KERNEL_SPACE
#define VMA_USER_MAX PAGE_MAP_PGT
struct vma;
/** @brief VMA structure definition */
/** @brief VMA structure definition
*
* Each item in this linked list marks a used part of the virtual address space.
* It's used by vm_alloc() to find holes between them.
*/
typedef struct vma {
/// Start address of the memory area
size_t start;
/// End address of the memory area
size_t end;
/// Type flags field
uint32_t type;
uint32_t flags;
/// Pointer of next VMA element in the list
struct vma* next;
/// Pointer to previous VMA element in the list
struct vma* prev;
} vma_t;
/** @brief Add a new virtual memory region to the list of VMAs
/** @brief Add a new virtual memory area to the list of VMAs
*
* @param task Pointer to the task_t structure of the task
* @param start Start address of the new region
* @param end End address of the new region
* @param type Type flags the new region shall have
* @param start Start address of the new area
* @param end End address of the new area
* @param flags Type flags the new area shall have
*
* @return
* - 0 on success
* - -EINVAL (-22) or -ENOMEM (-12) on failure
*/
int vma_add(struct task* task, size_t start, size_t end, uint32_t type);
int vma_add(size_t start, size_t end, uint32_t flags);
/** @brief Dump information about this task's VMAs into the terminal.
/** @brief Search for a free memory area
*
* This will print out Start, end and flags for each VMA in the task's list
* @param size Size of the requested VMA in bytes
* @param flags Type flags the new area shall have
* @return
* - 0 on failure
* - the start address of a free area
*/
size_t vma_alloc(size_t size, uint32_t flags);
/** @brief Free an allocated memory area
*
* @param task The task's task_t structure
* @param start Start address of the area to be freed
* @param end End address of the area to be freed
* @return
* - 0 on success
* - -EINVAL (-22) on failure
*/
int vma_dump(struct task* task);
int vma_free(size_t start, size_t end);
/** @brief Free all virtual memory areas
*
* @return
* - 0 on success
*/
int drop_vma_list();
/** @brief Copy the VMA list of the current task to another task
*
* @param task The task where the list should be copied to
* @return
* - 0 on success
*/
int copy_vma_list(struct task* task);
/** @brief Dump information about this task's VMAs into the terminal. */
void vma_dump();
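The intended flow is to reserve address space first and only then back it with page frames. A hedged sketch of the interplay; whether vma_alloc() already inserts the area into the list itself is not visible here, so the explicit vma_add() may be redundant:

// Sketch: reserve 4 pages of kernel address space, record the
// area, and release it again later.
size_t viraddr = vma_alloc(4*PAGE_SIZE, VMA_HEAP);
if (viraddr) {
	vma_add(viraddr, viraddr + 4*PAGE_SIZE, VMA_HEAP);
	/* ... map page frames, use the region ... */
	vma_free(viraddr, viraddr + 4*PAGE_SIZE);
}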
#ifdef __cplusplus
}


@ -63,7 +63,7 @@ extern const void bss_end;
int lowlevel_init(void)
{
// initialize .bss section
memset((void*)&bss_start, 0x00, ((size_t) &bss_end - (size_t) &bss_start));
memset((char*) &bss_start, 0x00, (char*) &bss_end - (char*) &bss_start);
koutput_init();


@ -29,6 +29,7 @@
#include <metalsvm/fs.h>
#include <asm/irq.h>
#include <asm/irqflags.h>
#include <asm/page.h>
#include <asm/kb.h>
#ifdef CONFIG_ROCKCREEK
#include <asm/icc.h>
@ -71,8 +72,9 @@ int main(void)
pushbg(COL_BLUE);
kprintf("This is MetalSVM %s Build %u, %u\n",
METALSVM_VERSION, &__BUILD_DATE, &__BUILD_TIME);
METALSVM_VERSION, &__BUILD_DATE, &__BUILD_TIME);
popbg();
system_init();
irq_init();
timer_init();
@ -85,7 +87,7 @@ int main(void)
icc_init();
svm_init();
#endif
initrd_init();
initrd_init();
irq_enable();
@ -101,9 +103,10 @@ int main(void)
disable_timer_irq();
#endif
sleep(5);
sleep(2);
create_kernel_task(&id, initd, NULL, NORMAL_PRIO);
kprintf("Create initd with id %u\n", id);
reschedule();
while(1) {


@ -105,11 +105,11 @@ static int sys_open(const char* name, int flags, int mode)
/* file doesn't exist! */
if (check < 0) {
/* tidy up the fildescriptor */
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
kfree(curr_task->fildes_table[fd]);
curr_task->fildes_table[fd] = NULL;
return check;
}
return fd;
}
@ -196,7 +196,7 @@ static int sys_socket(int domain, int type, int protocol)
/* file doesn't exist! */
if (curr_task->fildes_table[fd]->node == NULL) {
/* tidy up the fildescriptor */
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
kfree(curr_task->fildes_table[fd]);
curr_task->fildes_table[fd] = NULL;
return -ENOENT;
}
@ -236,11 +236,11 @@ static int sys_accept(int s, struct sockaddr* addr, socklen_t* addrlen)
curr_task->fildes_table[fd]->offset = sock2;
curr_task->fildes_table[fd]->count = 1;
curr_task->fildes_table[fd]->node = findnode_fs("/dev/socket");
/* file doesn't exist! */
if (curr_task->fildes_table[fd]->node == NULL) {
/* tidy up the fildescriptor */
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
kfree(curr_task->fildes_table[fd]);
curr_task->fildes_table[fd] = NULL;
return -ENOENT;
}
@ -273,7 +273,7 @@ static int sys_close(int fd)
/* close command failed -> return check = errno */
if (BUILTIN_EXPECT(check < 0, 0))
return check;
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
kfree(curr_task->fildes_table[fd]);
curr_task->fildes_table[fd] = NULL;
} else {
curr_task->fildes_table[fd]->count--;
@ -356,7 +356,7 @@ static int sys_dup(int fd)
* free the memory which was allocated in get_fildes()
* cause will link it to another existing memory space
*/
kfree(curr_task->fildes_table[new_fd], sizeof(fildes_t));
kfree(curr_task->fildes_table[new_fd]);
/* and link it to another existing memory space */
curr_task->fildes_table[new_fd] = curr_task->fildes_table[fd];
@ -381,7 +381,7 @@ static int sys_dup2(int fd, int fd2)
/* If fd and fd2 are equal, then dup2() just returns fd2 */
if (fd == fd2)
return fd2;
/*
* if descriptor fd2 is already in use, it is first deallocated
* as if a close(2) call had been done first
@ -398,30 +398,32 @@ static int sys_dup2(int fd, int fd2)
static int sys_sbrk(int incr)
{
task_t* task = per_core(current_task);
vma_t* tmp = NULL;
vma_t* heap = task->heap;
int ret;
spinlock_lock(&task->vma_lock);
tmp = task->vma_list;
while(tmp && !((task->end_heap >= tmp->start) && (task->end_heap <= tmp->end)))
tmp = tmp->next;
if (BUILTIN_EXPECT(!heap, 0)) {
kprintf("sys_sbrk: missing heap!\n");
abort();
}
ret = (int) task->end_heap;
task->end_heap += incr;
if (task->end_heap < task->start_heap)
task->end_heap = task->start_heap;
// resize virtual memory area
if (tmp && (tmp->end <= task->end_heap))
tmp->end = task->end_heap;
ret = heap->end;
heap->end += incr;
if (heap->end < heap->start)
heap->end = heap->start;
// allocation and mapping of new pages for the heap
// is caught by the pagefault handler
kprintf("sys_sbrk: task = %d, heap->start = %#lx, heap->end = %#lx, incr = %#4x\n", task->id, heap->start, heap->end, incr); // TOD0: remove
spinlock_unlock(&task->vma_lock);
return ret;
}
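Since sys_sbrk() only moves heap->end and leaves the actual mapping to the page fault handler, growing the heap is cheap and pages materialize on first touch. The semantics in numbers (addresses invented for illustration):

// heap->start = heap->end = 0x40000000 initially
// sys_sbrk(0x2000)  -> returns 0x40000000, heap->end = 0x40002000
// sys_sbrk(0)       -> returns 0x40002000 (queries the current break)
// sys_sbrk(-0x1000) -> returns 0x40002000, heap->end = 0x40001000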
int syscall_handler(uint32_t sys_nr, ...)
int syscall_handler(size_t sys_nr, ...)
{
int ret = -EINVAL;
va_list vl;
@ -500,7 +502,7 @@ int syscall_handler(uint32_t sys_nr, ...)
break;
case __NR_wait: {
int32_t* status = va_arg(vl, int32_t*);
ret = wait(status);
break;
}
@ -549,7 +551,7 @@ int syscall_handler(uint32_t sys_nr, ...)
ret = -ENOTSOCK;
break;
}
//kprintf("lwip_connect: %p with lenght %i and Socket %i", name, namelen, per_core(current_task)->fildes_table[fd].offset);
//kprintf("lwip_connect: %p with lenght %i and Socket %i", name, namelen, per_core(current_task)->fildes_table[fd].offset); // TODO: remove
ret = lwip_connect(per_core(current_task)->fildes_table[fd]->offset, name, namelen);
@ -601,7 +603,7 @@ int syscall_handler(uint32_t sys_nr, ...)
}
#endif
default:
kputs("invalid system call\n");
kprintf("syscall_handler: invalid system call %u\n", sys_nr);
ret = -ENOSYS;
break;
};


@ -78,6 +78,7 @@ DEFINE_PER_CORE(task_t*, current_task, task_table+0);
extern const void boot_stack;
/** @brief helper function for the assembly code to determine the current task
*
* @return Pointer to the task_t structure of current task
*/
task_t* get_current_task(void) {
@ -96,6 +97,37 @@ uint32_t get_highest_priority(void)
return msb(runqueues[CORE_ID].prio_bitmap);
}
/** @brief Create a new stack for a new task
*
* @return start address of the new stack
*/
static void* create_stack(void)
{
/*
* TODO: our stack should be non-executable!
* We need this atm because nested functions in page64.c
* are using trampolines on the stack.
*/
return palloc(KERNEL_STACK_SIZE, MAP_CODE);
}
/** @brief Delete stack of a finished task
*
* @param task Task whose stack shall be freed
* @return
* - 0 on success
* - -EINVAL on failure
*/
static int destroy_stack(task_t* task)
{
if (BUILTIN_EXPECT(!task || !task->stack, 0))
return -EINVAL;
pfree(task->stack, KERNEL_STACK_SIZE);
return 0;
}
int multitasking_init(void) {
if (BUILTIN_EXPECT(task_table[0].status != TASK_IDLE, 0)) {
kputs("Task 0 is not an idle task\n");
@ -104,7 +136,7 @@ int multitasking_init(void) {
mailbox_wait_msg_init(&task_table[0].inbox);
memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[0].pgd = get_boot_pgd();
task_table[0].page_map = get_boot_page_map();
task_table[0].flags = TASK_DEFAULT_FLAGS;
task_table[0].prio = IDLE_PRIO;
task_table[0].stack = (void*) &boot_stack;
@ -128,7 +160,7 @@ size_t get_idle_task(uint32_t id)
atomic_int32_set(&task_table[id].user_usage, 0);
mailbox_wait_msg_init(&task_table[id].inbox);
memset(task_table[id].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[id].pgd = get_boot_pgd();
task_table[id].page_map = get_boot_page_map();
current_task[id].var = task_table+id;
runqueues[id].idle = task_table+id;
@ -193,10 +225,8 @@ static void wakeup_blocked_tasks(int result)
spinlock_irqsave_unlock(&table_lock);
}
/** @brief A procedure to be called by
* procedures which are called by exiting tasks. */
/** @brief A procedure to be called by procedures which are called by exiting tasks. */
static void NORETURN do_exit(int arg) {
vma_t* tmp;
task_t* curr_task = per_core(current_task);
uint32_t flags, core_id, fd, status;
@ -204,17 +234,17 @@ static void NORETURN do_exit(int arg) {
for (fd = 0; fd < NR_OPEN; fd++) {
if(curr_task->fildes_table[fd] != NULL) {
/*
* delete a descriptor from the per-process object
* reference table. If this is not the last reference to the underlying
* object, the object will be ignored.
*/
* Delete a descriptor from the per-process object
* reference table. If this is not the last reference to the underlying
* object, the object will be ignored.
*/
if (curr_task->fildes_table[fd]->count == 1) {
/* try to close the file */
// try to close the file
status = close_fs(curr_task->fildes_table[fd]);
/* close command failed -> return check = errno */
// close command failed -> return check = errno
if (BUILTIN_EXPECT(status < 0, 0))
kprintf("Task %u was not able to close file descriptor %i. close_fs returned %d", curr_task->id, fd, -status);
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
kfree(curr_task->fildes_table[fd]);
curr_task->fildes_table[fd] = NULL;
} else {
curr_task->fildes_table[fd]->count--;
@ -222,37 +252,33 @@ static void NORETURN do_exit(int arg) {
}
}
}
//finally the table has to be cleared.
kfree(curr_task->fildes_table, sizeof(filp_t)*NR_OPEN);
kfree(curr_task->fildes_table); // finally the table has to be cleared
}
kprintf("Terminate task: %u, return value %d\n", curr_task->id, arg);
wakeup_blocked_tasks(arg);
flags = irq_nested_disable();
//vma_dump(curr_task);
spinlock_lock(&curr_task->vma_lock);
drop_vma_list();
// remove memory regions
while((tmp = curr_task->vma_list) != NULL) {
kfree((void*) tmp->start, tmp->end - tmp->start + 1);
curr_task->vma_list = tmp->next;
kfree((void*) tmp, sizeof(vma_t));
}
/*
* This marks all userpages as free. Nevertheless they are still existing
* and used by the MMU until the task finishes. Therefore we need to disable
* context switching by disabling interrupts (see above)! We may also make use
* of the TLB and global kernel pages.
*/
drop_page_map();
spinlock_unlock(&curr_task->vma_lock);
drop_pgd(); // delete page directory and its page tables
#if 0
#if 1
if (atomic_int32_read(&curr_task->user_usage))
kprintf("Memory leak! Task %d did not release %d pages\n",
curr_task->id, atomic_int32_read(&curr_task->user_usage));
curr_task->id, atomic_int32_read(&curr_task->user_usage));
#endif
curr_task->status = TASK_FINISHED;
// decrease the number of active tasks
flags = irq_nested_disable();
core_id = CORE_ID;
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].nr_tasks--;
@ -262,9 +288,7 @@ static void NORETURN do_exit(int arg) {
reschedule();
kprintf("Kernel panic: scheduler on core %d found no valid task\n", CORE_ID);
while(1) {
HALT;
}
while(1) HALT;
}
/** @brief A procedure to be called by kernel tasks */
@ -300,6 +324,7 @@ void NORETURN abort(void) {
static int create_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio, uint32_t core_id)
{
task_t* curr_task;
task_t* new_task = NULL;
int ret = -ENOMEM;
uint32_t i;
@ -319,64 +344,74 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio, uin
#endif
{
core_id = CORE_ID;
kprintf("Inavlid core id! Set id to %u!\n", core_id);
kprintf("create_task: invalid core id! Set id to %u!\n", core_id);
}
curr_task = per_core(current_task);
// search free entry in task table
for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) {
atomic_int32_set(&task_table[i].user_usage, 0);
ret = create_pgd(task_table+i, 0);
if (ret < 0) {
ret = -ENOMEM;
goto create_task_out;
}
task_table[i].id = i;
task_table[i].status = TASK_READY;
task_table[i].last_stack_pointer = NULL;
task_table[i].stack = create_stack();
task_table[i].flags = TASK_DEFAULT_FLAGS;
task_table[i].prio = prio;
task_table[i].last_core = 0;
spinlock_init(&task_table[i].vma_lock);
task_table[i].vma_list = NULL;
task_table[i].fildes_table = NULL;
mailbox_wait_msg_init(&task_table[i].inbox);
memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[i].outbox[curr_task->id] = &curr_task->inbox;
if (id)
*id = i;
ret = create_default_frame(task_table+i, ep, arg);
task_table[i].start_heap = 0;
task_table[i].end_heap = 0;
task_table[i].lwip_err = 0;
task_table[i].start_tick = get_clock_tick();
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[prio-1].first) {
task_table[i].next = task_table[i].prev = NULL;
runqueues[core_id].queue[prio-1].first = task_table+i;
runqueues[core_id].queue[prio-1].last = task_table+i;
} else {
task_table[i].prev = runqueues[core_id].queue[prio-1].last;
task_table[i].next = NULL;
runqueues[core_id].queue[prio-1].last->next = task_table+i;
runqueues[core_id].queue[prio-1].last = task_table+i;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
new_task = &task_table[i];
break;
}
}
create_task_out:
if (BUILTIN_EXPECT(!new_task, 0)) {
ret = -ENOMEM;
goto out;
}
atomic_int32_set(&new_task->user_usage, 0);
ret = copy_page_map(new_task, 0);
if (ret < 0) {
ret = -ENOMEM;
goto out;
}
new_task->id = i;
new_task->status = TASK_READY;
new_task->last_stack_pointer = NULL;
new_task->stack = create_stack();
new_task->flags = TASK_DEFAULT_FLAGS;
new_task->prio = prio;
new_task->last_core = 0;
spinlock_init(&new_task->vma_lock);
new_task->vma_list = NULL;
new_task->fildes_table = NULL;
mailbox_wait_msg_init(&new_task->inbox);
memset(new_task->outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
new_task->outbox[curr_task->id] = &curr_task->inbox;
if (id)
*id = i;
ret = create_default_frame(new_task, ep, arg);
new_task->start_heap = 0;
new_task->end_heap = 0;
new_task->lwip_err = 0;
new_task->start_tick = get_clock_tick();
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[prio-1].first) {
new_task->next = new_task->prev = NULL;
runqueues[core_id].queue[prio-1].first = new_task;
runqueues[core_id].queue[prio-1].last = new_task;
}
else {
new_task->prev = runqueues[core_id].queue[prio-1].last;
new_task->next = NULL;
runqueues[core_id].queue[prio-1].last->next = new_task;
runqueues[core_id].queue[prio-1].last = new_task;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
out:
spinlock_irqsave_unlock(&table_lock);
return ret;
@ -387,109 +422,99 @@ int sys_fork(void)
int ret = -ENOMEM;
unsigned int i, core_id, fd_i;
task_t* parent_task = per_core(current_task);
vma_t** child;
vma_t* parent;
vma_t* tmp;
task_t* child_task = NULL;
spinlock_lock(&parent_task->vma_lock);
spinlock_irqsave_lock(&table_lock);
core_id = CORE_ID;
// search free entry in task_table
for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) {
atomic_int32_set(&task_table[i].user_usage, 0);
ret = create_pgd(task_table+i, 1);
if (ret < 0) {
ret = -ENOMEM;
goto create_task_out;
}
task_table[i].id = i;
task_table[i].last_stack_pointer = NULL;
task_table[i].stack = create_stack();
spinlock_init(&task_table[i].vma_lock);
// copy VMA list
child = &task_table[i].vma_list;
parent = parent_task->vma_list;
tmp = NULL;
while(parent) {
*child = (vma_t*) kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!child, 0))
break;
(*child)->start = parent->start;
(*child)->end = parent->end;
(*child)->type = parent->type;
(*child)->prev = tmp;
(*child)->next = NULL;
parent = parent->next;
tmp = *child;
child = &((*child)->next);
}
/* init fildes_table */
task_table[i].fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
memcpy(task_table[i].fildes_table, parent_task->fildes_table, sizeof(filp_t)*NR_OPEN);
for (fd_i = 0; fd_i < NR_OPEN; fd_i++)
if ((task_table[i].fildes_table[fd_i]) != NULL)
task_table[i].fildes_table[fd_i]->count++;
mailbox_wait_msg_init(&task_table[i].inbox);
memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[i].outbox[parent_task->id] = &parent_task->inbox;
task_table[i].flags = parent_task->flags;
memcpy(&(task_table[i].fpu), &(parent_task->fpu), sizeof(union fpu_state));
task_table[i].start_tick = get_clock_tick();
task_table[i].start_heap = 0;
task_table[i].end_heap = 0;
task_table[i].lwip_err = 0;
task_table[i].prio = parent_task->prio;
task_table[i].last_core = parent_task->last_core;
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[parent_task->prio-1].first) {
task_table[i].next = task_table[i].prev = NULL;
runqueues[core_id].queue[parent_task->prio-1].first = task_table+i;
runqueues[core_id].queue[parent_task->prio-1].last = task_table+i;
} else {
task_table[i].prev = runqueues[core_id].queue[parent_task->prio-1].last;
task_table[i].next = NULL;
runqueues[core_id].queue[parent_task->prio-1].last->next = task_table+i;
runqueues[core_id].queue[parent_task->prio-1].last = task_table+i;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
ret = arch_fork(task_table+i);
if (parent_task != per_core(current_task)) {
// Oh, the current task is the new child task!
// Leave the function without releasing the locks
// because the locks are already released
// by the parent task!
return 0;
}
if (!ret) {
task_table[i].status = TASK_READY;
ret = i;
}
child_task = &task_table[i];
break;
}
}
create_task_out:
if (BUILTIN_EXPECT(!child_task, 0)) {
ret = -ENOMEM;
goto out;
}
atomic_int32_set(&child_task->user_usage, 0);
ret = copy_page_map(child_task, 1);
if (ret < 0) {
ret = -ENOMEM;
goto out;
}
ret = copy_vma_list(child_task);
if (BUILTIN_EXPECT(ret, 0)) {
ret = -ENOMEM;
goto out;
}
child_task->id = i;
child_task->last_stack_pointer = NULL;
child_task->stack = create_stack();
// init fildes_table
child_task->fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
memcpy(child_task->fildes_table, parent_task->fildes_table, sizeof(filp_t)*NR_OPEN);
for (fd_i=0; fd_i<NR_OPEN; fd_i++) {
if ((child_task->fildes_table[fd_i]) != NULL)
child_task->fildes_table[fd_i]->count++;
}
// init mailbox
mailbox_wait_msg_init(&child_task->inbox);
memset(child_task->outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
child_task->outbox[parent_task->id] = &parent_task->inbox;
child_task->flags = parent_task->flags;
memcpy(&child_task->fpu, &parent_task->fpu, sizeof(union fpu_state));
child_task->start_tick = get_clock_tick();
child_task->start_heap = 0;
child_task->end_heap = 0;
child_task->lwip_err = 0;
child_task->prio = parent_task->prio;
child_task->last_core = parent_task->last_core;
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[parent_task->prio-1].first) {
child_task->next = child_task->prev = NULL;
runqueues[core_id].queue[parent_task->prio-1].first = child_task;
runqueues[core_id].queue[parent_task->prio-1].last = child_task;
}
else {
child_task->prev = runqueues[core_id].queue[parent_task->prio-1].last;
child_task->next = NULL;
runqueues[core_id].queue[parent_task->prio-1].last->next = child_task;
runqueues[core_id].queue[parent_task->prio-1].last = child_task;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
ret = arch_fork(child_task);
if (parent_task != per_core(current_task))
/*
* Oh, the current task is the new child task!
* Leave the function without releasing the locks
* because the locks are already released by the parent task!
*/
return 0;
if (!ret) {
child_task->status = TASK_READY;
ret = i;
}
out:
spinlock_irqsave_unlock(&table_lock);
spinlock_unlock(&parent_task->vma_lock);
return ret;
}
@ -515,7 +540,7 @@ static int kernel_entry(void* args)
ret = kernel_args->func(kernel_args->args);
kfree(kernel_args, sizeof(kernel_args_t));
kfree(kernel_args);
return ret;
}
@ -561,16 +586,15 @@ static int load_task(load_args_t* largs)
{
uint32_t i, offset, idx, fd_i;
uint32_t addr, npages, flags;
size_t stack = 0;
size_t stack = 0, heap = 0;
elf_header_t header;
elf_program_header_t prog_header;
//elf_section_header_t sec_header;
///!!! kfree is missing!
fildes_t *file = kmalloc(sizeof(fildes_t));
fildes_t *file = kmalloc(sizeof(fildes_t)); // TODO: kfree is missing!
file->offset = 0;
file->flags = 0;
//TODO: init the hole fildes_t struct!
// TODO: init the whole fildes_t struct!
task_t* curr_task = per_core(current_task);
int err;
@ -581,22 +605,22 @@ static int load_task(load_args_t* largs)
if (!file->node)
return -EINVAL;
/* init fildes_table */
// init fildes_table
spinlock_irqsave_lock(&table_lock);
if (!task_table[curr_task->id].fildes_table) {
task_table[curr_task->id].fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
if (BUILTIN_EXPECT(!task_table[curr_task->id].fildes_table, 0)) {
if (!curr_task->fildes_table) {
curr_task->fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
if (BUILTIN_EXPECT(!curr_task->fildes_table, 0)) {
spinlock_irqsave_unlock(&table_lock);
return -ENOMEM;
}
memset(task_table[curr_task->id].fildes_table, 0x00, sizeof(filp_t)*NR_OPEN);
memset(curr_task->fildes_table, 0x00, sizeof(filp_t)*NR_OPEN);
for (fd_i = 0; fd_i < 3; fd_i++) {
task_table[curr_task->id].fildes_table[fd_i] = kmalloc(sizeof(fildes_t));
task_table[curr_task->id].fildes_table[fd_i]->count = 1;
curr_task->fildes_table[fd_i] = kmalloc(sizeof(fildes_t));
curr_task->fildes_table[fd_i]->count = 1;
}
task_table[curr_task->id].fildes_table[0]->node = findnode_fs("/dev/stdin");
task_table[curr_task->id].fildes_table[1]->node = findnode_fs("/dev/stdout");
task_table[curr_task->id].fildes_table[2]->node = findnode_fs("/dev/stderr");
curr_task->fildes_table[0]->node = findnode_fs("/dev/stdin");
curr_task->fildes_table[1]->node = findnode_fs("/dev/stdout");
curr_task->fildes_table[2]->node = findnode_fs("/dev/stderr");
}
spinlock_irqsave_unlock(&table_lock);
@@ -617,43 +641,43 @@ static int load_task(load_args_t* largs)
if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_32, 0))
goto invalid;
#else
#elif defined(CONFIG_X86_64)
if (BUILTIN_EXPECT(header.machine != ELF_EM_X86_64, 0))
goto invalid;
if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_64, 0))
goto invalid;
#else
#error "unknown arch"
#endif
if (BUILTIN_EXPECT(header.ident.data != ELF_DATA_2LSB, 0))
goto invalid;
if (header.entry <= KERNEL_SPACE)
if (header.entry < KERNEL_SPACE)
goto invalid;
// interpret program header table
for (i=0; i<header.ph_entry_count; i++) {
file->offset = header.ph_offset+i*header.ph_entry_size;
if (read_fs(file, (uint8_t*)&prog_header, sizeof(elf_program_header_t)) == 0) {
if (read_fs(file, (uint8_t*) &prog_header, sizeof(elf_program_header_t)) == 0) {
kprintf("Could not read programm header!\n");
continue;
}
switch(prog_header.type)
{
switch(prog_header.type) {
case ELF_PT_LOAD: // load program segment
if (!prog_header.virt_addr)
continue;
npages = (prog_header.mem_size >> PAGE_SHIFT);
if (prog_header.mem_size & (PAGE_SIZE-1))
npages++;
npages = PAGE_FLOOR(prog_header.mem_size) >> PAGE_BITS;
addr = get_pages(npages);
flags = MAP_USER_SPACE;
if (prog_header.flags & PF_X)
flags |= MAP_CODE;
if (!(prog_header.flags & PF_W))
flags |= MAP_READ_ONLY;
// map page frames in the address space of the current task
if (!map_region(prog_header.virt_addr, addr, npages, flags)) {
@@ -662,35 +686,30 @@ static int load_task(load_args_t* largs)
}
// clear pages
memset((void*) prog_header.virt_addr, 0x00, npages*PAGE_SIZE);
memset((void*) prog_header.virt_addr, 0x00, npages * PAGE_SIZE);
// set starting point of the heap
if (curr_task->start_heap < prog_header.virt_addr+prog_header.mem_size)
curr_task->start_heap = curr_task->end_heap = prog_header.virt_addr+prog_header.mem_size;
// update heap location
if (heap < prog_header.virt_addr + prog_header.mem_size)
heap = prog_header.virt_addr+prog_header.mem_size;
// load program
file->offset = prog_header.offset;
read_fs(file, (uint8_t*)prog_header.virt_addr, prog_header.file_size);
read_fs(file, (uint8_t*) prog_header.virt_addr, prog_header.file_size);
flags = VMA_CACHEABLE;
flags = VMA_CACHEABLE | VMA_USER;
if (prog_header.flags & PF_R)
flags |= VMA_READ;
if (prog_header.flags & PF_W)
flags |= VMA_WRITE;
if (prog_header.flags & PF_X)
flags |= VMA_EXECUTE;
vma_add(curr_task, prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags);
if (!(prog_header.flags & PF_W))
change_page_permissions(prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags);
vma_add(prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE, flags);
break;
case ELF_PT_GNU_STACK: // Indicates stack executability
// create user-level stack
npages = DEFAULT_STACK_SIZE >> PAGE_SHIFT;
if (DEFAULT_STACK_SIZE & (PAGE_SIZE-1))
npages++;
npages = PAGE_FLOOR(DEFAULT_STACK_SIZE) >> PAGE_BITS;
addr = get_pages(npages);
stack = header.entry*2; // virtual address of the stack
@@ -708,7 +727,8 @@ static int load_task(load_args_t* largs)
flags |= VMA_WRITE;
if (prog_header.flags & PF_X)
flags |= VMA_EXECUTE;
vma_add(curr_task, stack, stack+npages*PAGE_SIZE-1, flags);
vma_add(stack, stack+npages*PAGE_SIZE, flags);
break;
}
}
@@ -726,8 +746,23 @@ static int load_task(load_args_t* largs)
}
#endif
// setup heap
if (!curr_task->heap)
curr_task->heap = (vma_t*) kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!curr_task->heap || !heap, 0)) {
kprintf("load_task: heap is missing!\n");
return -ENOMEM;
}
curr_task->heap->flags = VMA_HEAP|VMA_USER;
curr_task->heap->start = heap;
curr_task->heap->end = heap;
// TODO: insert into list
if (BUILTIN_EXPECT(!stack, 0)) {
kprintf("Stack is missing!\n");
kprintf("load_task: stack is missing!\n");
return -ENOMEM;
}
@@ -740,9 +775,9 @@ static int load_task(load_args_t* largs)
// push argv on the stack
offset -= largs->argc * sizeof(char*);
for(i=0; i<largs->argc; i++) {
for (i=0; i<largs->argc; i++) {
((char**) (stack+offset))[i] = (char*) (stack+idx);
while(((char*) stack)[idx] != '\0')
idx++;
idx++;
@@ -750,7 +785,7 @@ static int load_task(load_args_t* largs)
// push env on the stack
offset -= (largs->envc+1) * sizeof(char*);
for(i=0; i<largs->envc; i++) {
for (i=0; i<largs->envc; i++) {
((char**) (stack+offset))[i] = (char*) (stack+idx);
while(((char*) stack)[idx] != '\0')
@@ -771,10 +806,10 @@ static int load_task(load_args_t* largs)
*((char***) (stack+offset)) = (char**) (stack + offset + 2*sizeof(char**) + (largs->envc+1) * sizeof(char*));
// push argc on the stack
offset -= sizeof(int);
offset -= sizeof(size_t);
*((int*) (stack+offset)) = largs->argc;
kfree(largs, sizeof(load_args_t));
kfree(largs);
// clear fpu state
curr_task->flags &= ~(TASK_FPU_USED|TASK_FPU_INIT);
@@ -785,12 +820,12 @@ static int load_task(load_args_t* largs)
invalid:
kprintf("Invalid executable!\n");
kprintf("magic number 0x%x\n", (uint32_t) header.ident.magic);
kprintf("header type 0x%x\n", (uint32_t) header.type);
kprintf("machine type 0x%x\n", (uint32_t) header.machine);
kprintf("elf ident class 0x%x\n", (uint32_t) header.ident._class);
kprintf("elf identdata !0x%x\n", header.ident.data);
kprintf("program entry point 0x%x\n", (size_t) header.entry);
kprintf("Magic number: 0x%x\n", (uint32_t) header.ident.magic);
kprintf("Header type: 0x%x\n", (uint32_t) header.type);
kprintf("Machine type: 0x%x\n", (uint32_t) header.machine);
kprintf("ELF ident class: 0x%x\n", (uint32_t) header.ident._class);
kprintf("ELF ident data: 0x%x\n", header.ident.data);
kprintf("Program entry point: 0x%x\n", (size_t) header.entry);
return -EINVAL;
}
@@ -806,7 +841,7 @@ static int user_entry(void* arg)
ret = load_task((load_args_t*) arg);
kfree(arg, sizeof(load_args_t));
kfree(arg);
return ret;
}
@@ -824,7 +859,6 @@ static int user_entry(void* arg)
*/
int create_user_task_on_core(tid_t* id, const char* fname, char** argv, uint32_t core_id)
{
#ifdef CONFIG_X86_32
vfs_node_t* node;
int argc = 0;
size_t i, buffer_size = 0;
@@ -860,24 +894,19 @@ int create_user_task_on_core(tid_t* id, const char* fname, char** argv, uint32_t
while ((*dest++ = *src++) != 0);
}
/* create new task */
// create new task
return create_task(id, user_entry, load_args, NORMAL_PRIO, core_id);
#else
return -EINVAL;
#endif
}
/** @brief Used by the execve system call */
int sys_execve(const char* fname, char** argv, char** env)
{
vfs_node_t* node;
vma_t* tmp;
size_t i, buffer_size = 0;
load_args_t* load_args = NULL;
char *dest, *src;
int ret, argc = 0;
int envc = 0;
task_t* curr_task = per_core(current_task);
node = findnode_fs((char*) fname);
if (!node || !(node->type == FS_FILE))
@@ -920,16 +949,8 @@ int sys_execve(const char* fname, char** argv, char** env)
while ((*dest++ = *src++) != 0);
}
spinlock_lock(&curr_task->vma_lock);
// remove old program
while((tmp = curr_task->vma_list) != NULL) {
kfree((void*) tmp->start, tmp->end - tmp->start + 1);
curr_task->vma_list = tmp->next;
kfree((void*) tmp, sizeof(vma_t));
}
spinlock_unlock(&curr_task->vma_lock);
drop_vma_list();
/*
* we use a trap gate to enter the kernel
@@ -940,7 +961,7 @@ int sys_execve(const char* fname, char** argv, char** env)
ret = load_task(load_args);
kfree(load_args, sizeof(load_args_t));
kfree(load_args);
return ret;
}


@@ -34,13 +34,7 @@
#define VGA_EARLY_PRINT 1
#define UART_EARLY_PRINT 2
#ifdef CONFIG_VGA
static uint32_t early_print = VGA_EARLY_PRINT;
#elif defined(CONFIG_UART)
static uint32_t early_print = UART_EARLY_PRINT;
#else
static uint32_t early_print = NO_EARLY_PRINT;
#endif
static spinlock_irqsave_t olock = SPINLOCK_IRQSAVE_INIT;
static atomic_int32_t kmsg_counter = ATOMIC_INIT(0);
static unsigned char kmessages[KMSG_SIZE] __attribute__ ((section(".kmsg"))) = {[0 ... KMSG_SIZE-1] = 0x00};
@@ -136,7 +130,7 @@ int kmsg_init(vfs_node_t * node, const char *name)
}
} while (blist);
kfree(new_node, sizeof(vfs_node_t));
kfree(new_node);
return -ENOMEM;
}
@@ -145,6 +139,10 @@ int koutput_init(void)
{
#ifdef CONFIG_VGA
vga_init();
early_print |= VGA_EARLY_PRINT;
#endif
#ifdef CONFIG_UART
early_print |= UART_EARLY_PRINT;
#endif
return 0;
@@ -161,11 +159,11 @@ int kputchar(int c)
kmessages[pos % KMSG_SIZE] = (unsigned char) c;
#ifdef CONFIG_VGA
if (early_print == VGA_EARLY_PRINT)
if (early_print & VGA_EARLY_PRINT)
vga_putchar(c);
#endif
#ifdef CONFIG_UART
if (early_print == UART_EARLY_PRINT)
if (early_print & UART_EARLY_PRINT)
uart_putchar(c);
#endif
@@ -186,11 +184,11 @@ int kputs(const char *str)
pos = atomic_int32_inc(&kmsg_counter);
kmessages[pos % KMSG_SIZE] = str[i];
#ifdef CONFIG_VGA
if (early_print == VGA_EARLY_PRINT)
if (early_print & VGA_EARLY_PRINT)
vga_putchar(str[i]);
#endif
#ifdef CONFIG_UART
if (early_print == UART_EARLY_PRINT)
if (early_print & UART_EARLY_PRINT)
uart_putchar(str[i]);
#endif
}
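// note: since early_print is a bitmask now, VGA and UART output can be
// active at the same time; koutput_init() simply ORs in the flag for every
// configured output driver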


@@ -1,4 +1,4 @@
C_source := memory.c vma.c
C_source := memory.c vma.c malloc.c
MODULE := mm
include $(TOPDIR)/Makefile.inc

mm/malloc.c (new file, 207 lines)

@@ -0,0 +1,207 @@
/*
* Copyright 2010 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <metalsvm/malloc.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/stdio.h>
#include <metalsvm/mmu.h>
/// One free list per power-of-two size exponent
static buddy_t* buddy_lists[BUDDY_LISTS] = { NULL };
/// Lock for the buddy lists
static spinlock_t buddy_lock = SPINLOCK_INIT;
/** @brief Check if larger free buddies are available */
static inline int buddy_large_avail(uint8_t exp)
{
while (exp<BUDDY_MAX && !buddy_lists[exp-BUDDY_MIN])
exp++;
return exp != BUDDY_MAX;
}
/** @brief Calculate the required buddy size */
static inline int buddy_exp(size_t sz)
{
int exp;
for (exp=0; sz>(1<<exp); exp++);
if (exp > BUDDY_MAX)
exp = 0;
if (exp < BUDDY_MIN)
exp = BUDDY_MIN;
return exp;
}
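/*
 * Worked example (illustrative; the actual bounds are defined in
 * metalsvm/malloc.h): for sz = 100 the loop stops at exp = 7 because
 * 2^6 = 64 < 100 <= 128 = 2^7. Requests below 2^BUDDY_MIN are padded up
 * to BUDDY_MIN, and a request beyond 2^BUDDY_MAX yields 0, which the
 * callers treat as an allocation failure.
 */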
/** @brief Get a free buddy by potentially splitting a larger one */
static buddy_t* buddy_get(int exp)
{
spinlock_lock(&buddy_lock);
buddy_t** list = &buddy_lists[exp-BUDDY_MIN];
buddy_t* buddy = *list;
buddy_t* split;
if (buddy)
// there is already a free buddy =>
// we remove it from the list
*list = buddy->next;
else if (exp >= BUDDY_ALLOC && !buddy_large_avail(exp))
// there's no free buddy larger than exp =>
// we can allocate new memory
buddy = (buddy_t*) palloc(1<<exp, 0);
else {
// we recursively request a larger buddy...
buddy = buddy_get(exp+1);
if (BUILTIN_EXPECT(!buddy, 0))
goto out;
// ... and split it by putting the second half back on the list
split = (buddy_t*) ((size_t) buddy + (1<<exp));
split->next = *list;
*list = split;
}
out:
spinlock_unlock(&buddy_lock);
return buddy;
}
/** @brief Put a buddy back to its free list
*
* TODO: merge adjacent buddies (memory compaction)
*/
static void buddy_put(buddy_t* buddy)
{
spinlock_lock(&buddy_lock);
buddy_t** list = &buddy_lists[buddy->prefix.exponent-BUDDY_MIN];
buddy->next = *list;
*list = buddy;
spinlock_unlock(&buddy_lock);
}
void buddy_dump()
{
size_t free = 0;
int i;
for (i=0; i<BUDDY_LISTS; i++) {
buddy_t* buddy;
int exp = i+BUDDY_MIN;
if (buddy_lists[i])
kprintf("buddy_list[%u] (exp=%u, size=%lu bytes):\n", i, exp, 1<<exp);
for (buddy=buddy_lists[i]; buddy; buddy=buddy->next) {
kprintf(" %p -> %p \n", buddy, buddy->next);
free += 1<<exp;
}
}
kprintf("free buddies: %lu bytes\n", free);
}
void* palloc(size_t sz, uint32_t flags)
{
size_t phyaddr, viraddr;
uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
kprintf("palloc(%lu) (%lu pages)\n", sz, npages); // TODO: remove
// get free virtual address space
viraddr = vma_alloc(npages*PAGE_SIZE, VMA_HEAP);
if (BUILTIN_EXPECT(!viraddr, 0))
return NULL;
// get contiguous physical pages
phyaddr = get_pages(npages);
if (BUILTIN_EXPECT(!phyaddr, 0)) {
vma_free(viraddr, viraddr+npages*PAGE_SIZE);
return NULL;
}
// map physical pages to VMA
viraddr = map_region(viraddr, phyaddr, npages, flags);
if (BUILTIN_EXPECT(!viraddr, 0)) {
vma_free(viraddr, viraddr+npages*PAGE_SIZE);
put_pages(phyaddr, npages);
return NULL;
}
return (void*) viraddr;
}
void pfree(void* addr, size_t sz)
{
if (BUILTIN_EXPECT(!addr || !sz, 0))
return;
size_t i;
size_t phyaddr;
size_t viraddr = (size_t) addr & PAGE_MASK;
uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
// memory is probably not contiguously mapped!
for (i=0; i<npages; i++) {
phyaddr = virt_to_phys(viraddr+i*PAGE_SIZE);
put_page(phyaddr);
}
unmap_region(viraddr, npages);
vma_free(viraddr, viraddr+npages*PAGE_SIZE);
}
void* kmalloc(size_t sz)
{
if (BUILTIN_EXPECT(!sz, 0))
return NULL;
// add space for the prefix
sz += sizeof(buddy_t);
kprintf("kmalloc(%lu)\n", sz); // TODO: remove
int exp = buddy_exp(sz);
if (BUILTIN_EXPECT(!exp, 0))
return NULL;
buddy_t* buddy = buddy_get(exp);
if (BUILTIN_EXPECT(!buddy, 0))
return NULL;
// setup buddy prefix
buddy->prefix.magic = BUDDY_MAGIC;
buddy->prefix.exponent = exp;
// pointer arithmetic: we hide the prefix
return buddy+1;
}
void kfree(void *addr)
{
if (BUILTIN_EXPECT(!addr, 0))
return;
buddy_t* buddy = (buddy_t*) addr - 1; // get prefix
// check magic
if (BUILTIN_EXPECT(buddy->prefix.magic != BUDDY_MAGIC, 0))
return;
buddy_put(buddy);
}
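/*
 * Usage sketch (sizes illustrative): kmalloc(100) pads the request by
 * sizeof(buddy_t), rounds it up to the next power of two and asks
 * buddy_get() for a suitable block:
 *
 *   block returned by buddy_get()   pointer handed to the caller
 *   v                               v
 *   +-------------------------------+-----------------------------+
 *   | prefix (magic, exponent)      | payload (>= 100 bytes)      |
 *   +-------------------------------+-----------------------------+
 *   |<-------------------- 2^exponent bytes -------------------->|
 *
 * kfree() steps back by one buddy_t, verifies the magic and pushes the
 * buddy onto its free list again.
 */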


@@ -37,17 +37,15 @@
#endif
/*
* 0 => free
* 1 => occupied
*
* Set whole address space as occupied
* Set whole address space as occupied:
* 0 => free, 1 => occupied
*/
static uint8_t bitmap[BITMAP_SIZE]; // = {[0 ... BITMAP_SIZE-1] = 0xFF};
static spinlock_t bitmap_lock = SPINLOCK_INIT;
static size_t alloc_start;
atomic_int32_t total_pages = ATOMIC_INIT(0);
atomic_int32_t total_allocated_pages = ATOMIC_INIT(0);
atomic_int32_t total_available_pages = ATOMIC_INIT(0);
static uint8_t bitmap[BITMAP_SIZE] = {[0 ... BITMAP_SIZE-1] = 0xFF};
static spinlock_t bitmap_lock = SPINLOCK_INIT;
atomic_int32_t total_pages = ATOMIC_INIT(0);
atomic_int32_t total_allocated_pages = ATOMIC_INIT(0);
atomic_int32_t total_available_pages = ATOMIC_INIT(0);
/*
* Note that linker symbols are not variables, they have no memory allocated for
@@ -64,20 +62,12 @@ inline static int page_marked(size_t i)
return (bitmap[index] & (1 << mod));
}
inline static int page_unmarked(size_t i)
{
return !page_marked(i);
}
inline static void page_set_mark(size_t i)
{
size_t index = i >> 3;
size_t mod = i & 0x7;
//if (page_marked(i))
// kprintf("page %u is alread marked\n", i);
bitmap[index] = bitmap[index] | (1 << mod);
bitmap[index] = bitmap[index] | (1 << mod);
}
inline static void page_clear_mark(size_t i)
@@ -85,178 +75,16 @@ inline static void page_clear_mark(size_t i)
size_t index = i / 8;
size_t mod = i % 8;
if (page_unmarked(i))
kprintf("page %u is already unmarked\n", i);
bitmap[index] = bitmap[index] & ~(1 << mod);
}
int mmu_init(void)
{
size_t kernel_size;
unsigned int i;
size_t addr;
int ret = 0;
// at first, set default value of the bitmap
memset(bitmap, 0xFF, sizeof(uint8_t)*BITMAP_SIZE);
#ifdef CONFIG_MULTIBOOT
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
size_t end_addr;
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) ((size_t) mb_info->mmap_addr);
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
while (mmap < mmap_end) {
if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) {
/* set the available memory as "unused" */
addr = mmap->addr;
end_addr = addr + mmap->len;
while (addr < end_addr) {
page_clear_mark(addr >> PAGE_SHIFT);
addr += PAGE_SIZE;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
}
mmap++;
}
} else {
kputs("Unable to initialize the memory management subsystem\n");
while(1) {
HALT;
}
}
#elif defined(CONFIG_ROCKCREEK)
/* of course, the first slots belong to the private memory */
for(addr=0x00; addr<1*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_SHIFT);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
// Note: The last slot always belongs to the private memory.
for(addr=0xFF000000; addr<0xFFFFFFFF; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_SHIFT);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
/*
* Mark the bootinfo as used.
*/
page_set_mark((size_t)bootinfo >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
#else
#error Currently, MetalSVM supports only the Multiboot specification or the RockCreek processor!
#endif
kernel_size = (size_t) &kernel_end - (size_t) &kernel_start;
if (kernel_size & (PAGE_SIZE-1))
kernel_size += PAGE_SIZE - (kernel_size & (PAGE_SIZE-1));
atomic_int32_add(&total_allocated_pages, kernel_size >> PAGE_SHIFT);
atomic_int32_sub(&total_available_pages, kernel_size >> PAGE_SHIFT);
/* set kernel space as used */
for(i=(size_t) &kernel_start >> PAGE_SHIFT; i < (size_t) &kernel_end >> PAGE_SHIFT; i++)
page_set_mark(i);
if ((size_t) &kernel_end & (PAGE_SIZE-1))
page_set_mark(i);
alloc_start = (size_t) &kernel_end >> PAGE_SHIFT;
if ((size_t) &kernel_end & (PAGE_SIZE-1))
alloc_start++;
#if MAX_CORES > 1
// reserve physical page for SMP boot code
page_set_mark(SMP_SETUP_ADDR >> PAGE_SHIFT);
atomic_int32_add(&total_allocated_pages, 1);
atomic_int32_sub(&total_available_pages, 1);
#endif
ret = paging_init();
if (ret) {
kprintf("Failed to initialize paging: %d\n", ret);
return ret;
}
#ifdef CONFIG_MULTIBOOT
/*
* Modules like the init ram disk are already loaded.
* Therefore, we set these pages as used.
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
/*
* Mark the mb_info as used.
*/
page_set_mark((size_t)mb_info >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
for(addr = mb_info->mods_addr; addr < mb_info->mods_addr + mb_info->mods_count * sizeof(multiboot_module_t); addr += PAGE_SIZE) {
page_set_mark(addr >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
for(i=0; i<mb_info->mods_count; i++, mmodule++) {
for(addr=mmodule->mod_start; addr<mmodule->mod_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
}
}
#elif defined(CONFIG_ROCKCREEK)
/*
* Now, we are able to read the FPGA registers and to
* determine the number of slots for private memory.
*/
uint32_t slots = *((volatile uint8_t*) (FPGA_BASE + 0x8244));
if (slots == 0)
slots = 1;
kprintf("MetalSVM use %d slots for private memory\n", slots);
// define the residual private slots as free
for(addr=1*0x1000000; addr<slots*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_SHIFT);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
/*
* The init ram disk is already loaded.
* Therefore, we set these pages as used.
*/
for(addr=bootinfo->addr; addr < bootinfo->addr+bootinfo->size; addr+=PAGE_SIZE) {
// This area is already mapped, so we need to virt_to_phys() these addresses.
page_set_mark(virt_to_phys(addr) >> PAGE_SHIFT);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
#endif
return ret;
}
/*
* Use first fit algorithm to find a suitable physical memory region
*/
size_t get_pages(uint32_t npages)
{
uint32_t i, j, l;
uint32_t k = 0;
// first page is reserved
static size_t start = 1;
size_t i, j, l;
size_t k = 0;
size_t ret = 0;
if (BUILTIN_EXPECT(!npages, 0))
@@ -266,7 +94,8 @@ size_t get_pages(uint32_t npages)
return ret;
spinlock_lock(&bitmap_lock);
i = alloc_start;
i = start;
next_try:
while((k < BITMAP_SIZE) && page_marked(i)) {
k++;
@@ -284,7 +113,7 @@ next_try:
}
if (i+j >= BITMAP_SIZE) {
i = 0;
i = 1;
goto next_try;
}
@@ -292,11 +121,12 @@ next_try:
goto oom;
ret = i*PAGE_SIZE;
//kprintf("alloc: ret 0x%x, i = %d, j = %d, npages = %d\n", ret, i, j, npages);
kprintf("get_pages: ret 0x%x, i = %d, j = %d, npages = %d\n", ret, i, j, npages); // TODO: remove
for(l=i; l<i+j; l++)
page_set_mark(l);
alloc_start = i+j;
start = i+j;
spinlock_unlock(&bitmap_lock);
atomic_int32_add(&total_allocated_pages, npages);
@@ -310,88 +140,253 @@ oom:
return ret;
}
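/*
 * Example (sketch): get_pages(4) scans the bitmap for four consecutive
 * free bits beginning at 'start', marks them as used and returns the
 * physical address of the first page; if the search wraps around without
 * finding a suitable hole, it gives up and returns 0.
 */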
int put_page(size_t phyaddr)
int put_pages(size_t phyaddr, size_t npages)
{
uint32_t index = phyaddr >> PAGE_SHIFT;
if (BUILTIN_EXPECT(!phyaddr, 0))
if (BUILTIN_EXPECT(!phyaddr || !npages, 0))
return -EINVAL;
spinlock_lock(&bitmap_lock);
page_clear_mark(index);
spinlock_unlock(&bitmap_lock);
atomic_int32_sub(&total_allocated_pages, 1);
atomic_int32_add(&total_available_pages, 1);
return 0;
}
void* mem_allocation(size_t sz, uint32_t flags)
{
size_t phyaddr, viraddr;
uint32_t npages = sz >> PAGE_SHIFT;
if (sz & (PAGE_SIZE-1))
npages++;
phyaddr = get_pages(npages);
if (BUILTIN_EXPECT(!phyaddr, 0))
return 0;
viraddr = map_region(0, phyaddr, npages, flags);
return (void*) viraddr;
}
void* kmalloc(size_t sz)
{
return mem_allocation(sz, MAP_KERNEL_SPACE);
}
void kfree(void* addr, size_t sz)
{
uint32_t index, npages, i;
size_t phyaddr;
if (BUILTIN_EXPECT(!addr && !sz, 0))
return;
npages = sz >> PAGE_SHIFT;
if (sz & (PAGE_SIZE-1))
npages++;
size_t i, ret = 0;
size_t base = phyaddr >> PAGE_BITS;
spinlock_lock(&bitmap_lock);
for(i=0; i<npages; i++) {
unmap_region((size_t) addr+i*PAGE_SIZE, 1);
phyaddr = virt_to_phys((size_t) addr+i*PAGE_SIZE);
if (!phyaddr)
continue;
index = phyaddr >> PAGE_SHIFT;
page_clear_mark(index);
for (i=0; i<npages; i++) {
if (page_marked(base+i)) {
page_clear_mark(base+i);
ret++;
}
}
spinlock_unlock(&bitmap_lock);
vm_free((size_t) addr, npages);
atomic_int32_sub(&total_allocated_pages, ret);
atomic_int32_add(&total_available_pages, ret);
atomic_int32_sub(&total_allocated_pages, npages);
atomic_int32_add(&total_available_pages, npages);
kprintf("put_pages: phyaddr=%#lx, npages = %d, ret = %d\n", phyaddr, npages, ret); // TODO: remove
return ret;
}
void* create_stack(void)
int copy_pages(size_t pdest, size_t psrc, size_t npages)
{
return kmalloc(KERNEL_STACK_SIZE);
size_t viraddr;
size_t vdest, vsrc;
// allocate virtual memory areas
viraddr = vma_alloc(2*npages*PAGE_SIZE, VMA_HEAP);
if (BUILTIN_EXPECT(!viraddr, 0))
return -ENOMEM;
// map pages
vsrc = map_region(viraddr, psrc, npages, MAP_KERNEL_SPACE);
vdest = map_region(viraddr+npages*PAGE_SIZE, pdest, npages, MAP_KERNEL_SPACE);
if (BUILTIN_EXPECT(!vsrc || !vdest, 0)) {
unmap_region(viraddr, 2*npages);
return -ENOMEM;
}
kprintf("copy_pages: copy %u pages from: %#lx (%#lx) to %#lx (%#lx)\n", npages, vsrc, psrc, vdest, pdest); // TODO remove
// copy the whole region
memcpy((void*) vdest, (void*) vsrc, npages*PAGE_SIZE);
// housekeeping
unmap_region(viraddr, 2*npages);
vma_free(viraddr, viraddr+2*npages*PAGE_SIZE);
return pdest;
}
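/*
 * Usage sketch: copy_pages(dst, src, n) temporarily maps both physical
 * ranges into a 2*n page window of the kernel's VMA space, memcpy()s
 * n*PAGE_SIZE bytes from the source to the destination frames and tears
 * the window down again; this is how, e.g., page frames can be duplicated
 * for a forked address space.
 */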
int destroy_stack(task_t* task)
int mmu_init(void)
{
if (BUILTIN_EXPECT(!task || !task->stack, 0))
return -EINVAL;
unsigned int i;
size_t addr;
int ret = 0;
kfree(task->stack, KERNEL_STACK_SIZE);
#ifdef CONFIG_MULTIBOOT
if (mb_info) {
if (mb_info->flags & MULTIBOOT_INFO_MEM_MAP) {
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) ((size_t) mb_info->mmap_addr);
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
return 0;
// mark available memory as free
while (mmap < mmap_end) {
if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) {
for (addr=mmap->addr; addr < mmap->addr + mmap->len; addr += PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
}
mmap++;
}
}
else if (mb_info->flags & MULTIBOOT_INFO_MEM) {
size_t page;
size_t pages_lower = mb_info->mem_lower >> 2;
size_t pages_upper = mb_info->mem_upper >> 2;
for (page=0; page<pages_lower; page++)
page_clear_mark(page);
for (page=0x100; page<pages_upper+0x100; page++) // upper memory starts at 1 MiB, i.e. page 0x100
page_clear_mark(page);
atomic_int32_add(&total_pages, pages_lower + pages_upper);
atomic_int32_add(&total_available_pages, pages_lower + pages_upper);
}
else {
kputs("Unable to initialize the memory management subsystem\n");
while (1) HALT;
}
// mark mb_info as used
page_set_mark((size_t) mb_info >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
// mark modules list as used
if (mb_info->flags & MULTIBOOT_INFO_MODS) {
for(addr=mb_info->mods_addr; addr<mb_info->mods_addr+mb_info->mods_count*sizeof(multiboot_module_t); addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
}
}
#elif defined(CONFIG_ROCKCREEK)
// of course, the first slots belong to the private memory
for(addr=0x00; addr<1*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
// Note: The last slot always belongs to the private memory.
for(addr=0xFF000000; addr<0xFFFFFFFF; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
// mark the bootinfo as used.
page_set_mark((size_t)bootinfo >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
#else
#error Currently, MetalSVM supports only the Multiboot specification or the RockCreek processor!
#endif
// mark kernel as used
for(addr=(size_t) &kernel_start; addr<(size_t) &kernel_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
#if MAX_CORES > 1
page_set_mark(SMP_SETUP_ADDR >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
#endif
// enable paging and map SMP, VGA, Multiboot modules etc.
ret = paging_init();
if (ret) {
kprintf("Failed to initialize paging: %d\n", ret);
return ret;
}
// add kernel to VMA list
vma_add(PAGE_CEIL((size_t) &kernel_start),
PAGE_FLOOR((size_t) &kernel_end),
VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
// add LAPIC to VMA list
vma_add((size_t) &kernel_start - PAGE_SIZE,
(size_t) &kernel_start,
VMA_READ|VMA_WRITE);
#ifdef CONFIG_VGA
// add VGA to VMA list
vma_add(PAGE_CEIL(VIDEO_MEM_ADDR),
PAGE_FLOOR(VIDEO_MEM_ADDR) + PAGE_SIZE,
VMA_READ|VMA_WRITE);
#endif
#if MAX_CORES > 1
// reserve page for SMP boot code
vma_add(PAGE_CEIL(SMP_SETUP_ADDR),
PAGE_FLOOR(SMP_SETUP_ADDR) + PAGE_SIZE,
VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
#endif
#ifdef CONFIG_MULTIBOOT
/*
* Modules like the init ram disk are already loaded.
* Therefore, we set these pages as used.
*/
if (mb_info) {
vma_add(PAGE_CEIL((size_t) mb_info),
PAGE_FLOOR((size_t) mb_info + sizeof(multiboot_info_t)),
VMA_READ|VMA_CACHEABLE);
if (mb_info->flags & MULTIBOOT_INFO_MODS) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
vma_add(PAGE_CEIL((size_t) mb_info->mods_addr),
PAGE_FLOOR((size_t) mb_info->mods_addr + mb_info->mods_count*sizeof(multiboot_module_t)),
VMA_READ|VMA_CACHEABLE);
for(i=0; i<mb_info->mods_count; i++) {
vma_add(PAGE_CEIL(mmodule[i].mod_start),
PAGE_FLOOR(mmodule[i].mod_end),
VMA_READ|VMA_WRITE|VMA_CACHEABLE);
for(addr=mmodule[i].mod_start; addr<mmodule[i].mod_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
}
}
}
#elif defined(CONFIG_ROCKCREEK)
/*
* Now, we are able to read the FPGA registers and to
* determine the number of slots for private memory.
*/
uint32_t slots = *((volatile uint8_t*) (FPGA_BASE + 0x8244));
if (slots == 0)
slots = 1;
kprintf("MetalSVM use %d slots for private memory\n", slots);
// define the residual private slots as free
for(addr=1*0x1000000; addr<slots*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
/*
* The init ram disk is already loaded.
* Therefore, we set these pages as used.
*/
for(addr=bootinfo->addr; addr<bootinfo->addr+bootinfo->size; addr+=PAGE_SIZE) {
// this area is already mapped, so we need to virt_to_phys() these addresses.
page_set_mark(virt_to_phys(addr) >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
#endif
return ret;
}

mm/vma.c (341 changed lines)

@@ -1,5 +1,5 @@
/*
* Copyright 2011 Stefan Lankes, Chair for Operating Systems,
* Copyright 2011 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,85 +17,318 @@
* This file is part of MetalSVM.
*/
#include <metalsvm/vma.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/stdio.h>
#include <metalsvm/tasks_types.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/vma.h>
#include <metalsvm/errno.h>
/*
* add a new virtual memory region to the list of VMAs
* Kernel space VMA list and lock
*
* For bootstrapping we initialize the VMA list with one empty VMA
* (start == end) and expand this VMA by calls to vma_alloc()
*/
int vma_add(task_t* task, size_t start, size_t end, uint32_t type)
static vma_t vma_boot = { VMA_KERN_MIN, VMA_KERN_MIN, VMA_HEAP };
static vma_t* vma_list = &vma_boot;
static spinlock_t vma_lock = SPINLOCK_INIT;
size_t vma_alloc(size_t size, uint32_t flags)
{
vma_t* new_vma;
if (BUILTIN_EXPECT(!task || start > end, 0))
task_t* task = per_core(current_task);
spinlock_t* lock;
vma_t** list;
kprintf("vma_alloc(0x%lx, 0x%x)\n", size, flags);
size_t base, limit; // boundaries for search
size_t start, end;
if (BUILTIN_EXPECT(!size, 0))
return 0;
if (flags & VMA_USER) {
base = VMA_KERN_MAX;
limit = VMA_USER_MAX;
list = &task->vma_list;
lock = &task->vma_lock;
}
else {
base = VMA_KERN_MIN;
limit = VMA_KERN_MAX;
list = &vma_list;
lock = &vma_lock;
}
spinlock_lock(lock);
// first fit search for free memory area
vma_t* pred = NULL; // vma before current gap
vma_t* succ = *list; // vma after current gap
do {
start = (pred) ? pred->end : base;
end = (succ) ? succ->start : limit;
if (end > start && end - start > size)
break; // we found a gap
pred = succ;
succ = (succ) ? succ->next : NULL;
} while (pred || succ);
if (BUILTIN_EXPECT(end > limit || end < start || end - start < size, 0)) {
spinlock_unlock(lock);
return 0;
}
if (pred && pred->flags == flags) {
pred->end = start+size;
}
else {
// insert new VMA
vma_t* new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(lock);
return 0;
}
new->start = start;
new->end = start+size;
new->flags = flags;
new->next = succ;
new->prev = pred;
if (succ)
succ->prev = new;
if (pred)
pred->next = new;
else
*list = new;
}
spinlock_unlock(lock);
return start;
}
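/*
 * Example (illustrative): with only the boot VMA [VMA_KERN_MIN,
 * VMA_KERN_MIN) in the list, vma_alloc(2*PAGE_SIZE, VMA_HEAP) finds the
 * gap behind it, extends the predecessor in place (the flags match) and
 * returns VMA_KERN_MIN; a second identical call returns
 * VMA_KERN_MIN + 2*PAGE_SIZE.
 */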
int vma_free(size_t start, size_t end)
{
task_t* task = per_core(current_task);
spinlock_t* lock;
vma_t* vma;
vma_t** list;
if (BUILTIN_EXPECT(start >= end, 0))
return -EINVAL;
new_vma = kmalloc(sizeof(new_vma));
if (!new_vma)
if (end < VMA_KERN_MAX) {
lock = &vma_lock;
list = &vma_list;
}
else if (start >= VMA_KERN_MAX) {
lock = &task->vma_lock;
list = &task->vma_list;
}
else
// a range spanning the kernel/user boundary is invalid
return -EINVAL;
if (BUILTIN_EXPECT(!*list, 0))
return -EINVAL;
spinlock_lock(lock);
// search vma
vma = *list;
while (vma) {
if (start >= vma->start && end <= vma->end) break;
vma = vma->next;
}
if (BUILTIN_EXPECT(!vma, 0)) {
spinlock_unlock(lock);
return -EINVAL;
}
// free/resize vma
if (start == vma->start && end == vma->end) {
if (vma == *list)
*list = vma->next; // update list head
if (vma->prev)
vma->prev->next = vma->next;
if (vma->next)
vma->next->prev = vma->prev;
kfree(vma);
}
else if (start == vma->start)
vma->start = end;
else if (end == vma->end)
vma->end = start;
else {
vma_t* new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(lock);
return -ENOMEM;
}
new->end = vma->end;
vma->end = start;
new->start = end;
new->next = vma->next;
vma->next = new;
new->prev = vma;
}
spinlock_unlock(lock);
return 0;
}
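/*
 * Example: for an existing VMA [A,D), vma_free(B,C) with A < B < C < D
 * shrinks it to [A,B) and inserts a new VMA [C,D) behind it; freeing
 * exactly [A,D) unlinks the VMA and releases it with kfree().
 */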
int vma_add(size_t start, size_t end, uint32_t flags)
{
task_t* task = per_core(current_task);
spinlock_t* lock;
vma_t** list;
kprintf("vma_add(0x%lx, 0x%lx, 0x%x)\n", start, end, flags);
if (BUILTIN_EXPECT(start >= end, 0))
return -EINVAL;
if (flags & VMA_USER) {
list = &task->vma_list;
lock = &task->vma_lock;
// check if address is in userspace
if (BUILTIN_EXPECT(start < VMA_KERN_MAX, 0))
return -EINVAL;
}
else {
list = &vma_list;
lock = &vma_lock;
// check if address is in kernelspace
if (BUILTIN_EXPECT(end >= VMA_KERN_MAX, 0))
return -EINVAL;
}
spinlock_lock(lock);
// search gap
vma_t* pred = NULL;
vma_t* succ = *list;
do {
if ((!pred || pred->end <= start) &&
(!succ || succ->start >= end))
break;
pred = succ;
succ = (succ) ? succ->next : NULL;
} while (pred || succ);
// TODO: check bounds
// insert new VMA
vma_t* new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(lock);
return -ENOMEM;
spinlock_lock(&task->vma_lock);
new_vma->start = start;
new_vma->end = end;
new_vma->type = type;
if (!(task->vma_list)) {
new_vma->next = new_vma->prev = NULL;
task->vma_list = new_vma;
} else {
vma_t* tmp = task->vma_list;
while (tmp->next && tmp->start < start)
tmp = tmp->next;
new_vma->next = tmp->next;
new_vma->prev = tmp;
tmp->next = new_vma;
}
spinlock_unlock(&task->vma_lock);
new->start = start;
new->end = end;
new->flags = flags;
new->next = succ;
new->prev = pred;
if (succ)
succ->prev = new;
if (pred)
pred->next = new;
else
*list = new;
spinlock_unlock(lock);
return 0;
}
int vma_dump(task_t* task)
int copy_vma_list(task_t* task)
{
vma_t* tmp;
task_t* parent_task = per_core(current_task);
if (BUILTIN_EXPECT(!task, 0))
return -EINVAL;
spinlock_init(&task->vma_lock);
spinlock_lock(&parent_task->vma_lock);
spinlock_lock(&task->vma_lock);
vma_t* last = NULL;
vma_t* parent = parent_task->vma_list;
while (parent) {
vma_t *new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(&task->vma_lock);
spinlock_unlock(&parent_task->vma_lock);
return -ENOMEM;
}
new->start = parent->start;
new->end = parent->end;
new->flags = parent->flags;
new->prev = last;
if (last)
last->next = new;
else
task->vma_list = new;
last = new;
parent = parent->next;
}
spinlock_unlock(&task->vma_lock);
spinlock_unlock(&parent_task->vma_lock);

return 0;
}
int drop_vma_list()
{
task_t* task = per_core(current_task);
vma_t* vma;
kprintf("drop_vma_list: task = %u\n", task->id); // TODO: remove
spinlock_lock(&task->vma_lock);
tmp = task->vma_list;
while (tmp) {
kprintf("%8x - %8x: ", tmp->start, tmp->end);
if (tmp->type & VMA_READ)
kputs("r");
else
kputs("-");
if (tmp->type & VMA_WRITE)
kputs("w");
else
kputs("-");
if (tmp->type & VMA_EXECUTE)
kputs("x");
else
kputs("-");
kputs("\n");
tmp = tmp->next;
while ((vma = task->vma_list) != NULL) {
task->vma_list = vma->next;
kfree(vma);
}
spinlock_unlock(&task->vma_lock);
return 0;
}
void vma_dump()
{
void print_vma(vma_t *vma) {
while (vma) {
kprintf("0x%lx - 0x%lx: size=%x, flags=%c%c%c\n", vma->start, vma->end, vma->end - vma->start,
(vma->flags & VMA_READ) ? 'r' : '-',
(vma->flags & VMA_WRITE) ? 'w' : '-',
(vma->flags & VMA_EXECUTE) ? 'x' : '-');
vma = vma->next;
}
}
task_t* task = per_core(current_task);
kputs("Kernelspace VMAs:\n");
spinlock_lock(&vma_lock);
print_vma(vma_list);
spinlock_unlock(&vma_lock);
kputs("Userspace VMAs:\n");
spinlock_lock(&task->vma_lock);
print_vma(task->vma_list);
spinlock_unlock(&task->vma_lock);
}
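/*
 * Sample output (addresses purely illustrative):
 *   Kernelspace VMAs:
 *   0xc0000000 - 0xc0100000: size=100000, flags=rwx
 *   Userspace VMAs:
 *   0x40200000 - 0x40216000: size=16000, flags=r-x
 */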


@@ -68,7 +68,7 @@ int main(int argc, char** argv)
exit(1);
}
testdirent = readdir(testdir);
printf("1. Dirent: %s", testdirent->d_name);
printf("1. Dirent: %s\n", testdirent->d_name);
closedir(testdir);
return errno;


@@ -20,41 +20,82 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <dirent.h>
#include <time.h>
#include <sys/times.h>
void sleep(int sec) {
struct tms tms;
clock_t t, s = times(&tms);
do {
t = times(&tms);
}
while (t - s <= 1000 * sec);
}
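// note: this busy-waits and assumes that the clock returned by times()
// advances at 1000 ticks per second (see the '1000 * sec' bound above)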
int print_usage() {
printf("usage: [size mb/kb/b]");
exit(0);
printf("usage: size mb/kb/b [chunks]\n");
exit(-1);
}
int main(int argc, char** argv)
{
int m = 0;
uint32_t size = 0;
if(argc <= 2)
print_usage();
if(argc == 3) {
if(!strcmp(argv[2], "mb"))
m = 1024*1024;
else if(!strcmp(argv[2], "kb"))
m = 1024;
else if(!strcmp(argv[2], "b"))
m = 0;
else
print_usage();
}
if(argc > 3)
print_usage();
size = atoi(argv[1]);
if(size <= 0)
int multp = 0;
int size = 0;
int chunks = 1;
void **test;
int i;
if (argc <= 2 || argc > 4)
print_usage();
size *= m;
uint8_t* test = malloc(size);
printf("malloc(%d) - START: %p END: %p \n", size, test, test + size);
size = atoi(argv[1]);
if (size <= 0)
print_usage();
if (!strcasecmp(argv[2], "mb"))
multp = (1 << 20);
else if (!strcasecmp(argv[2], "kb"))
multp = (1 << 10);
else if (!strcasecmp(argv[2], "b"))
multp = (1 << 0);
else
print_usage();
size *= multp;
if (argc == 4)
chunks = atoi(argv[3]);
test = malloc(chunks * sizeof(void *));
printf("malloc(%lu)\n", chunks * sizeof(void *));
if (!test) {
printf("malloc(%lu) - FAILED!\n", chunks * sizeof(void *));
exit(-1);
}
// allocate...
for (i = 0; i < chunks; i++) {
test[i] = malloc(size);
if (test[i])
printf("malloc(%d)\tCHUNK: %d START: %p END: %p\n", size, i, test[i], test[i] + size);
else
printf("malloc(%d)\tFAILED! Abort allocation, start with freeing memory\n", size);
sleep(1);
}
// and release again
for (i = 0; i < chunks; i++) {
if (test[i]) {
free(test[i]);
printf("free(%p)\tCHUNK: %d\n", test[i], i);
}
sleep(1);
}
free(test);
printf("free(%p)\n", test);
return 0;
}


@@ -56,9 +56,8 @@ L1:
call rax
L2:
; register a function to be called at normal process termination
push __do_global_dtors
mov rdi, __do_global_dtors
call atexit
pop rax
; call init function
call __do_global_ctors
@@ -76,13 +75,17 @@ L4:
; arguments are already on the stack
; call the user's function
pop rdi ; argc
pop rsi ; argv pointer
pop rdx ; env pointer
call main
; call exit from the C library so atexit gets called, and the
; C++ destructors get run. This calls our exit routine below
; when it's done.
; call "exit"
push rax
mov rdi, rax
call exit
; endless loop


@@ -85,7 +85,7 @@ syscall(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2,
asm volatile (_SYSCALLSTR(INT_SYSCALL)
: "=a" (res)
: "0" (nr), "b" (arg0), "c" (arg1), "d" (arg2), "S" (arg3), "D" (arg4)
: "D" (nr), "S" (arg0), "d" (arg1), "c" (arg2), "b" (arg3), "a" (arg4)
: "memory", "cc");
return res;
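// x86-64 note: the constraints "D", "S", "d", "c", "b", "a" place the
// syscall number in rdi and the arguments in rsi, rdx, rcx, rbx and rax,
// which has to match what the kernel-side trap-gate handler expects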


@@ -2,6 +2,17 @@
symbol-file metalsvm.sym
target remote localhost:1234
# Configure breakpoints and everything as you wish here.
break main
# Debugging 32bit code
#set architecture i386
#break stublet
#continue
# Debugging 64bit code
set architecture i386:x86-64
# Debugging userspace
#add-symbol-file newlib/examples/memtest.sym 0x40200000
#break main
#continue # skip kernel main()
continue