Compare commits

..

154 commits

Author SHA1 Message Date
a5087905df simplified example Makefile
(now the same one as in my eduOS repo)
2014-08-14 20:18:52 +02:00
2f0dc49a7e removed temporary and automatically generated files 2014-08-14 20:07:47 +02:00
1ce6a27fc5 applied some bugfixes from eduOS 2014-06-30 00:37:05 +02:00
7d756306cf removed more 32 bit only paging code which is broken anyway 2014-06-14 17:57:52 +02:00
4f5e2ce13b heading towards merging the 32/64 paging code 2014-05-14 18:56:15 +02:00
e2c8222f86 fixed Makefile for 32 bit building 2014-05-14 18:54:37 +02:00
ff130c4538 moved VMA initialization of APIC and MultiProcessing related stuff to apic.c 2014-05-14 18:01:58 +02:00
8180734e19 some more @authors 2014-05-14 17:43:51 +02:00
44f6905e4d added missing vma region 2014-05-14 17:43:20 +02:00
6f5a7157bb rewrite of multiprocessing table parsing 2014-05-14 17:43:03 +02:00
be3749739d smaller fix for membench 2014-05-14 17:41:36 +02:00
8287ab79d3 fixed SMP with new paging code 2014-05-14 15:17:34 +02:00
1f641e440c simplified macros 2014-05-14 15:13:11 +02:00
c1e52dc8c2 updated PMC driver 2014-05-14 15:12:02 +02:00
d358a78e56 added comments and @author 2014-05-14 15:11:02 +02:00
1d70e0781e added simple stride/range membench 2014-04-23 18:37:34 +02:00
255a0f1636 fixed deadlock 2014-04-23 18:36:35 +02:00
9f7295672f fixed rdtsc on x86_64 (upper 32bits were missing) 2014-04-23 18:36:23 +02:00
cb26b6fe01 fixed typo 2014-04-22 16:59:54 +02:00
ab35409aac the LAPIC page should be marked as not executable 2014-04-15 16:57:03 +02:00
00127d06cf some cleanup 2014-04-15 16:28:02 +02:00
6bd2f64687 set PG_GLOBAL flag on pagetables
this leads to global self-mapped page tables
and therefore reduces the effect of TLB shootdowns.
2014-04-15 16:27:16 +02:00
8cf888bba2 changed arguments of *_vma_list() functions 2014-04-07 15:54:09 +02:00
f3729186e5 fixed bug in vma_alloc boundary checks 2014-04-07 15:53:35 +02:00
91e725bfce added example for memory watchpoint 2014-02-18 13:13:10 +01:00
273137f2e7 fixed sys_fork and sys_execve for new paging code 2014-02-18 13:12:19 +01:00
314fa85389 changed page_stats and page_dump to new prototype 2014-02-18 13:09:25 +01:00
385d14ef67 we do not need executable stacks anymore :-) 2014-02-18 13:08:53 +01:00
3203d53a83 another rewrite of the paging code without nested functions 2014-02-18 13:08:22 +01:00
6e8ecad91f disable irqs during rdtsc bench
wait for child task to terminate
2014-02-18 13:06:51 +01:00
02b312e360 changed memory regions and flags 2014-02-18 13:05:59 +01:00
61ec065da1 some smaller codestyle improvements 2014-02-18 13:03:52 +01:00
7a08120fd2 moved VMA initialization to vma_init() 2014-02-18 12:50:59 +01:00
aa6abef955 added missing kernel VMA regions 2014-02-18 11:11:02 +01:00
39bb5e8d56 fixed mapping for APIC and IOAPIC 2014-02-18 11:06:13 +01:00
94b8897d58 added some debugging printfs() 2014-02-07 11:04:12 +01:00
aa74d98737 removed unused variable 2014-02-07 11:01:21 +01:00
eb0298fc2f moved some helper functions to page_helpers.h (mostly static inline address calculation) 2014-02-07 11:01:10 +01:00
82d681f4a7 added missing #include for uart output 2014-02-07 10:56:52 +01:00
d51237f3f2 fixed missing page map: Multiboot mmap_* fields are not required to be on the same page as the Multiboot info structure itself 2014-02-07 10:56:04 +01:00
93cfc43c55 fixed little bug in bit mask 2014-02-07 10:54:27 +01:00
580007945e statically allocate VMA for copy_page() 2014-02-03 10:28:24 +01:00
990d39afea added inheritance of BIT for makefiles 2014-02-03 10:01:06 +01:00
2f903deebd fixed vma_add() when area is already in use 2014-02-03 09:58:32 +01:00
96e7a99b38 added iPXE script with a short comment about the hardware environment 2014-02-03 09:44:08 +01:00
4ec5733de0 fixing huge initrd by defaulting to a smaller linker page-size 2014-02-03 09:43:49 +01:00
0e6f7b1e32 added more exception names 2014-01-30 14:38:08 +01:00
a755ac5d5e implemented Performance Monitoring Counters and added a simple test benchmark 2014-01-30 14:36:22 +01:00
c70c488336 added userspace testcase for fork() and execve() syscalls 2014-01-28 14:36:59 +01:00
e740cf265e removed obsolete heap_{start,end} from task_t 2014-01-28 11:13:57 +01:00
c6d5656c63 moved list_fs() dump routine to fs/ and improved output 2014-01-28 11:10:54 +01:00
a3d621142a added paging and memory related CPUID feature flags 2014-01-28 11:09:00 +01:00
f0f3a6d4f6 added separate x86 UART routines for HW debugging (115200 baud, 8N1) 2014-01-28 11:07:09 +01:00
5a20e0953f we do not need synchronization for virt_to_phys (atomic memory access) 2014-01-09 16:55:05 +01:00
af9bda06f8 renamed mmu.h to memory.h as it's not related to the MMU and contains
prototypes for memory.c
2014-01-09 16:20:18 +01:00
03d1505acc wait for userspace task to finish 2014-01-09 16:12:54 +01:00
d5f60ef542 fixed two smaller bugs and some compiler warnings 2014-01-09 16:12:13 +01:00
75633d0697 updated tests for memory subsystem 2014-01-09 14:17:50 +01:00
da54e953f9 added note about userspace debugging with gdb (don't forget to add debug flags for newlib!)
2014-01-09 14:10:02 +01:00
594a454b1f updated userspace test application 2014-01-09 14:08:33 +01:00
98197ac076 fixed little bug in stdout handler (output appearing twice) 2014-01-09 14:06:55 +01:00
11977e40bc implemented userspace task loading and userspace heap management with the new vma_list 2014-01-09 14:06:09 +01:00
a00177ec09 adapted c runtime and syscalls to x86-64 ABI calling convention 2014-01-09 14:04:02 +01:00
d7644300a8 code cleanup of task creation and some additions to the previous commit 2014-01-09 13:59:01 +01:00
6699886358 changed vma_list order 2014-01-09 13:45:20 +01:00
ab67350783 that's a huge commit: reimplemented all remaining page map functions according to the new page_iterate() 2014-01-09 13:44:20 +01:00
269bffc208 unified comment fontcase 2014-01-09 13:41:22 +01:00
88a2e573c6 replaced old 32bit only page_copy by new generic one 2014-01-09 13:33:21 +01:00
494ee1299a unified and extended pagefault exception handler
some cleanup
2014-01-09 13:32:00 +01:00
d5ac6e6092 added macros for x86 control registers & some MSRs 2014-01-09 13:13:59 +01:00
066e29fde9 added NX bit support
renamed some CPP macros
2014-01-09 12:49:04 +01:00
c21ea42058 added page_dump() and page_stats() for testing and demonstration of page_iterate() 2013-12-03 16:40:14 +01:00
8fe165c162 added page_iterate(): a recursive page tree walker 2013-12-03 16:34:34 +01:00
4514080014 enable global pages for the kernelspace 2013-12-03 15:54:58 +01:00
fff2708c5a move page map setup to arch_paging_init() 2013-12-03 15:52:16 +01:00
443ffdf012 replaced PAGE_ALIGN macro by PAGE_CEIL, PAGE_FLOOR pair 2013-12-03 15:29:05 +01:00
2923b1a7ed cleanup of macros and comments, indentation etc... 2013-12-03 15:26:21 +01:00
edf178f39a Merge branch 'vogel' into x64_paging 2013-11-26 17:25:53 +01:00
949500fe6c moved mm subsystem tests to apps/memory.c 2013-11-26 17:24:03 +01:00
3f63d80b9c fixed some compiler warnings 2013-11-26 17:18:47 +01:00
60f8f53169 added test for new buddy malloc implementation 2013-11-20 14:12:57 +01:00
7a3e77c82d palloc()/pfree() replace our old kmalloc()/kfree() with PAGE_SIZE granularity 2013-11-20 14:11:19 +01:00
9018781eee replaced old kfree() calls with new signature 2013-11-20 14:11:19 +01:00
0153fb538d removed old kmalloc() 2013-11-20 14:11:19 +01:00
954ccf1379 added malloc.c to Makefile 2013-11-20 14:11:19 +01:00
1e98d0e410 added first implementation of a buddy system kmalloc() allocator 2013-11-20 14:11:18 +01:00
a972efe288 Merge branch 'vma_kernel' into vogel
Conflicts:
	kernel/tasks.c
2013-11-20 14:00:04 +01:00
1fea8eb13b Merge branch 'qemu_debug' into vogel 2013-11-20 13:54:23 +01:00
acc6e2124e disable VGA output and kill remaining processes 2013-11-20 13:51:03 +01:00
9db28ec380 using PAGE_ALIGN macro to calc pages 2013-11-20 13:43:18 +01:00
de33962e9d removed old vm_alloc()/vm_free() which have been replaced by vma_alloc()/vma_free() 2013-11-20 13:22:09 +01:00
71f55f0a89 ported userspace tasks to new VMA implementation (untested!) 2013-11-20 13:19:58 +01:00
76e52aa473 time is precious... 2013-11-20 12:06:53 +01:00
af5fa15d8d fixed possible wrap-around in tlb_flush functions 2013-11-20 12:04:55 +01:00
aa1730919e standardized comment format and code cleanup 2013-11-20 12:03:24 +01:00
707d7132c8 added test for the new VMA implementation 2013-11-20 11:30:04 +01:00
79c4f2703e setup kernelspace VMA list 2013-11-20 11:27:49 +01:00
3cd5a5853b added VMA list implementation 2013-11-20 11:26:55 +01:00
421e7ec66e added missing multiboot #defines 2013-11-20 11:20:52 +01:00
df99b4dfff some rewrite of mmu_init concerning the initialization of the memory bitmap 2013-11-20 11:18:10 +01:00
0d7aa3d0ca allow memory initialization without multiboot mmap 2013-11-20 11:15:10 +01:00
06877ff108 bitmap cleanup 2013-11-18 15:47:26 +01:00
fa07bdee53 improved tests for the paging system 2013-11-14 13:17:14 +01:00
ec171dfcce implemented map_region() (more testing needed; will probably be replaced by an iterative solution) 2013-11-14 13:12:35 +01:00
892154c9f1 implemented drop_page_map() (more testing needed) 2013-11-14 13:09:56 +01:00
bbb8c5c186 implemented copy_page_frame() (more testing needed) 2013-11-14 13:09:31 +01:00
92b2badf71 implemented copy_page_map() (more testing needed) 2013-11-14 13:08:56 +01:00
cdcd9e7d20 implemented create_page_map() 2013-11-14 12:25:52 +01:00
14938ef7e1 added some helper functions for the recursive mapping structures 2013-11-14 12:25:07 +01:00
4b485f5733 implemented virt_to_phys() 2013-11-14 12:23:42 +01:00
9441d21d89 more cleanup of old 32bit code relics, typos and indentation 2013-11-14 12:22:52 +01:00
90d884ec8d changed naming of 64bit boot paging tables according to 32bit boot tables
calculate required boot page tables as a function of KERNEL_SPACE
2013-10-25 12:02:04 +02:00
fca96e9851 unified paging preprocessor #defines and naming scheme 2013-10-24 12:36:05 +02:00
143de82f3d added test case for vm_alloc() and MAP_REMAP flag 2013-10-22 21:49:03 +02:00
cd57f5ec28 added kernel app to test the paging and mm subsystem 2013-10-22 21:30:30 +02:00
d59676dbf5 more work to make our assembly initialization more beautiful 2013-10-17 21:35:13 +02:00
3e73d6384e fixed regression 2013-10-17 13:09:20 +02:00
3be25b99d2 reorganized 64bit paging initialization 2013-10-17 11:36:02 +02:00
403c529e8b fixed copy&paste bug and moved cpu initialization in cpu_init() 2013-10-16 17:50:37 +02:00
Steffen Vogel 9b47b3ef45 refactored paging code to be more universal and suitable for 64bit paging 2013-10-16 17:50:37 +02:00
e290d41149 Merge branch 'memtest' into vogel 2013-10-16 17:36:13 +02:00
f361783f4b Merge branch 'cleanup' into vogel 2013-10-16 17:34:18 +02:00
6826e0374d replace awk script for NASM's config.inc by universal sed scripts 2013-10-16 16:50:04 +02:00
3ee658d008 Merge branch 'qemu_debug' into vogel
Conflicts:
	Makefile.example
2013-10-16 15:15:31 +02:00
5ab075df9b added an example to debug 64bit code in gdb (QEmu is buggy when switching from 32 to 64bit code) 2013-10-16 15:13:04 +02:00
db21f7cf05 simplified Makefile for building 64bit code
and added new debug target (see L²P: Howto QEmu and GDB)
2013-10-16 14:58:05 +02:00
dac9b20c18 some comments cleaned up 2013-10-16 13:42:58 +02:00
40e5d83217 Merge branch 'cleanup' into vogel 2013-10-10 11:51:31 +02:00
Steffen Vogel 2e230a609e added more output to vma_dump() 2013-10-10 11:50:38 +02:00
d275c0a00a added more output to vma_dump() 2013-10-10 11:50:38 +02:00
Steffen Vogel b0749fc448 added some comments 2013-10-10 11:48:00 +02:00
2f2dd1d3c7 added some comments 2013-10-10 11:48:00 +02:00
Steffen Vogel 2f02db8dc0 fixed typo 2013-10-10 11:46:04 +02:00
9621509e78 fixed typo 2013-10-10 11:46:04 +02:00
Steffen Vogel 6b7b70903e removed non-existent header asm/mmu.h 2013-10-10 11:45:03 +02:00
2e62ee2966 removed non-existent header asm/mmu.h 2013-10-10 11:45:03 +02:00
Steffen Vogel 7ffaec04f2 added missing newline at end of printf() 2013-10-10 11:44:31 +02:00
38eb3d5167 added missing newline at end of printf() 2013-10-10 11:44:31 +02:00
ce66d261b5 Merge branch 'memtest' into vogel 2013-10-10 11:42:36 +02:00
e757ac5c08 Merge branch 'qemu_uart' into vogel 2013-10-10 11:42:27 +02:00
Steffen Vogel e731d60256 debug kernel messages over virtual uart port in qemu
use 'telnet localhost 12346' to listen
2013-10-10 11:39:41 +02:00
5424397b47 debug kernel messages over virtual uart port in qemu
use 'telnet localhost 12346' to listen
2013-10-10 11:39:41 +02:00
Steffen Vogel 3c8de24349 fixed some typos, added comments and some code cleanup 2013-10-10 11:09:36 +02:00
1fc3e40c4e fixed some typos, added comments and some code cleanup 2013-10-10 11:09:36 +02:00
Steffen Vogel ae1af7a053 fixed segmentation fault
abort allocation after first malloc fail
2013-10-07 17:22:53 +02:00
16c65de934 fixed segmentation fault
abort allocation after first malloc fail
2013-10-07 17:22:53 +02:00
Steffen Vogel b3fa94b0e0 free chunks before terminate 2013-08-19 00:44:24 +02:00
feec2b7de8 free chunks before terminate 2013-08-19 00:44:24 +02:00
Steffen Vogel e766295d68 sequential allocation of multiple chunks 2013-08-19 00:43:34 +02:00
9c85f88333 sequential allocation of multiple chunks 2013-08-19 00:43:34 +02:00
Steffen Vogel 264146a7e1 bugfix: invalid multp for single byte allocation 2013-08-19 00:37:05 +02:00
0376d06594 bugfix: invalid multp for single byte allocation 2013-08-19 00:37:05 +02:00
Steffen Vogel 030ba0d75f code cleanup 2013-08-19 00:35:30 +02:00
8159ad78d7 code cleanup 2013-08-19 00:35:30 +02:00
69 changed files with 4431 additions and 16749 deletions

.gitignore (vendored): +1 line

@@ -17,6 +17,7 @@ tools/make_initrd
tools/scc_bootinfo.asm
newlib/examples/hello
newlib/examples/memtest
newlib/examples/fork
newlib/examples/jacobi
newlib/examples/echo
newlib/examples/tests


@@ -1,8 +1,13 @@
TOPDIR = $(shell pwd)
ARCH = x86
# For 64bit support, you have to define BIT as 64
BIT=32
NAME = metalsvm
# For 64bit support, you have to define BIT as 64
# Note: do not forget to 'make veryclean' after changing BIT!!!
BIT = 64
ARCH = x86
SMP = 2
DEBUG = 1
TOPDIR = $(shell pwd)
LWIPDIRS = lwip/src/arch lwip/src/api lwip/src/core lwip/src/core/ipv4 lwip/src/netif
DRIVERDIRS = drivers/net drivers/char
KERNDIRS = libkern kernel mm fs apps arch/$(ARCH)/kernel arch/$(ARCH)/mm arch/$(ARCH)/scc $(LWIPDIRS) $(DRIVERDIRS)
@@ -30,35 +35,64 @@ RANLIB_FOR_TARGET = $(CROSSCOMPREFIX)ranlib
STRIP_FOR_TARGET = $(CROSSCOMPREFIX)strip
READELF_FOR_TARGET = $(CROSSCOMPREFIX)readelf
# Tools
MAKE = make
RM = rm -rf
NASM = nasm
# For 64bit code, you have to use qemu-system-x86_64
QEMU = qemu-system-i386
GDB = gdb
# For 64bit support, you have to define -felf64 instead of -felf32
NASMFLAGS = -felf32 -g -i$(TOPDIR)/include/metalsvm/
INCLUDE = -I$(TOPDIR)/include -I$(TOPDIR)/arch/$(ARCH)/include -I$(TOPDIR)/lwip/src/include -I$(TOPDIR)/lwip/src/include/ipv4 -I$(TOPDIR)/drivers
# For 64bit support, you have to define "-m64 -mno-red-zone" instead of "-m32 -march=i586"
# Compiler options for final code
CFLAGS = -g -m32 -march=i586 -Wall -O2 -fstrength-reduce -fomit-frame-pointer -finline-functions -ffreestanding $(INCLUDE) $(STACKPROT)
# Compiler options for debugging
#CFLAGS = -g -O -m32 -march=i586 -Wall -fomit-frame-pointer -ffreestanding $(INCLUDE) $(STACKPROT)
ifeq ($(BIT), 32)
QEMU = qemu-system-i386
else ifeq ($(BIT), 64)
QEMU = qemu-system-x86_64
endif
INCLUDE = -I$(TOPDIR)/include \
-I$(TOPDIR)/arch/$(ARCH)/include \
-I$(TOPDIR)/lwip/src/include \
-I$(TOPDIR)/lwip/src/include/ipv4 \
-I$(TOPDIR)/drivers
GDBFLAGS = -x script.gdb
QEMUFLAGS = -smp $(SMP) -serial tcp::12346,server \
-nographic -monitor stdio \
-net nic,model=rtl8139 \
-net user,hostfwd=tcp::12345-:4711
NASMFLAGS = -felf$(BIT) -g -i$(TOPDIR)/include/metalsvm/
CFLAGS = -m$(BIT) -Wall -ffreestanding $(INCLUDE) $(STACKPROT)
ARFLAGS = rsv
LDFLAGS = -T link$(BIT).ld -z max-page-size=4096 --defsym __BUILD_DATE=$(shell date +'%Y%m%d') --defsym __BUILD_TIME=$(shell date +'%H%M%S')
LDFLAGS = -T link$(BIT).ld -z max-page-size=0x1000 --defsym __BUILD_DATE=$(shell date +'%Y%m%d') --defsym __BUILD_TIME=$(shell date +'%H%M%S')
STRIP_DEBUG = --strip-debug
KEEP_DEBUG = --only-keep-debug
# Do not change to elf64!
# The Multiboot spec can only boot elf32 binaries
OUTPUT_FORMAT = -O elf32-i386
# For 64bit support, you have to define -m64 instead of "-m32 -march=i586"
CFLAGS_FOR_NEWLIB = -m32 -march=i586 -O2 $(STACKPROT)
# For 64bit support, you have to define -m64 instead of "-m32 -march=i586"
LDFLAGS_FOR_NEWLIB = -m32 -march=i586
# For 64bit support, you have to define -m64 instead of "-m32"
CFLAGS_FOR_TOOLS = -m32 -O2 -Wall
CFLAGS_FOR_NEWLIB = -m$(BIT) $(STACKPROT)
LDFLAGS_FOR_NEWLIB = -m$(BIT)
CFLAGS_FOR_TOOLS = -m$(BIT) -O2 -Wall
LDFLAGS_FOR_TOOLS =
# For 64bit support, you have to define -felf64 instead of -felf32
NASMFLAGS_FOR_NEWLIB = -felf32
NASMFLAGS_FOR_NEWLIB = -felf$(BIT)
# Setup debugging flags here
ifndef DEBUG
CFLAGS += -O2 -fomit-frame-pointer -fstrength-reduce -finline-functions
CFLAGS_FOR_NEWLIB += -O2
else
CFLAGS += -O0 -gdwarf-2
CFLAGS_FOR_NEWLIB += -O0 -gdwarf-2
endif
ifeq ($(BIT), 32)
CFLAGS += -march=i586
CFLAGS_FOR_NEWLIB += -march=i586
LDFLAGS_FOR_NEWLIB += -march=i586
else ifeq ($(BIT), 64)
CFLAGS += -mno-red-zone
endif
# Prettify output
V = 0
@@ -68,11 +102,15 @@ ifeq ($V,0)
endif
default: all
all: newlib tools $(NAME).elf
newlib:
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) LDFLAGS="$(LDFLAGS_FOR_NEWLIB)" CFLAGS="$(CFLAGS_FOR_NEWLIB)" NASMFLAGS="$(NASMFLAGS_FOR_NEWLIB)" CC_FOR_TARGET=$(CC_FOR_TARGET) \
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) \
LDFLAGS="$(LDFLAGS_FOR_NEWLIB)" \
CFLAGS="$(CFLAGS_FOR_NEWLIB)" \
NASMFLAGS="$(NASMFLAGS_FOR_NEWLIB)" \
CC_FOR_TARGET=$(CC_FOR_TARGET) \
CXX_FOR_TARGET=$(CXX_FOR_TARGET) \
GCC_FOR_TARGET=$(GCC_FOR_TARGET) \
AR_FOR_TARGET=$(AR_FOR_TARGET) \
@@ -86,7 +124,7 @@ newlib:
READELF_FOR_TARGET=$(READELF_FOR_TARGET) -C newlib
tools:
$(MAKE) CFLAGS="$(CFLAGS_FOR_TOOLS)" LDFLAGS="$(LDFLAGS_FOR_TOOLS)" -C tools
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) CFLAGS="$(CFLAGS_FOR_TOOLS)" LDFLAGS="$(LDFLAGS_FOR_TOOLS)" -C tools
$(NAME).elf:
$Q$(LD_FOR_TARGET) $(LDFLAGS) -o $(NAME).elf $^
@@ -96,23 +134,28 @@ $(NAME).elf:
$Q$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $(OUTPUT_FORMAT) $(NAME).elf
qemu: newlib tools $(NAME).elf
$(QEMU) -monitor stdio -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img
$(QEMU) $(QEMUFLAGS) -kernel metalsvm.elf -initrd tools/initrd.img
qemudbg: newlib tools $(NAME).elf
$(QEMU) -s -S -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img
debug: newlib tools $(NAME).elf
$(TERM) -e $(GDB) $(GDBFLAGS) &
$(TERM) -e telnet localhost 12346 &
$(TERM) -e telnet localhost 12345 &
$(QEMU) $(QEMUFLAGS) -s -S -kernel metalsvm.elf -initrd tools/initrd.img
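# Note: -s is shorthand for -gdb tcp::1234 (QEMU's gdb remote stub) and -S
# halts the CPUs at startup until the attached gdb continues execution.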
gdb: $(NAME).elf
make qemudbg > /dev/null &
$(GDB) -x script.gdb
hwdebug: newlib tools $(NAME).elf
killall -qw mongoose cu || true
mongoose -p 8080 &
hwreset push reset 300
cu -s 115200 -l /dev/ttyUSB0
clean:
$Q$(RM) $(NAME).elf $(NAME).sym *~
$Q$(RM) $(NAME).elf $(NAME).sym *~ core core.*
$Q$(MAKE) -C tools clean
@echo Cleaned.
veryclean: clean
$Q$(MAKE) -C newlib veryclean
@echo Very cleaned
@echo Very cleaned.
#depend:
# for i in $(SUBDIRS); do $(MAKE) -k -C $$i depend; done
@@ -124,16 +167,15 @@ veryclean: clean
$Q$(CPP_FOR_TARGET) -MF $*.dep -MT $*.o -MM -D__KERNEL__ $(CFLAGS) $<
include/metalsvm/config.inc: include/metalsvm/config.h
@echo "; This file is generated automatically from the config.h file." > include/metalsvm/config.inc
@echo "; Before editing this, you should consider editing config.h." >> include/metalsvm/config.inc
@awk '/^#define MAX_CORES/{ print "%define MAX_CORES", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc
@awk '/^#define KERNEL_STACK_SIZE/{ print "%define KERNEL_STACK_SIZE", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc
@awk '/^#define CONFIG_VGA/{ print "%define CONFIG_VGA", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc
@echo "; This file is generated automatically from the config.h file." > $@
@echo "; Before editing this, you should consider editing config.h." >> $@
@sed -nre 's/^[\t ]*#define[\t ]+([a-z_0-9]+)([\t ]+.*)*/%define \1/ip' $< >> $@
@sed -nre 's/^[\t ]*#define[\t ]+([a-z_0-9]+)[\t ]+([a-z_0-9.]+)([\t ]+.*)*/%define \1 \2/ip' $< >> $@
%.o : %.asm include/metalsvm/config.inc
@echo [ASM] $@
$Q$(NASM) $(NASMFLAGS) -o $@ $<
.PHONY: default all clean emu gdb newlib tools
.PHONY: default all clean qemu qemudbg gdb debug newlib tools
include $(addsuffix /Makefile,$(SUBDIRS))
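For reference, the two sed rules above translate every C #define from config.h into NASM syntax: the first pass emits a bare %define for each macro name, the second emits a %define with its value for macros defined to a simple token. A hypothetical input line (value illustrative, not taken from the repository) comes through as:

#define MAX_CORES 2              (config.h)

%define MAX_CORES                (first sed pass)
%define MAX_CORES 2              (second sed pass)

Unlike the replaced awk rules, which handled only MAX_CORES, KERNEL_STACK_SIZE and CONFIG_VGA, the sed pair picks up every #define in the header.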


@@ -1,4 +1,4 @@
C_source := tests.c echo.c netio.c jacobi.c laplace.c gfx_client.c gfx_generic.c
C_source := tests.c echo.c netio.c jacobi.c laplace.c gfx_client.c gfx_generic.c memory.c membench.c
MODULE := apps
include $(TOPDIR)/Makefile.inc

apps/membench.c (new file): +175 lines

@@ -0,0 +1,175 @@
/*
* Copyright 2014 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*
* Based on:
* www.cs.berkeley.edu/~richie/bebop/notes/matmul/files/membench/
*/
#include <metalsvm/stdio.h>
#include <metalsvm/vma.h>
#include <metalsvm/malloc.h>
#include <asm/processor.h>
#include <asm/pmc.h>
#define SAMPLES 1000
#define CLKS 1000000000L
#define STRIDE_MIN 1
#define RANGE_MIN (32) /* 32 Byte */
#define RANGE_MAX (32*1024*1024) /* 32 MiB */
#define FLUSH 1
static inline void warmup(char* memory)
{
#ifdef FLUSH
tlb_flush();
flush_cache();
#else
int index;
for (index = 0; index < RANGE_MAX; index++) {
memory[index] = 0;
}
#endif
}
int membench(void* arg)
{
kprintf("======= Starting membench\n");
/* Init PMCs */
struct pmc_caps* cap = pmc_init();
kprintf("PMC architectural version: %u\n", cap->version);
kprintf("There are %u general purpose PMCs (%u bit wide) available\n", cap->gp_count, cap->gp_width);
kprintf("There are %u fixed function PMCs (%u bit wide) available\n", cap->ff_count, cap->ff_width);
int i;
uint16_t tests[][2] = {
#if 0
{PMC_EVT_MEM_LOAD_RETIRED_DTLB_MISS, 0},
{PMC_EVT_MEM_LOAD_RETIRED_L1D_MISS, 0},
{PMC_EVT_MEM_LOAD_RETIRED_L2_MISS, 0}
#elif 0
{PMC_EVT_PAGE_WALK_CLKS, PMC_EVT_PAGE_WALK_COUNT}
#else
{PMC_EVT_DTLB_MISS_ANY, PMC_EVT_DTLB_MISS_LD},
{PMC_EVT_DTLB_MISS_ANY, PMC_EVT_DTLB_MISS_L0_LD},
{PMC_EVT_DTLB_MISS_ANY, PMC_EVT_DTLB_MISS_ST},
#endif
};
char *memory = palloc(RANGE_MAX, 0);
kprintf("Allocated test memory: %#lx bytes at %p\n", RANGE_MAX, memory);
kprintf("#%12s%12s%12s%12s%12s%12s\n", "range", "stride", "steps", "runs", "reads", "results");
irq_disable();
/* Setup PMCs */
pmc_stop_all();
pmc_ff_config(1, PMC_FIXED_OS); // CPU_CLK_UNHALTED.CORE
/* Variables for PMC values */
uint64_t gp[2], ff[3];
uint64_t gp_ovh[2], ff_ovh[3];
int64_t gp_real[2], ff_real[3];
uint64_t run, steps, tsteps, index, runs, reads;
uint64_t range, stride;
volatile char temp;
/* Run the timing experiments */
for (range = RANGE_MIN; range <= RANGE_MAX; range *= 2) {
for (stride = STRIDE_MIN; stride < range; stride *= 2) {
runs = SAMPLES * stride / range + 1;
for (i = 0; i < sizeof(tests) / 4; i++) {
pmc_gp_config(0, tests[i][0], PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0);
pmc_gp_config(1, tests[i][1], PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0);
steps = 0;
tsteps = 0;
/* Time the loop with strided access + loop overhead */
warmup(memory);
pmc_reset_all();
pmc_start_all();
do {
for (run = 0; run < runs; run++) {
for (index = 0; index < range; index += stride) {
temp = memory[index];
}
}
steps++;
} while (pmc_ff_read(1) < CLKS);
pmc_stop_all();
gp[0] = pmc_gp_read(0);
gp[1] = pmc_gp_read(1);
ff[1] = pmc_ff_read(1);
/* Try to time just the overheads */
warmup(memory);
pmc_reset_all();
pmc_start_all();
do {
for (run = 0; run < runs; run++) {
for (index = 0; index < range; index += stride) {
temp++;
}
}
tsteps++;
} while (tsteps < steps);
pmc_stop_all();
gp_ovh[0] = pmc_gp_read(0);
gp_ovh[1] = pmc_gp_read(1);
ff_ovh[1] = pmc_ff_read(1);
gp_real[0] = gp[0] - gp_ovh[0];
gp_real[1] = gp[1] - gp_ovh[1];
ff_real[1] = ff[1] - ff_ovh[1];
reads = runs * steps * range / stride;
if (i == 0)
kprintf("%12llu%12llu%12llu%12llu%12llu%12llu", range, stride, steps, runs, reads, ff_real[1]);
kprintf("%12llu", gp_real[0]);
kprintf("%12llu", gp_real[1]);
kprintf("\t");
}
kprintf("\n");
}
kprintf("\n");
}
irq_enable();
return 0;
}
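A note on reading membench's output (an interpretation, not stated in the source): reads = runs * steps * range / stride is the number of timed load instructions per configuration, so dividing a counter column by the reads column gives an approximate per-access rate, e.g. with the default event set

	dtlb_misses_per_load ≈ gp_real[0] / reads

bearing in mind that the second timing loop only estimates the non-memory overhead that is subtracted out.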

apps/memory.c (new file): +445 lines

@@ -0,0 +1,445 @@
/*
* Copyright 2014 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
#include <metalsvm/stdlib.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdarg.h>
#include <metalsvm/memory.h>
#include <metalsvm/time.h>
#include <metalsvm/tasks.h>
#include <metalsvm/vma.h>
#include <metalsvm/malloc.h>
#include <asm/page.h>
#include <asm/irqflags.h>
#include <asm/processor.h>
#include <asm/pmc.h>
#define ITERATIONS 1000
#define PAGE_COUNT 40
#define SIZE (PAGE_COUNT*PAGE_SIZE)
#define VIRT_FROM_ADDR 0x50000000 // Userspace
#define VIRT_TO_ADDR 0x30000000 // Kernelspace
extern atomic_int32_t total_pages;
extern atomic_int32_t total_allocated_pages;
extern atomic_int32_t total_available_pages;
/** @brief Simple helper to format our test results */
static void test(size_t expr, char *fmt, ...)
{
void _putchar(int c, void *arg) { kputchar(c); } // for kvprintf
static int c = 1;
va_list ap;
va_start(ap, fmt);
kprintf("%s #%u:\t", (expr) ? "PASSED" : "FAILED", c++);
kvprintf(fmt, _putchar, NULL, 10, ap);
kputs("\n");
va_end(ap);
if (!expr)
abort();
}
/** @brief Linear feedback shift register PRNG */
static uint16_t rand()
{
static uint16_t lfsr = 0xACE1u;
static uint16_t bit;
bit = ((lfsr >> 0) ^ (lfsr >> 2) ^ (lfsr >> 3) ^ (lfsr >> 5) ) & 1;
return lfsr = (lfsr >> 1) | (bit << 15);
}
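// This is the classic 16-bit Fibonacci LFSR (taps 16, 14, 13, 11): it cycles
// through all 65535 non-zero states and never returns zero for a non-zero seed.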
/** @brief BSD sum algorithm (the Unix 'sum' command), as also used by QEmu */
uint16_t checksum(size_t start, size_t end)
{
size_t addr;
uint16_t sum;
for(addr = start, sum = 0; addr < end; addr++) {
uint8_t val = *((uint8_t *) addr);
sum = (sum >> 1) | (sum << 15);
sum += val;
}
return sum;
}
static int paging_stage2(void *arg);
/** @brief Test of the paging subsystem
*
* We will map a single physical memory region to two virtual regions.
* When writing to the first one, we should be able to read the same contents
* from the second one.
*/
static void paging(void)
{
size_t c, sum;
size_t *p1, *p2;
size_t virt_from, virt_to;
size_t phys;
size_t t;
int ret;
int flags;
// disable irqs to prevent context switches for rdtsc measurement
flags = irq_nested_disable();
// show original page maps
t = rdtsc();
page_dump(PG_XD | PG_GLOBAL | PG_USER | PG_RW);
kprintf("delta_t = %lu\n", rdtsc() - t);
t = rdtsc();
page_stats(1); // reset accessed and dirty bits
kprintf("delta_t = %lu\n", rdtsc() - t);
irq_nested_enable(flags);
kprintf("bookkeeping pages:\n");
kprintf(" - total:\t%lu\n", atomic_int32_read(&total_pages));
kprintf(" - alloc:\t%lu\n", atomic_int32_read(&total_allocated_pages));
kprintf(" - avail:\t%lu\n", atomic_int32_read(&total_available_pages));
// allocate physical page frames
phys = get_pages(PAGE_COUNT);
test(phys, "get_pages(%lu) = %#lx", PAGE_COUNT, phys);
// create first mapping
virt_from = map_region(VIRT_FROM_ADDR, phys, PAGE_COUNT, MAP_USER_SPACE);
test(virt_from, "map_region(%#lx, %#lx, %lu, %#x) = %#lx", VIRT_FROM_ADDR, phys, PAGE_COUNT, MAP_USER_SPACE, virt_from);
// check address translation
phys = virt_to_phys(virt_from);
test(phys, "virt_to_phys(%#lx) = %#lx", virt_from, phys);
page_dump(PG_XD | PG_GLOBAL | PG_USER | PG_RW);
// test set_page_flags()
ret = set_page_flags(virt_from, PAGE_COUNT, MAP_CODE);
test(!ret, "set_page_flags(%#lx, %u, %x)", virt_from, PAGE_COUNT, MAP_USER_SPACE|MAP_CODE); // now executable
page_dump(PG_XD | PG_GLOBAL | PG_USER | PG_RW);
// write test data
p1 = (size_t *) virt_from;
for (c = 0; c < SIZE/sizeof(size_t); c++) {
p1[c] = c;
}
// create second mapping pointing to the same page frames
virt_to = map_region(VIRT_TO_ADDR, phys, PAGE_COUNT, 0);
test(virt_to, "map_region(%#lx, %#lx, %lu, %#x) = %#lx", VIRT_TO_ADDR, phys, PAGE_COUNT, 0, virt_to);
// check address translation
phys = virt_to_phys(virt_to);
test(phys, "virt_to_phys(%#lx) = %#lx", virt_to, phys);
page_dump(PG_XD | PG_GLOBAL | PG_USER | PG_RW);
// check if both mapped areas are equal
p2 = (size_t *) virt_to;
for (c = 0; c < SIZE/sizeof(size_t); c++) {
if (p1[c] != p2[c])
test(0, "data mismatch: *(%p) != *(%p)", &p1[c], &p2[c]);
}
test(1, "data is equal");
// try to remap without MAP_REMAP
virt_to = map_region(VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, 0);
test(!virt_to, "map_region(%#lx, %#lx, %lu, %#x) = %#lx (without MAP_REMAP flag)", VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, 0, virt_to);
// try to remap with MAP_REMAP
virt_to = map_region(VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, MAP_REMAP);
test(virt_to, "map_region(%#lx, %#lx, %lu, %#x) = %#lx (with MAP_REMAP flag)", VIRT_TO_ADDR, phys+PAGE_SIZE, PAGE_COUNT, MAP_REMAP, virt_to);
// check if data is not equal anymore (we remapped with +PAGE_SIZE offset)
p1 = (size_t *) (virt_from + PAGE_SIZE);
for (c = 0; c < (SIZE-PAGE_SIZE)/sizeof(size_t); c++) {
if (p1[c] != p2[c])
test(0, "data mismatch at *(%p) != *(%p)", &p1[c], &p2[c]);
}
test(1, "data is equal");
// try to remap with MAP_REMAP
virt_to = map_region(VIRT_TO_ADDR, phys, PAGE_COUNT, MAP_REMAP);
test(virt_to, "map_region(%#lx, %#lx, %lu, %#x) = %#lx (with MAP_REMAP flag)", VIRT_TO_ADDR, phys, PAGE_COUNT, MAP_REMAP, virt_to);
// test unmapping
ret = unmap_region(VIRT_FROM_ADDR, PAGE_COUNT);
test(!ret, "unmap_region(%#lx, %lu) = %u", VIRT_FROM_ADDR, PAGE_COUNT, ret);
page_dump(PG_XD | PG_GLOBAL | PG_USER | PG_RW);
// calc checksum
sum = checksum(virt_to, virt_to + SIZE);
test(sum == 23196, "checksum(%p, %p) = %lu", virt_to, virt_to + SIZE, sum);
size_t cr3 = read_cr3();
kprintf("cr3 old = %#lx\n", cr3);
create_kernel_task(0, paging_stage2, &sum, NORMAL_PRIO);
wait(&ret);
test(!ret, "paging stage 2 returned with code = %i", ret);
}
static int paging_stage2(void *arg)
{
size_t old, new;
kprintf("PAGING: entering stage 2...\n");
size_t cr3 = read_cr3();
kprintf("cr3 new = %#lx\n", cr3);
old = *((size_t *) arg);
kprintf("old sum: %lu\n", old);
page_dump(PG_XD | PG_GLOBAL | PG_USER | PG_RW);
new = checksum(VIRT_TO_ADDR, VIRT_TO_ADDR + SIZE);
test(old == new, "checksum(%p, %p) = %lu", VIRT_TO_ADDR, VIRT_TO_ADDR + SIZE, new);
return 0;
}
/** @brief Test of the VMA allocator */
static void vma(void)
{
int ret;
vma_dump();
// vma_alloc
size_t a1 = vma_alloc(SIZE, VMA_HEAP);
test(a1, "vma_alloc(%#x, %#x) = %#lx", SIZE, VMA_HEAP, a1);
size_t a2 = vma_alloc(SIZE, VMA_HEAP|VMA_USER);
test(a2 != 0, "vma_alloc(%#x, %#x) = %#lx", SIZE, VMA_HEAP|VMA_USER, a2);
vma_dump();
// vma_free
ret = vma_free(a1, a1+SIZE);
test(ret >= 0, "vma_free(%#lx, %#lx) = %i", a1, a1+SIZE, ret);
ret = vma_free(a2, a2+SIZE);
test(ret >= 0, "vma_free(%#lx, %#lx) = %i", a2, a2+SIZE, ret);
vma_dump();
// vma_add
ret = vma_add(VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, VMA_HEAP|VMA_USER);
test(ret >= 0, "vma_add(%#lx, %#lx, %#x) = %u", VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, VMA_HEAP|VMA_USER, ret);
ret = vma_add(VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, VMA_HEAP|VMA_USER);
test(ret >= 0, "vma_add(%#lx, %#lx, %#x) = %u", VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, VMA_HEAP|VMA_USER, ret);
ret = vma_add(VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, VMA_HEAP|VMA_USER);
test(ret >= 0, "vma_add(%#lx, %#lx, %#x) = %u", VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, VMA_HEAP|VMA_USER, ret);
vma_dump();
// vma_free
ret = vma_free(VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR);
test(ret >= 0, "vma_free(%#lx, %#lx) = %u", VIRT_FROM_ADDR-SIZE, VIRT_FROM_ADDR, ret);
ret = vma_free(VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE);
test(ret >= 0, "vma_free(%#lx, %#lx) = %u", VIRT_FROM_ADDR+SIZE, VIRT_FROM_ADDR+2*SIZE, ret);
ret = vma_free(VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE);
test(ret >= 0, "vma_free(%#lx, %#lx) = %u", VIRT_FROM_ADDR, VIRT_FROM_ADDR+SIZE, ret);
vma_dump();
}
/** @brief Test of the kernel malloc allocator */
static void malloc(void)
{
int i;
int* p[20];
int* a;
// kmalloc() test
buddy_dump();
a = kmalloc(SIZE);
test(a != NULL, "kmalloc(%lu) = %p", SIZE, a);
buddy_dump();
// simple write/read test
for (i=0; i<SIZE/sizeof(int); i++)
a[i] = i;
for (i=0; i<SIZE/sizeof(int); i++) {
if (a[i] != i)
test(0, "data mismatch: *(%p) != %lu", &a[i], i);
}
test(1, "data is equal");
// kfree() test
kfree(a);
test(1, "kfree(%p)", a);
buddy_dump();
// some random malloc/free patterns to stress the buddy system
for (i=0; i<20; i++) {
uint16_t sz = rand();
p[i] = kmalloc(sz);
test(p[i] != NULL, "kmalloc(%u) = %p", sz, p[i]);
}
buddy_dump();
for (i=0; i<20; i++) {
kfree(p[i]);
test(1, "kfree(%p)", p[i]);
}
buddy_dump();
}
/** @brief A memory benchmark for page table walks and TLB misses */
int bench(void)
{
// init hardware performance counters
struct pmc_caps* cap = pmc_init();
if (cap->version == 0x21) { // QEmu returns garbage
kputs("QEMU does not support PMCs.. skipping benchmark!\n");
return -1;
}
kprintf("PMC architectural version: %u\n", cap->version);
kprintf("There are %u general purpose PMCs (%u bit wide) available\n", cap->gp_count, cap->gp_width);
kprintf("There are %u fixed function PMCs (%u bit wide) available\n", cap->ff_count, cap->ff_width);
// setup PMCs
pmc_stop_all();
pmc_gp_config(0, PMC_EVT_PAGE_WALK_CLKS, PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0);
pmc_gp_config(1, PMC_EVT_PAGE_WALK_COUNT, PMC_EVTSEL_OS | PMC_EVTSEL_EN, 0, 0);
size_t phyaddr = get_page();
size_t viraddr;
size_t pages = 512*511;
size_t virbase = 2*KERNEL_SPACE;
kprintf("virbase %#llx KERNEL_SPACE %#llx\n", virbase, KERNEL_SPACE);
for (viraddr = virbase; viraddr < virbase+pages*PAGE_SIZE; viraddr += PAGE_SIZE) {
kprintf("map at %#llx\n", viraddr);
size_t ret = map_region(viraddr, phyaddr, 1, MAP_KERNEL_SPACE);
if (ret != viraddr) {
kprintf("map failed at %#llx\n", viraddr);
break;
}
}
int i;
for (i=0; i < ITERATIONS; i++) {
tlb_flush();
pmc_reset_all();
pmc_start_all();
for (viraddr = virbase; viraddr < virbase+pages*PAGE_SIZE; viraddr += PAGE_SIZE) {
char * p = (char *) viraddr;
(*p)++;
}
pmc_stop_all();
uint64_t clks = pmc_gp_read(0);
uint64_t count = pmc_gp_read(1);
kprintf("%llu\n", 1000000 * clks / count);
}
return 0;
}
int smp(void* arg)
{
kprintf("Hello from Core %d\n", smp_id());
page_dump(PG_XD | PG_GLOBAL | PG_USER | PG_RW);
return 33;
}
/** @brief This is a simple procedure to test memory management subsystem */
int memory(void* arg)
{
int ret;
tid_t id;
#if 0
size_t t0, t1, t2, t3;
size_t pages;
for (pages = 1; pages < (1 << 25); pages++) {
t0 = rdtsc();
size_t ret = map_region((1 << 28), 0x1000, pages, MAP_KERNEL_SPACE);
t1 = rdtsc();
if (!ret)
break;
t2 = rdtsc();
ret = unmap_region((1 << 28), pages);
t3 = rdtsc();
kprintf("%llu\t%llu\t%llu\n", pages, t1-t0, t3-t2);
}
kprintf("======== USER: malloc test...\n");
char* argv[] = {"/bin/memtest", "25", "10"};
ret = create_user_task(&id, argv[0], argv);
test(!ret, "calling %s %s %s with id = %i, ret = %i", argv[0], argv[1], argv[2], id, ret);
wait(&ret);
test(!ret, "userspace task returned with code = %d", ret);
return 0;
kprintf("======== PAGING: test started...\n");
paging();
kprintf("======== VMA: test started...\n");
vma();
kprintf("======== MALLOC: test started...\n");
malloc();
kprintf("======== USER: test fork...\n");
char* argv2[] = {"/bin/fork", NULL};
ret = create_user_task(&id, argv2[0], argv2);
test(!ret, "calling %s with id = %i, ret = %i", argv2[0], id, ret);
wait(&ret);
test(!ret, "userspace task returned with code = %d", ret);
#endif
kprintf("======== BENCH: memory and TLB benchmark started...\n");
bench();
kprintf("======== SMP: test multicore...\n");
ret = create_kernel_task_on_core(&id, smp, NULL, NORMAL_PRIO, 1);
wait(&ret);
test(!ret, "smp task returned with code = %d", ret);
kprintf("======== All tests finished successfully...\n");
return 0;
}


@@ -43,6 +43,8 @@
int laplace(void* arg);
int jacobi(void* arg);
int memory(void* arg);
int membench(void* arg);
void echo_init(void);
void netio_init(void);
@@ -744,8 +746,13 @@ int test_init(void)
create_user_task(NULL, "/bin/jacobi", jacobi_argv);
//create_user_task_on_core(NULL, "/bin/jacobi", jacobi_argv, 1);
#endif
#ifdef START_MMNIF_TEST
#if defined(CONFIG_LWIP) && LWIP_SOCKET
#ifdef START_MEMORY
create_kernel_task(NULL, memory, NULL, NORMAL_PRIO);
#endif
#ifdef START_MEMBENCH
create_kernel_task(NULL, membench, NULL, NORMAL_PRIO);
#endif
#if defined(START_MMNIF_TEST) && defined(CONFIG_LWIP) && LWIP_SOCKET
if (RCCE_IAM == 0) {
kprintf("Start /bin/server...\n");
create_user_task(NULL, "/bin/server", server_argv);
@@ -754,7 +761,6 @@ int test_init(void)
kprintf("Start /bin/client...\n");
create_user_task(NULL, "/bin/client", client_argv);
}
#endif
#endif
return 0;


@@ -46,6 +46,7 @@
//#define START_HELLO
//#define START_TESTS
//#define START_JACOBI
//#define START_MEMORY
//#define START_CHIEFTEST


@@ -34,7 +34,7 @@
// ____ _ _
// / ___| _ _ _ __ ___ | |__ ___ | |___
// \___ \| | | | '_ ` _ \| '_ \ / _ \| / __|
// ___) | |_| | | | | | | |_) | (_) | \__ \
// ___) | |_| | | | | | | |_) | (_) | \__
// |____/ \__, |_| |_| |_|_.__/ \___/|_|___/
// |___/
//
@@ -253,7 +253,7 @@
// _____ _ _
// | ___| _ _ __ ___| |_(_) ___ _ __ ___
// | |_ | | | | '_ \ / __| __| |/ _ \| '_ \/ __|
// | _|| |_| | | | | (__| |_| | (_) | | | \__ \
// | _|| |_| | | | | (__| |_| | (_) | | | \__
// |_| \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
//
// #########################################################################################


@@ -100,11 +100,6 @@ inline static void outportl(unsigned short _port, unsigned int _data)
asm volatile("outl %1, %0"::"dN"(_port), "a"(_data));
}
inline static void uart_putchar(unsigned char _data)
{
outportb(0x2F8, _data);
}
/**
* read a byte from CMOS
* @param offset CMOS offset


@@ -35,10 +35,12 @@
#ifdef CONFIG_MULTIBOOT
/* are there modules to do something with? */
#define MULTIBOOT_INFO_MODS 0x00000008
/* is there a full memory map? */
#define MULTIBOOT_INFO_MEM_MAP 0x00000040
/// Does the bootloader provide mem_* fields?
#define MULTIBOOT_INFO_MEM (1 << 0)
/// Does the bootloader provide a list of modules?
#define MULTIBOOT_INFO_MODS (1 << 3)
/// Does the bootloader provide a full memory map?
#define MULTIBOOT_INFO_MEM_MAP (1 << 6)
typedef uint16_t multiboot_uint16_t;
typedef uint32_t multiboot_uint32_t;
@@ -114,7 +116,6 @@ struct multiboot_info
multiboot_uint16_t vbe_interface_off;
multiboot_uint16_t vbe_interface_len;
};
typedef struct multiboot_info multiboot_info_t;
struct multiboot_mmap_entry


@@ -21,6 +21,7 @@
* @file arch/x86/include/asm/page.h
* @brief Definitions and functions related to paging
* @author Stefan Lankes
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*
* This file defines the interface for paging as like structures related to paging.
*/
@@ -31,86 +32,107 @@
#include <metalsvm/stddef.h>
#include <metalsvm/stdlib.h>
#define _PAGE_BIT_PRESENT 0 /* is present */
#define _PAGE_BIT_RW 1 /* writeable */
#define _PAGE_BIT_USER 2 /* userspace addressable */
#define _PAGE_BIT_PWT 3 /* page write through */
#define _PAGE_BIT_PCD 4 /* page cache disabled */
#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
#define _PAGE_BIT_PAT 7 /* on 4KB pages */
#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
#define _PAGE_BIT_SVM_STRONG 9 /* mark a virtual address range as used by the SVM system */
#define _PAGE_BIT_SVM_LAZYRELEASE 10 /* mark a virtual address range as used by the SVM system */
#define _PAGE_BIT_SVM_INIT 11 /* mark if the MBP proxy is used */
/// Page offset bits
#define PAGE_BITS 12
/// The size of a single page in bytes
#define PAGE_SIZE ( 1L << PAGE_BITS)
#ifdef CONFIG_X86_32
/// Total operand width in bits
#define BITS 32
/// Linear/virtual address width
#define VIRT_BITS BITS
/// Physical address width (we don't support PAE)
#define PHYS_BITS BITS
/// Page map bits
#define PAGE_MAP_BITS 10
/// Number of page map indirections
#define PAGE_MAP_LEVELS 2
/// Mask the page address without page map flags
#define PAGE_MASK 0xFFFFF000
#elif defined(CONFIG_X86_64)
/// Total operand width in bits
#define BITS 64
/// Linear/virtual address width
#define VIRT_BITS 48
/// Physical address width (maximum value)
#define PHYS_BITS 52
/// Page map bits
#define PAGE_MAP_BITS 9
/// Number of page map indirections
#define PAGE_MAP_LEVELS 4
/// Mask the page address without page map flags
#define PAGE_MASK 0x000FFFFFFFFFF000
#endif
/// The number of entries in a page map table
#define PAGE_MAP_ENTRIES (1L << PAGE_MAP_BITS)
// Base addresses of the self-mapped pagetables
#ifdef CONFIG_X86_32
#define PAGE_MAP_PGD 0xFFFFF000
#define PAGE_MAP_PGT 0xFFC00000
#elif defined(CONFIG_X86_64)
#define PAGE_MAP_PML4 0xFFFFFFFFFFFFF000
#define PAGE_MAP_PDPT 0xFFFFFFFFFFE00000
#define PAGE_MAP_PGD 0xFFFFFFFFC0000000
#define PAGE_MAP_PGT 0xFFFFFF8000000000
#endif
/// Round up to the next page boundary
#define PAGE_FLOOR(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
/// Round down to the page boundary
#define PAGE_CEIL(addr) ( (addr) & PAGE_MASK)
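// Example with the 32bit definitions above (PAGE_MASK = 0xFFFFF000):
//   PAGE_FLOOR(0x1234) = (0x1234 + 0xFFF) & PAGE_MASK = 0x2000
//   PAGE_CEIL(0x1234)  =  0x1234 & PAGE_MASK          = 0x1000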
// Canonical address format
#ifdef CONFIG_X86_32
#define CANONICAL(addr) (addr)
#elif defined(CONFIG_X86_64)
#define CANONICAL(addr) sign_extend(addr, VIRT_BITS)
#endif
/// Page is present
#define PG_PRESENT (1 << _PAGE_BIT_PRESENT)
#define PG_PRESENT (1 << 0)
/// Page is read- and writable
#define PG_RW (1 << _PAGE_BIT_RW)
#define PG_RW (1 << 1)
/// Page is addressable from userspace
#define PG_USER (1 << _PAGE_BIT_USER)
#define PG_USER (1 << 2)
/// Page write through is activated
#define PG_PWT (1 << _PAGE_BIT_PWT)
#define PG_PWT (1 << 3)
/// Page cache is disabled
#define PG_PCD (1 << _PAGE_BIT_PCD)
#define PG_PCD (1 << 4)
/// Page was recently accessed (set by CPU)
#define PG_ACCESSED (1 << _PAGE_BIT_ACCESSED)
#define PG_ACCESSED (1 << 5)
/// Page is dirty due to recent write-access (set by CPU)
#define PG_DIRTY (1 << _PAGE_BIT_DIRTY)
/// Big page: 4MB (or 2MB)
#define PG_PSE (1 << _PAGE_BIT_PSE)
#define PG_DIRTY (1 << 6)
/// Huge page: 4MB (or 2MB, 1GB)
#define PG_PSE (1 << 7)
/// Page is part of the MPB (SCC specific entry)
#define PG_MPE PG_PSE
#define PG_MPE PG_PSE
/// Page attribute table
#define PG_PAT PG_PSE
/// Global TLB entry (Pentium Pro and later)
#define PG_GLOBAL (1 << _PAGE_BIT_GLOBAL)
/// Pattern flag
#define PG_PAT (1 << _PAGE_BIT_PAT)
#define PG_GLOBAL (1 << 8)
/// This virtual address range is marked as used by the SVM system
#define PG_SVM PG_SVM_STRONG
#define PG_SVM_STRONG (1 << _PAGE_BIT_SVM_STRONG)
#define PG_SVM (1 << 9)
#define PG_SVM_STRONG PG_SVM
/// This virtual address range is marked as used by the SVM system
#define PG_SVM_LAZYRELEASE (1 << _PAGE_BIT_SVM_LAZYRELEASE)
#define PG_SVM_LAZYRELEASE (1 << 10)
/// Currently, no page frame is behind this page (only the MBP proxy)
#define PG_SVM_INIT (1 << _PAGE_BIT_SVM_INIT)
#define PG_SVM_INIT (1 << 11)
/// Disable execution for this page
#define PG_XD (1L << 63)
/// This is a whole set of flags (PRESENT,RW,ACCESSED,DIRTY) for kernelspace tables
#define KERN_TABLE (PG_PRESENT|PG_RW|PG_ACCESSED|PG_DIRTY)
/// This is a whole set of flags (PRESENT,RW,ACCESSED,DIRTY,USER) for userspace tables
#define USER_TABLE (PG_PRESENT|PG_RW|PG_ACCESSED|PG_DIRTY|PG_USER)
#define PG_TABLE (PG_PRESENT|PG_RW|PG_XD)
/// This is a whole set of flags (PRESENT,RW,GLOBAL) for kernelspace pages
#define KERN_PAGE (PG_PRESENT|PG_RW|PG_GLOBAL)
/// This is a whole set of flags (PRESENT,RW,USER) for userspace pages
#define USER_PAGE (PG_PRESENT|PG_RW|PG_USER)
#define PG_PAGE (PG_PRESENT|PG_RW|PG_GLOBAL|PG_XD)
#if __SIZEOF_POINTER__ == 4
#define PGT_ENTRIES 1024
#elif __SIZEOF_POINTER__ == 8
#define PGT_ENTRIES 512
#endif
/** @brief Page table structure
/** @brief A single entry in a page map
*
* This structure keeps page table entries.\n
* On a 32bit system, a page table consists normally of 1024 entries.
* Usually used as a pointer to a mapped page map entry.
*/
typedef struct page_table
{
/// Page table entries are unsigned 32bit integers.
size_t entries[PGT_ENTRIES];
} page_table_t __attribute__ ((aligned (4096)));
/** @brief Page directory structure
*
* This structure keeps page directory entries.\
* On a 32bit system, a page directory consists normally of 1024 entries.
*/
typedef struct page_dir
{
/// Page dir entries are unsigned 32bit integers.
size_t entries[PGT_ENTRIES];
} page_dir_t __attribute__ ((aligned (4096)));
typedef size_t page_entry_t;
/** @brief Converts a virtual address to a physical
*
@@ -119,28 +141,6 @@ typedef struct page_dir
*/
size_t virt_to_phys(size_t viraddr);
/** @brief Allocates a virtual address space range of npages
*
* The address range with special flags (if given) will have the size of n pages.
*
* @param npages The range in page-granularity
* @param flags further page flags
*
* @return The new range's address
*/
size_t vm_alloc(uint32_t npages, uint32_t flags);
/** @brief Frees a range in the virtual address space
*
* @param addr Address of the range
* @param npages Size of the range in pages
*
* @return
* - 0 on success
* - -EINVAL (-22) on failure.
*/
int vm_free(size_t addr, uint32_t npages);
/** @brief Unmap the physical memory at a specific virtual address
*
* All Page table entries within this range will be marked as not present
@@ -192,29 +192,29 @@ int arch_paging_init(void);
*
* @return Returns the address of the boot task's page dir array.
*/
page_dir_t* get_boot_pgd(void);
page_entry_t* get_boot_page_map(void);
/** @brief Setup a new page directory for a new user-level task
*
* @param task Pointer to the task-specific task_t structure
* @param copy If true: PGD will be a copy of the kernel's address space PGD
* @param copy If true: copy userspace pages and tables
*
* @return
* - counter of allocated page tables
* - -ENOMEM (-12) on failure
*/
int create_pgd(task_t* task, int copy);
int copy_page_map(struct task* task, int copy);
/** @brief Delete page directory and its page tables
/** @brief Deletes all user page map structures of the current task
*
* Puts page tables and page directory back to buffer and
* sets the task's page directory pointer to NULL
* All allocated physical page frames are released in the bitmap
* The task->page_map is replaced by the boot_page_map()
*
* @return
* - 0 on success
* - -EINVAL (-22) on failure (in case PGD is still the boot-pgd).
*/
int drop_pgd(void);
int drop_page_map(void);
/** @brief Change the page permission in the page tables of the current task
*
@@ -229,6 +229,18 @@ int drop_pgd(void);
* - 0 on success
* - -EINVAL (-22) on failure.
*/
int change_page_permissions(size_t start, size_t end, uint32_t flags);
int set_page_flags(size_t viraddr, uint32_t npages, int flags);
/** @brief Dump mapped memory
*
* @param mask Only watch for changes in these page flags (PG_PRESENT is set by default)
*/
void page_dump(size_t mask);
/** @brief Print stats about page flags
*
* @param reset Reset accessed and dirty bits in page tables
*/
void page_stats(int reset);
#endif


@@ -0,0 +1,161 @@
/*
* Copyright 2014 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @file arch/x86/include/asm/page_helpers.h
* @brief Some small helper functions declared as static inline
* @author Stefan Lankes
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
#ifndef __ARCH_PAGE_HELPERS_H__
#define __ARCH_PAGE_HELPERS_H__
#include <metalsvm/page.h>
/** @brief Sign extend an integer
*
* @param addr The integer to extend
* @param bits The width of addr which should be extended
* @return The extended integer
*/
static inline size_t sign_extend(ssize_t addr, int bits)
{
int shift = BITS - bits;
return (addr << shift) >> shift; // sign bit gets copied during arithmetic right shift
}
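// Example: with BITS = 64, sign_extend(0x0000800000000000, 48) shifts left by 16
// to 0x8000000000000000 and arithmetically back to 0xFFFF800000000000, i.e. the
// canonical form of a kernel address (cf. CANONICAL in page.h).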
/** @brief Get the base address of the child table
*
* @param entry The parent entry
* @return The child entry
*/
static inline page_entry_t* get_child_entry(page_entry_t *entry)
{
size_t child = (size_t) entry;
child <<= PAGE_MAP_BITS;
return (page_entry_t*) CANONICAL(child);
}
/** @brief Get the base address of the parent entry
*
* @param entry The child entry
* @return The parent entry
*/
static inline page_entry_t* get_parent_entry(page_entry_t *entry)
{
ssize_t parent = (size_t) entry;
parent >>= PAGE_MAP_BITS;
parent |= PAGE_MAP_PGT;
parent &= ~(sizeof(size_t) - 1); // align to page_entry_t
return (page_entry_t*) CANONICAL(parent);
}
/** @brief Get the corresponding page map entry to a given virtual address
*
* Please note: this implementation requires that the tables are mapped
* at the end of VAS!
*/
static inline page_entry_t* virt_to_entry(ssize_t addr, int level)
{
addr >>= PAGE_MAP_BITS;
addr |= PAGE_MAP_PGT;
addr >>= level * PAGE_MAP_BITS;
addr &= ~(sizeof(size_t) - 1); // align to page_entry_t
return (page_entry_t*) CANONICAL(addr);
}
/** @brief Get the corresponding virtual address to a page map entry */
static inline size_t entry_to_virt(page_entry_t* entry, int level)
{
size_t addr = (size_t) entry;
addr <<= (level+1) * PAGE_MAP_BITS;
return CANONICAL(addr);
}
/** @brief Converts a virtual address to a physical
*
* A non mapped virtual address causes a pagefault!
*
* @param addr Virtual address to convert
* @return physical address
*/
inline size_t virt_to_phys(size_t addr)
{
page_entry_t* entry = virt_to_entry(addr, 0); // get the PGT entry
size_t off = addr & ~PAGE_MASK; // offset within page
size_t phy = *entry & PAGE_MASK; // physical page frame number
return phy | off;
}
/** @brief Update page table bits (PG_*) by using arch independent flags (MAP_*) */
static size_t page_bits(int flags)
{
size_t bits = PG_PRESENT | PG_RW | PG_XD | PG_GLOBAL;
if (flags & MAP_NO_ACCESS) bits &= ~PG_PRESENT;
if (flags & MAP_READ_ONLY) bits &= ~PG_RW;
#ifdef CONFIG_X86_64
if (flags & MAP_CODE) bits &= ~PG_XD;
#endif
if (flags & MAP_USER_SPACE) bits &= ~PG_GLOBAL;
if (flags & MAP_USER_SPACE) bits |= PG_USER;
if (flags & MAP_WT) bits |= PG_PWT;
if (flags & MAP_NO_CACHE) bits |= PG_PCD;
if (flags & MAP_MPE) bits |= PG_MPE;
if (flags & MAP_SVM_INIT) bits |= PG_SVM_INIT;
if (flags & MAP_SVM_LAZYRELEASE) bits |= PG_SVM_LAZYRELEASE;
if (flags & MAP_SVM_STRONG) bits |= PG_SVM_STRONG;
return bits;
}
// TODO: test
size_t get_page_flags(size_t viraddr)
{
page_entry_t* entry = virt_to_entry(viraddr, 0);
size_t flags = *entry & ~PAGE_MASK;
int i;
for (i=1; i<PAGE_MAP_LEVELS; i++) {
entry = virt_to_entry(viraddr, i);
#ifdef CONFIG_X86_64
flags |= (*entry & PG_XD);
#endif
flags &= (*entry & PG_USER) | ~PG_USER;
flags &= (*entry & PG_RW) | ~PG_RW;
}
return flags;
}
#endif
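As a sanity check of the self-mapping arithmetic above: at level 0, virt_to_entry() boils down to indexing the self-mapped table window by virtual page number. A minimal sketch (pgt_entry_sketch is a hypothetical name, not part of the repository; constants as defined in page.h):

static inline page_entry_t* pgt_entry_sketch(size_t viraddr)
{
	// virtual page number, truncated to the VIRT_BITS - PAGE_BITS index bits
	size_t vpn = (viraddr >> PAGE_BITS) & ((1UL << (VIRT_BITS - PAGE_BITS)) - 1);
	// one page_entry_t per mapped page, starting at the PGT self-map window
	return (page_entry_t*) CANONICAL(PAGE_MAP_PGT + vpn * sizeof(page_entry_t));
}

For canonical addresses this yields the same entry address as virt_to_entry(viraddr, 0).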

arch/x86/include/asm/pmc.h (new file): +252 lines

@@ -0,0 +1,252 @@
/*
* Copyright 2013 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel
* @file arch/x86/include/asm/pmc.h
* @brief Simple interface to IA32 Performance Monitor Counters
*
* This implementation is in parts specific to Intel Core 2 Duo processors!
*/
#ifndef _ARCH_PMC_H_
#define _ARCH_PMC_H_
#include <stddef.h>
// PMC MSR addresses
#define MSR_PERF_GLOBAL_STATUS 0x38E // global counter control facilities
#define MSR_PERF_GLOBAL_CTRL 0x38F
#define MSR_PERF_GLOBAL_OVF_CTRL 0x390
#define IA32_PERF_CAPABILITIES 0x345
#define IA32_PERFEVTSEL(i) (0x186 + i) // general purpose PMC configuration register
#define IA32_PMC(i) (0x0C1 + i) // general purpose PMC counter register
#define IA32_A_PMC(i) (0x4C1 + i) // general purpose alias PMC counter register for full width writes
#define MSR_PERF_FIXED_CTR(i) (0x309 + i) // fixed function PMC counter register
#define MSR_PERF_FIXED_CTR_CTRL 0x38D // fixed function PMC configuration register
#define PMC_FIXED_OS (1 << 0)
#define PMC_FIXED_USR (1 << 1)
#define PMV_FIXED_PMI (1 << 3)
/* For Intel Core 2 Duo the MSR_PERF_FIXED_CTRs are mapped as follows:
* MSR_PERF_FIXED_CTR(0) => INST_RETIRED.ANY
* MSR_PERF_FIXED_CTR(1) => CPU_CLK_UNHALTED.CORE
* MSR_PERF_FIXED_CTR(2) => CPU_CLK_UNHALTED.REF */
// architectural flags for IA32_PERFEVTSEL
#define PMC_EVTSEL_CMASK 24 // counter mask [31:24]
#define PMC_EVTSEL_UMASK 8 // unit mask [15:8]
#define PMC_EVTSEL_INC (1 << 23) // invert counter mask
#define PMC_EVTSEL_EN (1 << 22) // enable counters
#define PMC_EVTSEL_ANY (1 << 21) // any thread (from version 3 on)
#define PMC_EVTSEL_INT (1 << 20) // APIC interrupt enable
#define PMC_EVTSEL_PC (1 << 19) // pin control
#define PMC_EVTSEL_E (1 << 18) // edge detect
#define PMC_EVTSEL_OS (1 << 17) // operating system mode
#define PMC_EVTSEL_USR (1 << 16) // user mode
// Core 2 Duo non-architectural flags for IA32_PERFEVTSEL (bus snooping)
#define PMC_EVTSEL_HITM (1 << 11) // HITM response
#define PMC_EVTSEL_HIT (1 << 9) // HIT response
#define PMV_EVTSEL_CLEAN (1 << 8) // CLEAN response
// architectural PMC events CPUID.0AH.EBX[6:0]
#define PMC_EVT_UNHALTED_CORE_CLKS 0x003C // UnHalted Core Cycles
#define PMC_EVT_UNHALTED_REF_CLKS 0x013C // UnHalted Reference Cycles
#define PMC_EVT_INST_RET 0x00C0 // Instruction Retired
#define PMC_EVT_LLC_REF 0x4F2E // LLC Reference
#define PMC_EVT_LLC_MISS 0x412E // LLC Misses
#define PMC_EVT_BRANCH_RET 0x00C4 // Branch Instruction Retired
#define PMC_EVT_BRANCH_MISS_RET 0x00C5 // Branch Miss Retired
// Core 2 Duo non-architectural PMC events
#define PMC_EVT_DTLB_MISS_ANY 0x0108 // Memory accesses that missed the TLB
#define PMC_EVT_DTLB_MISS_LD 0x0208 // DTLB misses due to load operations
#define PMC_EVT_DTLB_MISS_L0_LD 0x0408 // Level 0: DTLB misses due to load operations
#define PMC_EVT_DTLB_MISS_ST 0x0808 // DTLB misses due to store operations
#define PMC_EVT_ITLB_FLUSH 0x4082 // ITLB flushes
#define PMC_EVT_ITLB_MISS 0x1282 // ITLB misses (either large or small page)
#define PMC_EVT_ITLB_MISS_RET 0x00C9 // Retired instructions that missed the ITLB
#define PMC_EVT_ITLB_MISS_SMALL 0x0282 // ITLB small page misses
#define PMC_EVT_ITLB_MISS_LARGE 0x1082 // ITLB large page misses
#define PMC_EVT_PAGE_WALK_COUNT 0x010C // Number of page-walks executed
#define PMC_EVT_PAGE_WALK_CLKS 0x020C // Duration of page-walks in core cycles
#define PMC_EVT_MEM_LOAD_RETIRED_L1D_MISS 0x01CB // Retired loads that miss the L1 data cache (precise event)
#define PMC_EVT_MEM_LOAD_RETIRED_L1D_LINE_MISS 0x02CB // L1 data cache line missed by retired loads (precise event)
#define PMC_EVT_MEM_LOAD_RETIRED_L2_MISS 0x04CB // Retired loads that miss the L2 cache (precise event)
#define PMC_EVT_MEM_LOAD_RETIRED_L2_LINE_MISS 0x08CB // L2 cache line missed by retired loads (precise event)
#define PMC_EVT_MEM_LOAD_RETIRED_DTLB_MISS 0x10CB // Retired loads that miss the DTLB (precise event)
struct pmc {
uint8_t id;
void (*start)();
void (*stop)();
void (*reset)();
void (*write)(uint64_t val);
uint64_t (*read)();
};
struct pmc_caps {
/// Architectural PM version (CPUID.0AH:EAX[7:0])
uint8_t version;
/// Number of available General Purpose PMCs (CPUID.0AH:EAX[15:8])
uint8_t gp_count;
/// Number of available Fixed Function PMCs (CPUID.0AH.EDX[4:0])
uint8_t ff_count;
/// Counter bit width of General Purpose PMCs (CPUID.0AH:EAX[23:16])
uint8_t gp_width;
/// Counter bit width of Fixed Function PMCs (CPUID.0AH.EDX[12:5])
uint8_t ff_width;
/// Bit mask of supported architectural PMC events (CPUID.0AH.EBX[6:0])
uint32_t arch_events;
/// IA32_PERF_CAPABILITIES MSR
uint64_t msr;
};
/** @brief Queries the CPU about available Performance Monitoring capabilities
*
* @return A pointer to the capabilities struct
**/
struct pmc_caps* pmc_init();
/** @brief Configures and stops a general purpose PMC
*
* @param i The number of the general purpose counter to configure
* @param event A combined event number including the unit mask (PMC_EVT_*)
* @param flags Flags for the IA32_PERFEVTSEL registers (PMC_EVTSEL_*)
* @param umask A separate unit mask ORed with the event
* @param cmask An optional counter mask value
* @return
* - 0 on success
* - else failure (invalid counter or flags)
*/
int pmc_gp_config(uint8_t i, uint16_t event, uint32_t flags, uint8_t umask, uint8_t cmask);
/** @brief Configures and stops a fixed function PMC
*
* @param i The number of the fixed function counter to configure
* @param flags Flags for the MSR_PERF_FIXED_CTR_CTRL register
* @return
* - 0 on success
* - else failure (invalid counter or flags)
*/
int pmc_ff_config(uint8_t i, uint8_t flags);
/** @brief Start a single general purpose PMC
*
* @param i The counter number
* @return
* - 0 on success
* - -EINVAL on invalid counter number
*/
inline int pmc_gp_start(uint8_t i);
/** @brief Stop a single general purpose PMC
*
* @param i The counter number
* @return
* - 0 on success
* - -EINVAL on invalid counter number
*/
inline int pmc_gp_stop(uint8_t i);
/** @brief Start a single fixed function PMC
*
* @param i The counter number
* @return
* - 0 on success
* - -EINVAL on invalid counter number
*/
inline int pmc_ff_start(uint8_t i);
/** @brief Stop a single fixed function PMC
*
* @param i The counter number
* @return
* - 0 on success
* - -EINVAL on invalid counter number
*/
inline int pmc_ff_stop(uint8_t i);
/** @brief Start all PMCs at the same time
*
* @return
* - 0 on success
* - -EINVAL if the global control facilities are unavailable (PM version < 2)
*/
inline int pmc_start_all();
/** @brief Stop all PMCs at the same time
*
* @return
* - 0 on success
* - -EINVAL if the global control facilities are unavailable (PM version < 2)
*/
inline int pmc_stop_all();
/** @brief Reset all PMCs to zero
*
* @return
* - 0 on success
* - -EINVAL if the global control facilities are unavailable (PM version < 2)
*/
inline int pmc_reset_all();
/** @brief Read a single general purpose PMC
*
* @param i The counter number
* @return The counter value (see struct pmc_caps.gp_width)
*/
inline uint64_t pmc_gp_read(uint8_t i);
/** @brief Read a single fixed function PMC
*
* @param i The counter number
* @return The counter value (see struct pmc_caps.ff_width)
*/
inline uint64_t pmc_ff_read(uint8_t i);
/** @brief Write a single general purpose PMC value
*
* Not all architectures support full width writes to the PMCs.
* If bit 13 (FW_WRITE) in struct pmc_caps.msr is not set the PMC
* is updated with the 32 bit sign extended version of val!
*
* @param i The counter number
* @param val The counter value (see struct pmc_caps.gp_width)
*/
inline int pmc_gp_write(uint8_t i, uint64_t val);
/** @brief Write a single fixed function PMC value
*
* @param i The counter number
* @param val The counter value (see struct pmc_caps.ff_width)
*/
inline int pmc_ff_write(uint8_t i, uint64_t val);
#endif
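For orientation, a minimal usage sketch of the interface declared above (hypothetical caller, error handling shortened): it counts retired instructions in kernel and user mode on general purpose counter 0.

#include <metalsvm/stdio.h>
#include <asm/pmc.h>

// Hypothetical example: count retired instructions on gp counter 0
void pmc_example(void)
{
	struct pmc_caps* caps = pmc_init();
	if (!caps->gp_count)
		return; // no general purpose counter available

	pmc_gp_config(0, PMC_EVT_INST_RET, PMC_EVTSEL_OS | PMC_EVTSEL_USR, 0, 0);
	pmc_gp_start(0);
	// ... code to be measured ...
	pmc_gp_stop(0);
	kprintf("instructions retired: %llu\n", pmc_gp_read(0));
}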

View file

@ -39,22 +39,124 @@
extern "C" {
#endif
// feature list 1
// CPUID.01H.EDX feature list
#define CPU_FEATURE_FPU (1 << 0)
#define CPU_FEATURE_PSE (1 << 3)
#define CPU_FEATURE_MSR (1 << 5)
#define CPU_FEATURE_PAE (1 << 6)
#define CPU_FEATURE_APIC (1 << 9)
#define CPU_FEATURE_PGE (1 << 13)
#define CPU_FEATURE_PAT (1 << 16)
#define CPU_FEATURE_PSE36 (1 << 17)
#define CPU_FEATURE_MMX (1 << 23)
#define CPU_FEATURE_FXSR (1 << 24)
#define CPU_FEATURE_SSE (1 << 25)
#define CPU_FEATURE_SSE2 (1 << 26)
// feature list 2
// CPUID.01H.ECX feature list
#define CPU_FEATURE_X2APIC (1 << 21)
#define CPU_FEATURE_AVX (1 << 28)
#define CPU_FEATURE_HYPERVISOR (1 << 31)
// CPUID.80000001H:EDX feature list
#define CPU_FEATURE_NX (1 << 20)
#define CPU_FEATURE_1GBHP (1 << 26)
#define CPU_FEATURE_LM (1 << 29)
// x86 control registers
/// Protected Mode Enable
#define CR0_PE (1 << 0)
/// Monitor coprocessor
#define CR0_MP (1 << 1)
/// Enable FPU emulation
#define CR0_EM (1 << 2)
/// Task switched
#define CR0_TS (1 << 3)
/// Extension type of coprocessor
#define CR0_ET (1 << 4)
/// Enable FPU error reporting
#define CR0_NE (1 << 5)
/// Enable write protected pages
#define CR0_WP (1 << 16)
/// Enable alignment checks
#define CR0_AM (1 << 18)
/// Globally enables/disables write-back caching
#define CR0_NW (1 << 29)
/// Globally disable memory caching
#define CR0_CD (1 << 30)
/// Enable paging
#define CR0_PG (1 << 31)
/// Virtual 8086 Mode Extensions
#define CR4_VME (1 << 0)
/// Protected-mode Virtual Interrupts
#define CR4_PVI (1 << 1)
/// Disable Time Stamp Counter register (rdtsc instruction)
#define CR4_TSD (1 << 2)
/// Enable debug extensions
#define CR4_DE (1 << 3)
/// Enable hugepage support
#define CR4_PSE (1 << 4)
/// Enable physical address extension
#define CR4_PAE (1 << 5)
/// Enable machine check exceptions
#define CR4_MCE (1 << 6)
/// Enable global pages
#define CR4_PGE (1 << 7)
/// Enable Performance-Monitoring Counter
#define CR4_PCE (1 << 8)
/// Enable Operating system support for FXSAVE and FXRSTOR instructions
#define CR4_OSFXSR (1 << 9)
/// Enable Operating System Support for Unmasked SIMD Floating-Point Exceptions
#define CR4_OSXMMEXCPT (1 << 10)
/// Enable Virtual Machine Extensions, see Intel VT-x
#define CR4_VMXE (1 << 13)
/// Enable Safer Mode Extensions, see Trusted Execution Technology (TXT)
#define CR4_SMXE (1 << 14)
/// Enables the instructions RDFSBASE, RDGSBASE, WRFSBASE, and WRGSBASE
#define CR4_FSGSBASE (1 << 16)
/// Enables process-context identifiers
#define CR4_PCIDE (1 << 17)
/// Enable XSAVE and Processor Extended States
#define CR4_OSXSAVE (1 << 18)
/// Enable Supervisor Mode Execution Protection
#define CR4_SMEP (1 << 20)
/// Enable Supervisor Mode Access Protection
#define CR4_SMAP (1 << 21)
// x86-64 specific MSRs
/// extended feature register
#define MSR_EFER 0xc0000080
/// legacy mode SYSCALL target
#define MSR_STAR 0xc0000081
/// long mode SYSCALL target
#define MSR_LSTAR 0xc0000082
/// compat mode SYSCALL target
#define MSR_CSTAR 0xc0000083
/// EFLAGS mask for syscall
#define MSR_SYSCALL_MASK 0xc0000084
/// 64bit FS base
#define MSR_FS_BASE 0xc0000100
/// 64bit GS base
#define MSR_GS_BASE 0xc0000101
/// SwapGS GS shadow
#define MSR_KERNEL_GS_BASE 0xc0000102
// MSR EFER bits
#define EFER_SCE (1 << 0)
#define EFER_LME (1 << 8)
#define EFER_LMA (1 << 10)
#define EFER_NXE (1 << 11)
#define EFER_SVME (1 << 12)
#define EFER_LMSLE (1 << 13)
#define EFER_FFXSR (1 << 14)
#define EFER_TCE (1 << 15)
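These MSRs together define the fast system call path. As an illustrative sketch only (the selector layout of MSR_STAR, kernel CS base in bits 47:32 and user CS base in bits 63:48, and the concrete selector values are assumptions here, not taken from this patch), enabling SYSCALL/SYSRET would combine them roughly like this:

extern void isrsyscall(void); // the kernel's syscall entry stub (see entry64.asm)

static void syscall_setup(void)
{
	wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_SCE);	// enable SYSCALL/SYSRET
	wrmsr(MSR_LSTAR, (uint64_t) &isrsyscall);	// long mode entry point
	wrmsr(MSR_STAR, (0x08ULL << 32) | (0x1BULL << 48)); // assumed selector bases
	wrmsr(MSR_SYSCALL_MASK, 1 << 9);		// clear IF during the prologue
}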
typedef struct {
uint32_t feature1, feature2;
uint32_t feature1, feature2, feature3;
uint32_t addr_width;
} cpu_info_t;
extern cpu_info_t cpu_info;
@ -107,6 +209,16 @@ inline static uint32_t on_hypervisor(void)
return (cpu_info.feature2 & CPU_FEATURE_HYPERVISOR);
}
inline static uint32_t has_pge(void)
{
return (cpu_info.feature1 & CPU_FEATURE_PGE);
}
inline static uint32_t has_nx(void)
{
return (cpu_info.feature3 & CPU_FEATURE_NX);
}
/** @brief Read out time stamp counter
*
* The rdtsc asm command puts a 64 bit time stamp value
@ -116,9 +228,15 @@ inline static uint32_t on_hypervisor(void)
*/
inline static uint64_t rdtsc(void)
{
#ifdef CONFIG_X86_32
uint64_t x;
asm volatile ("rdtsc" : "=A" (x));
return x;
#elif defined(CONFIG_X86_64)
uint64_t lo, hi;
asm volatile ("rdtsc" : "=a"(lo), "=d"(hi) );
return (hi << 32 | lo);
#endif
}
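A typical use of rdtsc() is to bracket a code section with two reads; a rough sketch (note that rdtsc is not serializing, so precise measurements would pair it with a serializing instruction such as cpuid):

uint64_t start, cycles;

start = rdtsc();
/* ... code to be measured ... */
cycles = rdtsc() - start;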
/** @brief Flush cache
@ -273,7 +391,7 @@ int ipi_tlb_flush(void);
/** @brief Flush a specific page entry in TLB
* @param addr The (virtual) address of the page to flush
*/
static inline void tlb_flush_one_page(uint32_t addr)
static inline void tlb_flush_one_page(size_t addr)
{
asm volatile("invlpg (%0)" : : "r"(addr) : "memory");
#if MAX_CORES > 1
@ -282,7 +400,7 @@ static inline void tlb_flush_one_page(uint32_t addr)
* => User-level applications run only on one
* and we didn't flush the TLB of the other cores
*/
if (addr <= KERNEL_SPACE)
if (addr < KERNEL_SPACE)
ipi_tlb_flush();
#endif
}
@ -293,7 +411,7 @@ static inline void tlb_flush_one_page(uint32_t addr)
*/
static inline void tlb_flush(void)
{
uint32_t val = read_cr3();
size_t val = read_cr3();
if (val)
write_cr3(val);

View file

@ -26,13 +26,6 @@
extern "C" {
#endif
/** @brief Copy a physical page to another physical destination
*
* @param dest Destination address
* @param src Source address
*/
void copy_page_physical(void* dest, const void * src);
#ifdef HAVE_ARCH_MEMCPY
#ifdef CONFIG_ROCKCREEK

View file

@ -97,17 +97,19 @@ static inline int register_task(void)
*
* @return 0 in any case
*/
static inline int jump_to_user_code(uint32_t ep, uint32_t stack)
static inline int jump_to_user_code(size_t ep, size_t stack)
{
asm volatile ("push $0x23; push %0; push $0x1B; push %1" :: "r"(stack), "r"(ep)); // fake stack, see Intel Reference Manual, Vol 1, 6.3.6
#ifdef CONFIG_X86_32
asm volatile ("mov %0, %%ds; mov %0, %%fs; mov %0, %%gs; mov %0, %%es" :: "r"(0x23));
asm volatile ("push $0x23; push %0; push $0x1B; push %1" :: "r"(stack), "r"(ep));
asm volatile ("lret" ::: "cc");
asm volatile ("mov %0, %%ds; mov %0, %%fs; mov %0, %%gs; mov %0, %%es" :: "r"(0x23)); // update segment registers
asm volatile ("lret" ::: "cc"); // far return to user level code
#elif defined (CONFIG_X86_64)
asm volatile ("lretq" ::: "cc"); // far return to user level code
#endif
return 0;
#else
return -22;
#endif
}
#ifdef __cplusplus

View file

@ -0,0 +1,74 @@
/*
* Copyright 2010 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel
* @file arch/x86/include/asm/uart.h
* @brief COM port related code
*/
#ifndef __ARCH_UART_H__
#define __ARCH_UART_H__
#include <metalsvm/stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifdef CONFIG_UART
/** @brief Initialize the COM port */
void uart_init(void);
/** @brief Send a single character to the uart
*
* @param c The character to send
*/
void uart_putchar(char c);
/** @brief Receive a single character on the uart
*
* @return The received character
*/
char uart_getchar(void);
/** @brief Simple string output on the uart
*
* If you want a new line you will have to append "\\n".
*
* @param str The null-terminated string to send
*/
void uart_puts(const char *str);
/** @brief Simple string input from the uart
*
* Reads up to len characters.
*
* @return Length of input in bytes
*/
int uart_gets(char *str, size_t len);
#endif
#ifdef __cplusplus
}
#endif
#endif

View file

@ -1,4 +1,4 @@
C_source := gdt.c kb.c timer.c irq.c isrs.c idt.c vga.c multiboot.c apic.c pci.c processor.c
C_source := gdt.c kb.c timer.c irq.c isrs.c idt.c vga.c uart.c multiboot.c apic.c pci.c processor.c pmc.c
ASM_source := entry$(BIT).asm string$(BIT).asm
MODULE := arch_x86_kernel

View file

@ -27,7 +27,7 @@
#include <metalsvm/init.h>
#include <metalsvm/page.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/mmu.h>
#include <metalsvm/memory.h>
#include <metalsvm/tasks.h>
#include <asm/irq.h>
#include <asm/idt.h>
@ -387,19 +387,21 @@ void smp_start(uint32_t id)
kprintf("Application processor %d is entering its idle task\n", apic_cpu_id());
// initialize default cpu features
#ifdef CONFIG_X86_32
// initialization for x86_64 is done in smp_entry()
cpu_init();
#endif
// use the same gdt like the boot processors
gdt_flush();
// install IDT
idt_install();
// On 64bit system, paging is already enabled
#ifdef CONFIG_X86_32
/* enable paging */
write_cr3((size_t)get_boot_pgd());
write_cr3((size_t) get_boot_page_map());
i = read_cr0();
i = i | (1 << 31);
write_cr0(i);
@ -460,7 +462,8 @@ int smp_init(void)
for(i=1; (i<ncores) && (i<MAX_CORES); i++)
{
/*
* dirty hack: Copy 16bit startup code (see tools/smp_setup.asm)
* Dirty hack aka. runtime linking:
* Copy 16bit startup code (see tools/smp_setup.asm)
* to a 16bit address. Wakeup the other cores via IPI. They start
* at this address in real mode, switch to protected and finally
* they jump to smp_main.
@ -476,9 +479,9 @@ int smp_init(void)
#ifdef CONFIG_X86_32
*((uint32_t*) (bootaddr+j)) = (uint32_t) smp_start;
kprintf("Set entry point of the application processors at 0x%x\n", (uint32_t) smp_start);
#else
#elif defined(CONFIG_X86_64)
*((uint32_t*) (bootaddr+j)) = (uint32_t) smp_entry;
kprintf("Set entry point of the application processors at 0x%lx\n", (size_t) smp_entry);
kprintf("Set entry point of the application processors at 0x%x\n", (uint32_t) smp_entry);
#endif
}
@ -492,12 +495,13 @@ int smp_init(void)
*((uint32_t*) (bootaddr+j)) = (uint32_t) esp;
if ((int) esp < 0)
kprintf("Invalid stack value\n");
kprintf("Set stack of the application processors to 0x%x\n", esp);
}
}
//kprintf("size of the boot_code %d\n", sizeof(boot_code));
err = wakeup_ap((uint32_t)bootaddr, i);
//kprintf("Size of the boot_code: %d\n", sizeof(boot_code));
err = wakeup_ap((uint32_t) bootaddr, i);
if (err)
kprintf("Unable to wakeup application processor %d: %d\n", i, err);
@ -553,30 +557,33 @@ int map_apic(void)
lapic = map_region(0 /*lapic*/, lapic, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE);
if (BUILTIN_EXPECT(!lapic, 0))
return -ENXIO;
#else
if (lapic != (size_t)&kernel_start - 0x1000) {
kprintf("Upps! Kernel has to remap LAPIC!\n");
lapic = map_region(0 /*lapic*/, lapic, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE);
if (BUILTIN_EXPECT(!lapic, 0))
return -ENXIO;
}
#endif
vma_add(LAPIC_ADDR, LAPIC_ADDR + PAGE_SIZE, VMA_READ|VMA_WRITE);
vma_add(PAGE_CEIL((size_t) apic_config),
PAGE_FLOOR((size_t) apic_config + sizeof(apic_config_table_t)),
VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
vma_add(PAGE_CEIL((size_t) apic_mp),
PAGE_FLOOR((size_t) apic_mp + sizeof(apic_mp_t)),
VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
kprintf("Mapped LAPIC at 0x%x\n", lapic);
if (ioapic) {
size_t old = 0;
ioapic = (ioapic_t*) map_region(0 /*(size_t)ioapic*/, (size_t) ioapic, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE);
kprintf("Mapped IOAPIC at 0x%x\n", ioapic);
// map all processor entries
for(i=0; i<MAX_CORES; i++) {
if (apic_processors[i] && (old != (((size_t)apic_processors[i]) & 0xFFFFF000)))
old = map_region(((size_t) apic_processors[i]) & 0xFFFFF000, ((size_t) apic_processors[i]) & 0xFFFFF000, 1, MAP_REMAP|MAP_KERNEL_SPACE|MAP_NO_CACHE);
}
ioapic = (ioapic_t*) map_region(IOAPIC_ADDR, (size_t) ioapic, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE);
vma_add(IOAPIC_ADDR, IOAPIC_ADDR + PAGE_SIZE, VMA_READ|VMA_WRITE);
kprintf("Mapped IOAPIC at %p\n", ioapic);
kprintf("IOAPIC version: 0x%x\n", ioapic_version());
kprintf("Max Redirection Entry: %u\n", ioapic_max_redirection_entry());
// map all processor entries
size_t old = 0;
for(i=0; i<MAX_CORES; i++) {
if (apic_processors[i] && (old != (((size_t) apic_processors[i]) & PAGE_MASK))) {
old = map_region(((size_t) apic_processors[i]) & PAGE_MASK, ((size_t) apic_processors[i]) & PAGE_MASK, 1, MAP_REMAP|MAP_KERNEL_SPACE|MAP_NO_CACHE);
}
}
}
return 0;
@ -716,20 +723,19 @@ static int apic_probe(void)
goto found_mp;
}
}
}
mmap++;
}
}
#endif
#endif
found_mp:
#endif
if (!apic_mp)
goto no_mp;
kprintf("Found MP config table at 0x%x\n", apic_mp);
kprintf("System uses Multiprocessing Specification 1.%u\n", apic_mp->version);
kprintf("MP features 1: %u\n", apic_mp->features[0]);
kprintf("System uses MultiProcessing Specification 1.%u\n", apic_mp->version);
kprintf("MultiProcessing features 1: %u\n", apic_mp->features[0]);
if (apic_mp->features[0]) {
kputs("Currently, MetalSVM supports only multiprocessing via the MP config tables!\n");
@ -737,67 +743,75 @@ found_mp:
}
apic_config = (apic_config_table_t*) ((size_t) apic_mp->mp_config);
if (!apic_config || strncmp((void*) &apic_config->signature, "PCMP", 4) !=0) {
if (!apic_config || strncmp((void*) &apic_config->signature, "PCMP", 4) != 0) {
kputs("Invalid MP config table\n");
goto no_mp;
}
kprintf("MultiProcessing OEM: %.8s\n", apic_config->oem_id);
kprintf("MultiProcessing product: %.12s\n", apic_config->product_id);
addr = (size_t) apic_config;
addr += sizeof(apic_config_table_t);
if (addr % 4)
addr += 4 - addr % 4;
addr += 4 - addr % 4; // align to dword
// search the ISA bus => required to redirect the IRQs
for(i=0; i<apic_config->entry_count; i++) {
switch(*((uint8_t*) addr)) {
case 0:
addr += 20;
break;
case 1: {
apic_bus_entry_t* mp_bus;
for(i=0, count=0; i<apic_config->entry_count; i++) {
uint8_t type = * (uint8_t*) addr;
switch (type) {
case 0: { // CPU
apic_processor_entry_t* cpu = (apic_processor_entry_t*) addr;
mp_bus = (apic_bus_entry_t*) addr;
if (mp_bus->name[0] == 'I' && mp_bus->name[1] == 'S' &&
mp_bus->name[2] == 'A')
isa_bus = i;
}
default:
addr += 8;
if (i < MAX_CORES) {
if (cpu->cpu_flags & 0x01) // enabled?
apic_processors[i] = cpu;
if ((cpu->cpu_flags & 0x03) == 0x03) // enabled boot processor?
boot_processor = i;
}
kprintf("Found CPU %d with id = %#4x, version = %#4x, signature = %#6x, feature = %#6x\n",
count, cpu->id, cpu->version, cpu->cpu_signature, cpu->cpu_feature);
count++;
}
addr += 20;
break;
case 1: { // BUS
apic_bus_entry_t* bus = (apic_bus_entry_t*) addr;
kprintf("Found %.6s bus with id %u\n", bus->name, bus->bus_id);
if (!strncmp(bus->name, "ISA", 3))
isa_bus = bus->bus_id;
}
addr += 8;
break;
case 2: { // IO APIC
apic_io_entry_t* io_entry = (apic_io_entry_t*) addr;
ioapic = (ioapic_t*) ((size_t) io_entry->addr);
kprintf("Found IOAPIC at 0x%x\n", ioapic);
}
addr += 8;
break;
case 3: { // IO INT
apic_ioirq_entry_t* extint = (apic_ioirq_entry_t*) addr;
if (extint->src_bus == isa_bus) {
irq_redirect[extint->src_irq] = extint->dest_intin;
kprintf("Redirect interrupt %u -> %u\n", extint->src_irq, extint->dest_intin);
}
}
addr += 8;
break;
case 4: // Local INT
default:
addr += 8;
}
}
addr = (size_t) apic_config;
addr += sizeof(apic_config_table_t);
if (addr % 4)
addr += 4 - addr % 4;
for(i=0, count=0; i<apic_config->entry_count; i++) {
if (*((uint8_t*) addr) == 0) { // cpu entry
if (i < MAX_CORES) {
apic_processors[i] = (apic_processor_entry_t*) addr;
if (!(apic_processors[i]->cpu_flags & 0x01)) // is the processor usable?
apic_processors[i] = NULL;
else if (apic_processors[i]->cpu_flags & 0x02)
boot_processor = i;
}
count++;
addr += 20;
} else if (*((uint8_t*) addr) == 2) { // IO_APIC
apic_io_entry_t* io_entry = (apic_io_entry_t*) addr;
ioapic = (ioapic_t*) ((size_t) io_entry->addr);
addr += 8;
kprintf("Found IOAPIC at 0x%x\n", ioapic);
} else if (*((uint8_t*) addr) == 3) { // IO_INT
apic_ioirq_entry_t* extint = (apic_ioirq_entry_t*) addr;
if (extint->src_bus == isa_bus) {
irq_redirect[extint->src_irq] = extint->dest_intin;
kprintf("Redirect irq %u -> %u\n", extint->src_irq, extint->dest_intin);
}
addr += 8;
} else addr += 8;
}
kprintf("Found %u cores\n", count);
if (count > MAX_CORES) {
kputs("Found too many cores! Increase the macro MAX_CORES!\n");
goto no_mp;
@ -822,8 +836,8 @@ check_lapic:
kprintf("Found APIC at 0x%x\n", lapic);
#ifdef CONFIG_X86_64
// On a x64 system, we already map the lapic below the kernel
lapic = (size_t)&kernel_start - 0x1000;
// On a x64 system, we already mapped the LAPIC at LAPIC_ADDR
lapic = LAPIC_ADDR;
#endif
kprintf("Maximum LVT Entry: 0x%x\n", apic_lvt_entries());
kprintf("APIC Version: 0x%x\n", apic_version());

View file

@ -29,7 +29,6 @@
SECTION .mboot
global start
start:
mov byte [msg], 'H'
jmp stublet
; This part MUST be 4byte aligned, so we solve that issue using 'ALIGN 4'
@ -38,10 +37,10 @@ mboot:
; Multiboot macros to make a few lines more readable later
MULTIBOOT_PAGE_ALIGN equ 1<<0
MULTIBOOT_MEMORY_INFO equ 1<<1
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
MULTIBOOT_HEADER_MAGIC equ 0x1BADB002
MULTIBOOT_HEADER_FLAGS equ MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO ; | MULTIBOOT_AOUT_KLUDGE
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
EXTERN code, bss, end
; This is the GRUB Multiboot header. A boot signature
@ -49,8 +48,6 @@ mboot:
dd MULTIBOOT_HEADER_FLAGS
dd MULTIBOOT_CHECKSUM
msg db "?ello from MetalSVM kernel!!", 0
SECTION .text
ALIGN 4
stublet:
@ -70,7 +67,7 @@ stublet:
; jump to the boot processor's C code
extern main
call main
jmp $
jmp $ ; infinite loop
global cpu_init
cpu_init:
@ -112,7 +109,7 @@ global read_ip
read_ip:
mov eax, [esp+4]
pop DWORD [eax] ; Get the return address
add esp, 4 ; Dirty Hack! read_ip cleanup the stacl
add esp, 4 ; Dirty Hack! read_ip cleans up the stack
jmp [eax] ; Return. Can't use RET because return
; address popped off the stack.

View file

@ -30,7 +30,7 @@ extern kernel_end
extern apic_mp
; We use a special name to map this section at the begin of our kernel
; => Multiboot needs its magic number at the begin of the kernel
; => Multiboot needs its magic number at the beginning of the kernel
SECTION .mboot
global start
start:
@ -42,19 +42,19 @@ mboot:
; Multiboot macros to make a few lines more readable later
MULTIBOOT_PAGE_ALIGN equ 1<<0
MULTIBOOT_MEMORY_INFO equ 1<<1
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
; MULTIBOOT_AOUT_KLUDGE equ 1<<16
MULTIBOOT_HEADER_MAGIC equ 0x1BADB002
MULTIBOOT_HEADER_FLAGS equ MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO ; | MULTIBOOT_AOUT_KLUDGE
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
EXTERN code, bss, end
; This is the GRUB Multiboot header. A boot signature
dd MULTIBOOT_HEADER_MAGIC
dd MULTIBOOT_HEADER_FLAGS
dd MULTIBOOT_CHECKSUM
ALIGN 4
; we need already a valid GDT to switch in the 64bit modus
; we already need a valid GDT to switch to 64bit mode
GDT64: ; Global Descriptor Table (64-bit).
.Null: equ $ - GDT64 ; The null descriptor.
dw 0 ; Limit (low).
@ -81,112 +81,90 @@ GDT64: ; Global Descriptor Table (64-bit).
dw $ - GDT64 - 1 ; Limit.
dq GDT64 ; Base.
times 256 DD 0
times 256 DD 0 ; Stack for booting
startup_stack:
PAGE_MAP_ENTRIES equ (1<<9)
PAGE_SIZE equ (1<<12)
SECTION .data
; create default page tables for the 64bit kernel
global boot_pgd ; aka PML4
ALIGN 4096 ; of course, the page tables have to be page aligned
NOPTS equ 512
boot_pgd times 512 DQ 0
boot_pdpt times 512 DQ 0
boot_pd times 512 DQ 0
boot_pt times (NOPTS*512) DQ 0
; Create default page tables for the 64bit kernel
global boot_pml4
ALIGN PAGE_SIZE ; of course, the page tables have to be page aligned
boot_pml4 times PAGE_MAP_ENTRIES DQ 0
boot_pdpt times PAGE_MAP_ENTRIES DQ 0
boot_pgd times PAGE_MAP_ENTRIES DQ 0
boot_pgt times (KERNEL_SPACE/PAGE_SIZE) DQ 0
SECTION .text
ALIGN 8
%if MAX_CORES > 1
global smp_entry
smp_entry:
; enable caching, disable paging and fpu emulation
and eax, 0x1ffffffb
; ...and turn on FPU exceptions
or eax, 0x22
mov cr0, eax
; clears the current pgd entry
xor eax, eax
mov cr3, eax
; at this stage, we disable the SSE support
mov eax, cr4
and eax, 0xfffbf9ff
mov cr4, eax
; initialize page table
mov edi, boot_pgd
; Initialize cpu features
call cpu_init
; Initialize cr3 register
mov edi, boot_pml4
mov cr3, edi
; we need to enable PAE modus
; Enable PAE
mov eax, cr4
or eax, 1 << 5
mov cr4, eax
; switch to the compatibility mode (which is part of long mode)
; Enable longmode (compatibility mode)
mov ecx, 0xC0000080
rdmsr
or eax, 1 << 8
or eax, (1 << 8) | (1 << 11) ; IA32_EFER.LME = 1, IA32_EFER.NXE = 1
wrmsr
; enable paging
; Enable paging
mov eax, cr0
or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31nd bit, and the PM-bit, which is the 0th bit.
mov cr0, eax
or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31st bit, and the PE-bit, which is the 0th bit.
mov cr0, eax ; According to the multiboot spec the PE-bit has to be set by the bootloader already!
mov edi, [esp+4] ; set argumet for smp_start
; Jump to 64-bit longmode
mov edi, [esp+4] ; Set argument for smp_start
lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
jmp GDT64.Code:smp_start64 ; Set the code segment and enter 64-bit long mode.
jmp $ ; endless loop
%endif
search_apic:
; Search MP Floating Pointer Structure
search_mps:
push ebp
mov ebp, esp
push ecx
xor eax, eax
mov ecx, [ebp+8]
L1:
.l1:
cmp [ecx], DWORD 0x5f504d5f ; MP_FLT_SIGNATURE
jne L2
jne .l2
mov al, BYTE [ecx+9]
cmp eax, 4
ja L2
ja .l2
mov al, BYTE [ecx+11]
cmp eax, 0
jne L2
jne .l2
mov eax, ecx
jmp L3
jmp .l3
L2:
.l2:
add ecx, 4
cmp ecx, [ebp+12]
jb L1
jb .l1
xor eax, eax
L3:
.l3:
pop ecx
pop ebp
ret
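For readability, the same scan rendered in C: search_mps walks the given physical range in 4-byte steps, looking for the MP Floating Pointer signature "_MP_" (0x5f504d5f) and checking the bytes at offsets 9 and 11 exactly like the assembly does (a sketch, assuming the range is identity mapped at this point):

static uint32_t search_mps_c(uint32_t start, uint32_t end)
{
	uint32_t addr;

	for (addr = start; addr < end; addr += 4) {
		if (*((uint32_t*) addr) == 0x5f504d5f &&	// "_MP_"
		    ((uint8_t*) addr)[9] <= 4 &&		// like cmp eax, 4; ja
		    ((uint8_t*) addr)[11] == 0)			// like cmp eax, 0; jne
			return addr;
	}

	return 0;
}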
ALIGN 4
stublet:
mov esp, startup_stack-4
push ebx ; save pointer to the multiboot structure
mov eax, cr0
; enable caching, disable paging and fpu emulation
and eax, 0x1ffffffb
; ...and turn on FPU exceptions
or eax, 0x22
mov cr0, eax
; clears the current pgd entry
xor eax, eax
mov cr3, eax
; at this stage, we disable the SSE support
mov eax, cr4
and eax, 0xfffbf9ff
mov cr4, eax
; do we have the instruction cpuid?
check_longmode:
; Check for cpuid instruction
pushfd
pop eax
mov ecx, eax
@ -198,59 +176,22 @@ stublet:
push ecx
popfd
xor eax, ecx
jz Linvalid
; cpuid > 0x80000000?
jz .unsupported
; Check for extended cpu features (cpuid > 0x80000000)
mov eax, 0x80000000
cpuid
cmp eax, 0x80000001
jb Linvalid ; It is less, there is no long mode.
; do we have a long mode?
jb .unsupported ; It is less, there is no long mode.
; Check if longmode is supported
mov eax, 0x80000001
cpuid
test edx, 1 << 29 ; Test if the LM-bit, which is bit 29, is set in the D-register.
jz Linvalid ; They aren't, there is no long mode.
jz .unsupported ; They aren't, there is no long mode.
ret
.unsupported:
jmp $
; initialize page table
mov edi, boot_pgd
mov cr3, edi
; So lets make PML4T[0] point to the PDPT and so on:
mov DWORD [edi], boot_pdpt ; Set the double word at the destination index to pdpt.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
mov edi, boot_pdpt
mov DWORD [edi], boot_pd ; Set the double word at the destination index to pd.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
mov edi, boot_pd
mov ebx, boot_pt
mov ecx, NOPTS
L0:
mov DWORD [edi], ebx ; Set the double word at the destination index to pt.
or DWORD [edi], 0x00000003 ; Set present and writeable bit
add edi, 8
add ebx, 0x1000
loop L0
%ifdef CONFIG_VGA
; map the VGA address into the virtual address space
mov edi, 0xB8000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, 0xB8000
or ebx, 0x00000013
mov DWORD [edi], ebx
%endif
; map multiboot structure into the virtual address space
mov edi, [esp]
and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, [esp]
and ebx, 0xFFFFF000
or ebx, 0x00000003
mov DWORD [edi], ebx
; check if lapic is available
check_lapic:
push eax
push ebx
push ecx
@ -259,123 +200,196 @@ L0:
cpuid
and edx, 0x200
cmp edx, 0
je no_lapic
; map lapic at 0xFEE00000 below the kernel
mov edi, kernel_start - 0x1000
je .unsupported
; Map lapic at 0xFEE00000
mov edi, LAPIC_ADDR
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, 0xFEE00000
or ebx, 0x00000013
add edi, boot_pgt
mov ebx, 0xFEE00000 ; LAPIC base address
or ebx, 0x113 ; set present, global, writable and cache disable bits
mov DWORD [edi], ebx
no_lapic:
mov DWORD [edi+4], 0x80000000 ; set execution disable bit in higher half
.unsupported:
pop edx
pop ecx
pop ebx
pop eax
ret
; search APIC
cpu_init:
mov eax, cr0
; Enable caching, disable paging and fpu emulation
and eax, 0x1ffffffb
; ...and turn on FPU exceptions
or eax, 0x22
mov cr0, eax
; Clear the current pgd entry
xor eax, eax
mov cr3, eax
; at this stage, we disable the SSE support
mov eax, cr4
and eax, 0xfffbf9ff
mov cr4, eax
ret
; Identity map a single page at address eax
identity_page:
push edi
push ebx
mov edi, eax
and edi, 0xFFFFF000 ; page align in lower half
shr edi, 9 ; (edi >> 12) * 8 (index for boot_pgt)
add edi, boot_pgt
mov ebx, eax
and ebx, 0xFFFFF000 ; page align lower half
or ebx, 0x113 ; set present, global, writable and cache disable bits
mov DWORD [edi], ebx
mov DWORD [edi+4], 0x80000000 ; set execution disable bit in higher half
pop ebx
pop edi
ret
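The `shr edi, 9` trick computes `(eax >> 12) * 8` in a single shift: every 4 KiB page owns one 8-byte entry in boot_pgt. In C, identity_page does roughly this (a sketch, valid only for addresses covered by boot_pgt):

extern uint64_t boot_pgt[];

static void identity_page_c(uint32_t addr)
{
	uint64_t* entry = &boot_pgt[addr >> 12];

	// low dword: frame | present, global, writable, cache disable (0x113)
	// high dword: 0x80000000 == execution disable (bit 63)
	*entry = ((uint64_t) (addr & 0xFFFFF000) | 0x113) | (1ULL << 63);
}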
ALIGN 4
stublet:
mov esp, startup_stack-4
; Save pointer to the Multiboot structure
push ebx
; Initialize cpu features
call cpu_init
; Check if longmode is supported
call check_longmode
; Check if lapic is available
call check_lapic
; Find MP Floating Pointer structure
push DWORD 0x100000
push DWORD 0xF0000
call search_apic
call search_mps
add esp, 8
cmp eax, 0
jne La
jne map_mps
push DWORD 0xA0000
push DWORD 0x9F000
call search_apic
call search_mps
add esp, 8
cmp eax, 0
je Lb
je map_vga
La:
; map MP Floating Pointer Structure
map_mps:
; Map MP Floating Pointer structure
mov DWORD [apic_mp], eax
mov edi, eax
and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, eax
and ebx, 0xFFFFF000
or ebx, 0x00000013
mov DWORD [edi], ebx
call identity_page
; map mp_config
mov edi, [eax+4]
and edi, 0xFFFFF000
shr edi, 9 ; (edi >> 12) * 8
add edi, boot_pt
mov ebx, [eax+4]
and ebx, 0xFFFFF000
or ebx, 0x00000013
mov DWORD [edi], ebx
; Map MP Configuration table
mov eax, [eax+4] ; Offset for physical address of MP table
call identity_page
Lb:
map_vga:
%ifdef CONFIG_VGA
; Map VGA textmode plane
mov eax, 0xB8000
call identity_page
%endif
map_multiboot:
; Map Multiboot structure
mov eax, [esp] ; Pointer is still on the stack
call identity_page
; Map Multiboot memory map
test DWORD [eax], (1 << 6) ; check if mmap_* fields are valid
jz init_paging
mov eax, [eax+48]
call identity_page
init_paging:
mov edi, boot_pml4
mov cr3, edi
mov DWORD [edi], boot_pdpt
or DWORD [edi], 0x103 ; Set present, global and writable flags
mov edi, boot_pdpt
mov DWORD [edi], boot_pgd
or DWORD [edi], 0x103 ; Set present, global and writable flags
mov edi, boot_pgd
mov ebx, boot_pgt
mov ecx, PAGE_MAP_ENTRIES ; Map all boot_pgt to the kernel space
.l1:
mov DWORD [edi], ebx
or DWORD [edi], 0x103 ; Set present, global and writable flags
add edi, 8
add ebx, 0x1000
loop .l1
map_kernel:
mov edi, kernel_start
shr edi, 9 ; (kernel_start >> 12) * 8
add edi, boot_pt
shr edi, 9 ; (edi >> 12) * 8 (index for boot_pgt)
add edi, boot_pgt
mov ebx, kernel_start
or ebx, 0x00000003
or ebx, 0x103 ; set present, global and writable flags
mov ecx, kernel_end ; determine kernel size in number of pages
sub ecx, kernel_start
shr ecx, 12
inc ecx
Lc:
mov DWORD [edi], ebx ; Set the double word at the destination index to the B-register.
.l1:
mov DWORD [edi], ebx
add edi, 8
add ebx, 0x1000
loop Lc
loop .l1
; we need to enable PAE modus
; Enable PAE
mov eax, cr4
or eax, 1 << 5
mov cr4, eax
; switch to the compatibility mode (which is part of long mode)
; Enable longmode (compatibility mode)
mov ecx, 0xC0000080
rdmsr
or eax, 1 << 8
or eax, (1 << 8) | (1 << 11) ; IA32_EFER.LME = 1, IA32_EFER.NXE = 1
wrmsr
; enable paging
; Enable paging
mov eax, cr0
or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31nd bit, and the PM-bit, which is the 0th bit.
or eax, (1 << 31) | (1 << 0) ; Set the PG-bit, which is the 31st bit, and the PE-bit, which is the 0th bit.
mov cr0, eax
pop ebx ; restore pointer to multiboot structure
lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode.
Linvalid:
jmp $
; Jump to 64-bit longmode
pop ebx ; Restore pointer to multiboot structure
lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table.
jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode.
[BITS 64]
start64:
; initialize segment registers
; Initialize segment registers
mov ax, GDT64.Data
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
mov ss, ax
; set default stack pointer
; Set default stack pointer
mov rsp, boot_stack
add rsp, KERNEL_STACK_SIZE-16
; interpret multiboot information
; Interpret multiboot information
extern multiboot_init
mov rdi, rbx
call multiboot_init
; jump to the boot processors's C code
; Jump to the boot processor's C code
extern main
call main
jmp $
%if MAX_CORES > 1
smp_start64:
; initialize segment registers
; Initialize segment registers
mov ax, GDT64.Data
mov ds, ax
mov es, ax
@ -383,29 +397,12 @@ smp_start64:
mov gs, ax
mov ss, ax
; jump to the boot processors's C code
; Jump to the boot processor's C code
extern smp_start
call smp_start
jmp $
%endif
global cpu_init
cpu_init:
; mov eax, cr0
; enable caching, disable paging and fpu emulation
; and eax, 0x1ffffffb
; ...and turn on FPU exceptions
; or eax, 0x22
; mov cr0, eax
; clears the current pgd entry
; xor eax, eax
; mov cr3, eax
; at this stage, we disable the SSE support
; mov eax, cr4
; and eax, 0xfffbf9ff
; mov cr4, eax
; ret
; This will set up our new segment registers and is declared in
; C as 'extern void gdt_flush();'
global gdt_flush
@ -414,13 +411,6 @@ gdt_flush:
lgdt [gp]
ret
; determines the current instruction pointer (after the jmp)
global read_eip
read_eip:
pop rax ; Get the return address
jmp rax ; Return. Can't use RET because return
; address popped off the stack.
; In just a few pages in this tutorial, we will add our Interrupt
; Service Routines (ISRs) right here!
global isr0
@ -742,41 +732,41 @@ extern syscall_handler
; used to realize system calls
isrsyscall:
push r15
push r14
push r13
push r12
cli ; disable interrupts during prologue
; save caller saved registers
push r11
push r10
push r9
push r8
push rdi
push rsi
push rbp
push rsp
push rbx
push rdx
push rcx
push rax
mov rdi, rsp
; set kernel data segments
mov ax, 0x10
mov ds, ax
; x86-64 ABI calling convention (see newlib/src/libgloss/syscall.h)
mov r8, rbx
mov r9, rax
mov rax, 0 ; we've not used vector registers for this va_arg call
sti ; enable interrupts during syscall
call syscall_handler
cli ; disable interrupts during prologue
pop rax
; restore caller saved registers
pop rcx
pop rdx
pop rbx
add rsp, 8
pop rbp
pop rsi
pop rdi
pop r8
pop r9
pop r10
pop r11
pop r12
pop r13
pop r14
iretq
global irq0

View file

@ -50,7 +50,7 @@ size_t* get_current_stack(void)
#endif
// use new page table
write_cr3(virt_to_phys((size_t)curr_task->pgd));
write_cr3(virt_to_phys((size_t) curr_task->page_map));
return curr_task->last_stack_pointer;
}
@ -59,7 +59,7 @@ int arch_fork(task_t* task)
{
struct state* state;
task_t* curr_task = per_core(current_task);
size_t esp, state_size;
size_t state_size;
if (BUILTIN_EXPECT(!task, 0))
return -EINVAL;
@ -78,6 +78,7 @@ int arch_fork(task_t* task)
memcpy(task->stack, curr_task->stack, KERNEL_STACK_SIZE);
#ifdef CONFIG_X86_32
size_t esp;
asm volatile ("mov %%esp, %0" : "=m"(esp));
esp -= (size_t) curr_task->stack;
esp += (size_t) task->stack;
@ -107,8 +108,48 @@ int arch_fork(task_t* task)
// This will be the entry point for the new task. read_ip cleanups the stack
asm volatile ("push %0; call read_ip" :: "r"(&state->eip) : "%eax");
#else
#warning Currently, not supported!
return -1;
size_t rsp;
asm volatile ("mov %%rsp, %0" : "=m"(rsp));
rsp -= (size_t) curr_task->stack;
rsp += (size_t) task->stack;
state = (struct state*) (rsp - state_size);
//memset(state, 0x00, state_size);
asm volatile ("push %rax");
asm volatile ("push %rcx");
asm volatile ("push %rdx");
asm volatile ("push %rbx");
asm volatile ("push %rbp");
asm volatile ("push %rsi");
asm volatile ("push %rdi");
asm volatile ("push %r8");
asm volatile ("push %r9");
asm volatile ("push %r10");
asm volatile ("push %r11");
asm volatile ("pop %0" : "=m"(state->r11));
asm volatile ("pop %0" : "=m"(state->r10));
asm volatile ("pop %0" : "=m"(state->r9));
asm volatile ("pop %0" : "=m"(state->r8));
asm volatile ("pop %0" : "=m"(state->rdi));
asm volatile ("pop %0" : "=m"(state->rsi));
asm volatile ("pop %0" : "=m"(state->rbp));
asm volatile ("pop %0" : "=m"(state->rbx));
asm volatile ("pop %0" : "=m"(state->rdx));
asm volatile ("pop %0" : "=m"(state->rcx));
asm volatile ("pop %0" : "=m"(state->rax));
state->rsp = rsp;
task->last_stack_pointer = (size_t*) state;
state->int_no = 0xB16B00B5;
state->error = 0xC03DB4B3;
state->cs = 0x08;
state->ss = 0x10;
asm volatile ("pushf; pop %0" : "=m"(state->rflags)); // store the current RFLAGS
asm volatile ("leaq (%%rip), %0;": "=r"(state->rip)); // store current instruction pointer
state->rflags |= (1 << 9); // enable interrupts
#endif
return 0;
@ -266,7 +307,7 @@ void gdt_install(void)
gdt_set_gate(2, 0, limit,
GDT_FLAG_RING0 | GDT_FLAG_SEGMENT | GDT_FLAG_DATASEG | GDT_FLAG_PRESENT,
GDT_FLAG_4K_GRAN | mode);
/*
* Create code segment for userspace applications (ring 3)
*/

View file

@ -192,8 +192,8 @@ static const char *exception_messages[] = {
"Breakpoint", "Into Detected Overflow", "Out of Bounds", "Invalid Opcode",
"No Coprocessor", "Double Fault", "Coprocessor Segment Overrun", "Bad TSS",
"Segment Not Present", "Stack Fault", "General Protection Fault", "Page Fault",
"Unknown Interrupt", "Coprocessor Fault", "Alignment Check", "Machine Check",
"Reserved", "Reserved", "Reserved", "Reserved", "Reserved",
"Unknown Interrupt", "Math Fault", "Alignment Check", "Machine Check",
"SIMD Floating-Point", "Virtualization", "Reserved", "Reserved", "Reserved",
"Reserved", "Reserved", "Reserved", "Reserved", "Reserved", "Reserved",
"Reserved", "Reserved" };
@ -208,13 +208,18 @@ static const char *exception_messages[] = {
static void fault_handler(struct state *s)
{
if (s->int_no < 32) {
kputs(exception_messages[s->int_no]);
task_t* task = per_core(current_task);
#ifdef CONFIG_X86_32
kprintf(" Exception (%d) at 0x%x:0x%x on core %u, error code 0x%x, eflags 0x%x\n",
s->int_no, s->cs, s->eip, CORE_ID, s->error, s->eflags);
kprintf("%s Exception (%d) at cs:eip = %#x:%#lx, core = %u, task = %u, error = %#x\n",
"Register state: eflags = %#lx, eax = %#lx, ebx = %#lx, ecx = %#lx, edx = %#lx, edi = %#lx, esi = %#lx, ebp = %#llx, esp = %#lx\n",
exception_messages[s->int_no], s->int_no, s->cs, s->eip, CORE_ID, task->id, s->error,
s->eflags, s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
#elif defined(CONFIG_X86_64)
kprintf(" Exception (%d) at 0x%llx:0x%llx on core %u, error code 0x%llx, rflags 0x%llx\n",
s->int_no, s->cs, s->rip, CORE_ID, s->error, s->rflags);
kprintf("%s Exception (%d) at cs:rip = %#x:%#lx, core = %u, task = %u, error = %#lx\n"
"Register state: rflags = %#lx, rax = %#lx, rbx = %#lx, rcx = %#lx, rdx = %#lx, rdi = %#lx, rsi = %#lx, rbp = %#llx, rsp = %#lx\n",
exception_messages[s->int_no], s->int_no, s->cs, s->rip, CORE_ID, task->id, s->error,
s->rflags, s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp);
#endif
/* Now, we signalize that we have handled the interrupt */

View file

@ -37,7 +37,7 @@ void kb_init(size_t size, tid_t tid) {
}
void kb_finish(void) {
kfree(kb_buffer.buffer, (kb_buffer.maxsize * sizeof(char)));
kfree(kb_buffer.buffer);
kb_buffer.buffer = NULL;
kb_buffer.size = 0;
kb_buffer.maxsize = 0;

206
arch/x86/kernel/pmc.c Normal file
View file

@ -0,0 +1,206 @@
/*
* Copyright 2013 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel
* @file arch/x86/kernel/pmc.c
* @brief Simple interface to IA32 Performance Monitor Counters
*
* This implementation is in parts specific to Intel Core 2 Duo processors!
* General purpose PMCs => pmc_gp_*()
* Fixed function PMCs => pmc_ff_*()
*/
#include <errno.h>
#include <asm/pmc.h>
#include <asm/processor.h>
static struct pmc_caps caps = { 0 };
struct pmc_caps* pmc_init()
{
if (!caps.version) {
uint32_t a, b, c, d;
cpuid(0x0A, &a, &b, &c, &d);
caps.version = (a >> 0) & 0xff;
caps.gp_count = (a >> 8) & 0xff;
caps.gp_width = (a >> 16) & 0xff;
caps.ff_count = (d >> 0) & 0x1f;
caps.ff_width = (d >> 5) & 0xff;
caps.arch_events = (b >> 0) & 0x3f;
// check if IA32_PERF_CAPABILITIES MSR is available
if (caps.version >= 2) {
cpuid(0x01, &a, &b, &c, &d);
if (c & (1 << 15 /* PDCM */))
caps.msr = rdmsr(IA32_PERF_CAPABILITIES);
}
}
return &caps;
}
int pmc_gp_config(uint8_t i, uint16_t event, uint32_t flags, uint8_t umask, uint8_t cmask)
{
if (BUILTIN_EXPECT(i >= caps.gp_count, 0))
return -EINVAL;
uint64_t evtsel = flags | event;
evtsel |= (cmask << PMC_EVTSEL_CMASK) | (umask << PMC_EVTSEL_UMASK);
wrmsr(IA32_PERFEVTSEL(i), evtsel);
wrmsr(IA32_PMC(i), 0); // reset counter
return 0;
}
int pmc_ff_config(uint8_t i, uint8_t flags)
{
if (BUILTIN_EXPECT(i >= caps.ff_count, 0))
return -EINVAL;
uint64_t ctrl = rdmsr(MSR_PERF_FIXED_CTR_CTRL);
ctrl &= ~(0x0f << i*4); // clear flags
ctrl |= (flags & 0xf) << i*4;
wrmsr(MSR_PERF_FIXED_CTR_CTRL, ctrl);
return 0;
}
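MSR_PERF_FIXED_CTR_CTRL packs one 4-bit control field per fixed function counter, hence the i*4 shifts above. For example, counting unhalted core cycles (fixed function counter 1 on Core 2 Duo) in both kernel and user mode would be:

// enable fixed function counter 1 for ring 0 and ring 3
pmc_ff_config(1, PMC_FIXED_OS | PMC_FIXED_USR);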
inline int pmc_gp_start(uint8_t i)
{
if (BUILTIN_EXPECT(i >= caps.gp_count, 0))
return -EINVAL;
wrmsr(IA32_PERFEVTSEL(i), rdmsr(IA32_PERFEVTSEL(i)) | PMC_EVTSEL_EN);
return 0;
}
inline int pmc_gp_stop(uint8_t i)
{
if (BUILTIN_EXPECT(i >= caps.gp_count, 0))
return -EINVAL;
wrmsr(IA32_PERFEVTSEL(i), rdmsr(IA32_PERFEVTSEL(i)) & ~PMC_EVTSEL_EN);
return 0;
}
inline int pmc_ff_start(uint8_t i)
{
if (BUILTIN_EXPECT(i >= caps.ff_count, 0))
return -EINVAL;
// TODO
return 0;
}
inline int pmc_ff_stop(uint8_t i)
{
if (BUILTIN_EXPECT(i >= caps.ff_count, 0))
return -EINVAL;
// TODO
return 0;
}
inline int pmc_start_all()
{
if (BUILTIN_EXPECT(caps.version < 2, 0))
return -EINVAL;
wrmsr(MSR_PERF_GLOBAL_CTRL, -1L);
return 0;
}
inline int pmc_stop_all()
{
if (BUILTIN_EXPECT(caps.version < 2, 0))
return -EINVAL;
wrmsr(MSR_PERF_GLOBAL_CTRL, 0);
return 0;
}
inline int pmc_reset_all()
{
if (BUILTIN_EXPECT(caps.version < 2, 0))
return -EINVAL;
int i;
for (i = 0; i < caps.gp_count; i++)
pmc_gp_write(i, 0);
for (i = 0; i < caps.ff_count; i++)
pmc_ff_write(i, 0);
return 0;
}
inline uint64_t pmc_gp_read(uint8_t i)
{
if (BUILTIN_EXPECT(i >= caps.gp_count, 0))
return 0;
return rdmsr(IA32_PMC(i));
}
inline uint64_t pmc_ff_read(uint8_t i)
{
if (BUILTIN_EXPECT(i >= caps.ff_count, 0))
return 0;
return rdmsr(MSR_PERF_FIXED_CTR(i));
}
inline int pmc_gp_write(uint8_t i, uint64_t val)
{
if (BUILTIN_EXPECT(i >= caps.gp_count, 0))
return -EINVAL;
if (caps.version >= 2 && caps.msr & (1 << 13 /* FW_WRITE */))
wrmsr(IA32_A_PMC(i), val);
else
wrmsr(IA32_PMC(i), val);
return 0;
}
inline int pmc_ff_write(uint8_t i, uint64_t val)
{
if (BUILTIN_EXPECT(i >= caps.ff_count, 0))
return -EINVAL;
wrmsr(MSR_PERF_FIXED_CTR(i), val);
return 0;
}

View file

@ -85,27 +85,49 @@ static void fpu_init_fxsr(union fpu_state* fpu)
fx->mxcsr = 0x1f80;
}
cpu_info_t cpu_info = { 0, 0 };
cpu_info_t cpu_info = { 0, 0, 0, 0 };
static uint32_t cpu_freq = 0;
int cpu_detection(void)
{
uint32_t a, b;
uint32_t a, b, c, d;
size_t cr4;
uint8_t first_time = 0;
if (!cpu_info.feature1) {
first_time = 1;
cpuid(1, &a, &b, &cpu_info.feature2, &cpu_info.feature1);
cpuid(0x80000001, &a, &b, &c, &cpu_info.feature3);
cpuid(0x80000008, &cpu_info.addr_width, &b, &c, &d);
}
if (first_time) {
kprintf("Paging features: %s%s%s%s%s%s%s%s\n",
(cpu_info.feature1 & CPU_FEATURE_PSE) ? "PSE (2/4Mb) " : "",
(cpu_info.feature1 & CPU_FEATURE_PAE) ? "PAE " : "",
(cpu_info.feature1 & CPU_FEATURE_PGE) ? "PGE " : "",
(cpu_info.feature1 & CPU_FEATURE_PAT) ? "PAT " : "",
(cpu_info.feature1 & CPU_FEATURE_PSE36) ? "PSE36 " : "",
(cpu_info.feature3 & CPU_FEATURE_NX) ? "NX " : "",
(cpu_info.feature3 & CPU_FEATURE_1GBHP) ? "PSE (1Gb) " : "",
(cpu_info.feature3 & CPU_FEATURE_LM) ? "LM" : "");
kprintf("Physical adress-width: %u bits\n", cpu_info.addr_width & 0xff);
kprintf("Linear adress-width: %u bits\n", (cpu_info.addr_width >> 8) & 0xff);
}
cr4 = read_cr4();
if (has_fxsr())
cr4 |= 0x200; // set the OSFXSR bit
cr4 |= CR4_OSFXSR;
if (has_sse())
cr4 |= 0x400; // set the OSXMMEXCPT bit
cr4 |= CR4_OSXMMEXCPT;
if (has_pge())
cr4 |= CR4_PGE;
write_cr4(cr4);
if (has_nx())
wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);
if (first_time && has_sse())
wmb = sfence;
@ -130,7 +152,6 @@ int cpu_detection(void)
}
if (first_time && on_hypervisor()) {
uint32_t c, d;
char vendor_id[13];
kprintf("MetalSVM is running on a hypervisor!\n");
@ -144,7 +165,7 @@ int cpu_detection(void)
kprintf("Hypervisor Vendor Id: %s\n", vendor_id);
kprintf("Maximum input value for hypervisor CPUID info: 0x%x\n", a);
}
return 0;
}

View file

@ -42,38 +42,4 @@ L3:
pop rax
ret
%if 0
; The following function is derived from JamesM's kernel development tutorials
; (http://www.jamesmolloy.co.uk/tutorial_html/)
global copy_page_physical
copy_page_physical:
push esi ; According to __cdecl, we must preserve the contents of ESI
push edi ; and EDI.
pushf ; push EFLAGS, so we can pop it and reenable interrupts
; later, if they were enabled anyway.
cli ; Disable interrupts, so we aren't interrupted.
; Load these in BEFORE we disable paging!
mov edi, [esp+12+4] ; Destination address
mov esi, [esp+12+8] ; Source address
mov edx, cr0 ; Get the control register...
and edx, 0x7fffffff ; and...
mov cr0, edx ; Disable paging.
cld
mov ecx, 0x400 ; 1024*4bytes = 4096 bytes = page size
rep movsd ; copy page
mov edx, cr0 ; Get the control register again
or edx, 0x80000000 ; and...
mov cr0, edx ; Enable paging.
popf ; Pop EFLAGS back.
pop edi ; Get the original value of EDI
pop esi ; and ESI back.
ret
%endif
SECTION .note.GNU-stack noalloc noexec nowrite progbits

72
arch/x86/kernel/uart.c Normal file
View file

@ -0,0 +1,72 @@
/*
* Copyright 2011 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <asm/uart.h>
#include <asm/io.h>
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
static inline int is_transmit_empty()
{
return inportb(UART_PORT + 5) & 0x20;
}
static inline int received()
{
return inportb(UART_PORT + 5) & 1;
}
void uart_init()
{
outportb(UART_PORT + 1, 0x00); // Disable all interrupts
outportb(UART_PORT + 3, 0x80); // Enable DLAB (set baud rate divisor)
outportb(UART_PORT + 0, 0x0C); // Set divisor to 12 (lo byte) 9600 baud
outportb(UART_PORT + 1, 0x00); // (hi byte)
outportb(UART_PORT + 3, 0x03); // 8 bits, no parity, one stop bit (8N1)
outportb(UART_PORT + 2, 0xC7); // Enable FIFO, clear them, with 14-byte threshold
outportb(UART_PORT + 4, 0x0B); // IRQs enabled, RTS/DSR set
}
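The divisor written while DLAB is set derives from the UART's 115200 Hz base rate: baud = 115200 / divisor, so the divisor of 12 above gives 115200 / 12 = 9600 baud; a divisor of 1 would select 115200 baud instead.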
char uart_getchar()
{
while (!received());
return inportb(UART_PORT);
}
void uart_putchar(char chr)
{
while (!is_transmit_empty());
outportb(UART_PORT, chr);
}
void uart_puts(const char* str)
{
while (*str) uart_putchar(*(str++));
}
int uart_gets(char* str, size_t len)
{
size_t ret = 0;
while (ret < len)
str[ret++] = uart_getchar();
return ret;
}

View file

@ -1,4 +1,4 @@
C_source := page$(BIT).c svm.c
C_source := page.c svm.c
MODULE := arch_x86_mm
include $(TOPDIR)/Makefile.inc

722
arch/x86/mm/page.c Normal file
View file

@ -0,0 +1,722 @@
/*
* Copyright 2014 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <metalsvm/stddef.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/memory.h>
#include <metalsvm/vma.h>
#include <metalsvm/string.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/processor.h>
#include <metalsvm/tasks.h>
#include <metalsvm/errno.h>
#include <metalsvm/page.h>
#include <asm/page_helpers.h>
#include <asm/irq.h>
#include <asm/multiboot.h>
#include <asm/apic.h>
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
/*
* Virtual Memory Layout of the standard configuration
* (1 GB kernel space)
*
* 0x0000000000000000 - 0x00000000000FFFFF: reserved for IO devices (1MB)
* 0x0000000000100000 - 0x00000000008C2000: Kernel (~8MB)
* 0x00000000008c3000 - 0x0000000000973000: Init Ramdisk (~2MB)
*
* 0x0001000000000000 - 0xffff000000000000: Memory hole (48 bit VAS limitation)
*
* 0xFFFFFE8000000000 - 0xFFFFFEFFFFFFFFFF: Page map dest for copy_page_map() (512GB)
* 0xFFFFFF0000000000 - 0xFFFFFF7FFFFFFFFF: Page map source for copy_page_map() (512GB)
* 0xFFFFFF8000000000 - 0xFFFFFFFFFFFFFFFF: Self-referenced page maps of the current task (512GB)
*/
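The self-reference in the last PML4 slot is what creates the 0xFFFFFF8000000000 window: a lookup that passes through slot 511 skips one level of the tree, so all page tables of the current task appear at fixed virtual addresses. As a sketch of the address arithmetic (assuming the self-reference sits in slot 511, as set up by entry64.asm):

// Virtual address of the page table entry mapping vaddr, seen through
// the self-referenced PML4 slot 511 (sketch, 4-level paging assumed)
static inline uint64_t* self_mapped_pte(uint64_t vaddr)
{
	return (uint64_t*) (0xFFFFFF8000000000ULL |
	                    ((vaddr >> 9) & 0x7FFFFFFFF8ULL));
}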
/// Boot task's page map (setup by entryXX.asm)
extern page_entry_t boot_pml4[PAGE_MAP_ENTRIES];
/// Kernel space page map lock
static spinlock_t kslock = SPINLOCK_INIT;
/// Mapping of self referenced page map (at the end of the VAS)
// TODO: find a more generic initialization
#ifdef CONFIG_X86_32
static page_entry_t* const current_map = (page_entry_t*) (1 * PAGE_MAP_PGD);
static page_entry_t* const src_map = (page_entry_t*) (2 * PAGE_MAP_PGD);
static page_entry_t* const dest_map = (page_entry_t*) (3 * PAGE_MAP_PGD);
#elif defined(CONFIG_X86_64)
static page_entry_t* const current_map = (page_entry_t*) (1 * PAGE_MAP_PML4);
static page_entry_t* const src_map = (page_entry_t*) (2 * PAGE_MAP_PML4);
static page_entry_t* const dest_map = (page_entry_t*) (3 * PAGE_MAP_PML4);
#endif
#ifdef CONFIG_X86_32
static page_entry_t boot_pgd[PAGE_MAP_ENTRIES];
#endif
page_entry_t* get_boot_page_map(void)
{
#ifdef CONFIG_X86_32
return boot_pgd;
#elif defined(CONFIG_X86_64)
return boot_pml4;
#endif
}
void page_dump(size_t mask)
{
task_t* task = per_core(current_task);
mask |= PG_PRESENT;
size_t flags = 0;
size_t start = 0;
size_t end;
void print(size_t start, size_t end, size_t flags) {
size_t size = end - start;
kprintf("%#018lx-%#018lx %#14x %c%c%c%c%c%c\n", start, end, size,
(mask & flags & PG_XD) ? '-' : 'x',
(mask & flags & PG_GLOBAL) ? 'g' : '-',
(mask & flags & PG_DIRTY) ? 'd' : '-',
(mask & flags & PG_ACCESSED) ? 'a' : '-',
(mask & flags & PG_USER) ? 'u' : '-',
(mask & flags & PG_RW) ? 'w' : '-'
);
}
void traverse(int level, page_entry_t* entry) {
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (*entry & PG_PRESENT) {
if (level && !(*entry & PG_PSE)) // do "pre-order" traversal
// TODO: handle "inheritance" of page table flags (see get_page_flags())
traverse(level-1, get_child_entry(entry));
else {
if (!flags) {
flags = *entry & ~PAGE_MASK & mask;
start = entry_to_virt(entry, level);
}
else if (flags != (*entry & ~PAGE_MASK & mask)) {
end = entry_to_virt(entry, level);
print(start, end, flags);
flags = *entry & ~PAGE_MASK & mask;
start = end;
}
}
}
else if (flags) {
end = entry_to_virt(entry, level);
print(start, end, flags);
flags = 0;
}
}
}
// lock tables
spinlock_lock(&kslock);
spinlock_irqsave_lock(&task->page_lock);
kprintf("%-18s-%18s %14s %-6s\n", "start", "end", "size", "flags"); // header
traverse(PAGE_MAP_LEVELS-1, current_map);
if (flags) // workaround to print last mapping
print(start, 0L, flags);
// unlock tables
spinlock_irqsave_unlock(&task->page_lock);
spinlock_unlock(&kslock);
}
void page_stats(int reset)
{
task_t* task = per_core(current_task);
int i, stats[13] = { 0 };
const char* labels[] = { [0] = "present", "writable", "user accessible", "write through", "cache disabled", // IA-32 "legacy" bits
"accessed", "dirty", "huge pages", "global", "svm", "svm lazy", "svm init",
[12] = "exec disabled" // IA-32e / PAE bits
};
void traverse(int level, page_entry_t* entry) {
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (*entry & PG_PRESENT) {
if (level && !(*entry & PG_PSE))
traverse(level-1, get_child_entry(entry));
else {
// increment stat counters
int i;
for (i=0; i<12; i++) { // IA-32 "legacy" bits
if (*entry & (1 << i))
stats[i]++;
}
#ifdef CONFIG_X86_64
for (i=0; i<1; i++) { // IA-32e / PAE bits
if (*entry & (1UL << (63-i)))
stats[i+PAGE_BITS]++;
}
#endif
if (reset) { // reset accessed and dirty bits
*entry &= ~(PG_ACCESSED|PG_DIRTY);
tlb_flush_one_page(entry_to_virt(entry, level)); // see IA32 Vol3 4.8
}
}
}
}
}
// lock tables
spinlock_lock(&kslock);
spinlock_irqsave_lock(&task->page_lock);
traverse(PAGE_MAP_LEVELS-1, current_map);
// unlock tables
spinlock_irqsave_unlock(&task->page_lock);
spinlock_unlock(&kslock);
kprintf("total pages:\n");
for (i=0; i<13; i++)
kprintf(" - %s:%*lu\n", labels[i], 25-strlen(labels[i]), stats[i]);
}
int copy_page_map(task_t* new_task, int copy)
{
task_t* cur_task = per_core(current_task);
int traverse(int level, page_entry_t* src, page_entry_t* dest) {
page_entry_t* stop = src + PAGE_MAP_ENTRIES;
for (; src != stop; src++, dest++) {
if (*src & PG_PRESENT) {
if (*src & PG_USER) { // deep copy page frame
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
return -ENOMEM;
atomic_int32_inc(&cur_task->user_usage);
copy_page(phyaddr, *src & PAGE_MASK);
*dest = phyaddr | (*src & ~PAGE_MASK);
// do "pre-order" traversal
if (level && !(*src & PG_PSE)) {
int ret = traverse(level-1, get_child_entry(src),
get_child_entry(dest));
if (ret < 0)
return ret;
}
}
else // shallow copy kernel table
*dest = *src;
}
else // table does not exist
*dest = 0;
}
return 0;
}
page_entry_t* src_virt = (copy) ? cur_task->page_map : get_boot_page_map();
page_entry_t* dest_virt = (page_entry_t*) palloc(PAGE_SIZE, MAP_KERNEL_SPACE);
if (BUILTIN_EXPECT(!dest_virt, 0))
return -ENOMEM;
size_t src_phys = virt_to_phys((size_t) src_virt);
size_t dest_phys = virt_to_phys((size_t) dest_virt);
// lock tables
spinlock_lock(&kslock);
spinlock_irqsave_lock(&cur_task->page_lock);
kprintf("copy_page_map: copy = %u, src = %p (%p, %p), dest = %p (%p, %p)\n",
copy, src_virt, src_phys, src_map, dest_virt, dest_phys, dest_map); // TODO: remove
// temporary map src and dest tables
current_map[PAGE_MAP_ENTRIES-2] = (src_phys & PAGE_MASK) | (PG_TABLE & ~PG_RW); // source is read-only!
current_map[PAGE_MAP_ENTRIES-3] = (dest_phys & PAGE_MASK) | PG_TABLE;
//tlb_flush(); // ouch :(
int ret = traverse(PAGE_MAP_LEVELS-1, src_map, dest_map);
// setup self reference for new table
dest_map[PAGE_MAP_ENTRIES-1] = dest_phys | PG_TABLE;
// unmap temporary tables
current_map[PAGE_MAP_ENTRIES-2] = 0;
current_map[PAGE_MAP_ENTRIES-3] = 0;
dest_map[PAGE_MAP_ENTRIES-2] = 0;
dest_map[PAGE_MAP_ENTRIES-3] = 0;
tlb_flush(); // ouch :(
// unlock tables
spinlock_irqsave_unlock(&cur_task->page_lock);
spinlock_unlock(&kslock);
new_task->page_map = dest_virt;
return ret;
}
int drop_page_map(void)
{
task_t* task = per_core(current_task);
void traverse(int level, page_entry_t* entry) {
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (*entry & PG_PRESENT) {
// do "post-order" traversal
if (level && !(*entry & PG_PSE))
traverse(level-1, get_child_entry(entry));
if (*entry & PG_USER) {
kprintf("drop_page_map: entry = %p. level = %u\n", entry, level);
if (put_page(*entry & PAGE_MASK))
atomic_int32_dec(&task->user_usage);
}
}
}
}
kprintf("drop_page_map: task = %u\n", task->id); // TODO: remove
// check assertions
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return -EINVAL;
if (BUILTIN_EXPECT(task->page_map == get_boot_page_map(), 0))
return -EINVAL;
// lock tables
spinlock_irqsave_lock(&task->page_lock);
kprintf("user_usage: %u (task = %u)\n", atomic_int32_read(&task->user_usage), task->id);
traverse(PAGE_MAP_LEVELS-1, current_map);
put_page((size_t) task->page_map);
// we replace the page table
task->page_map = get_boot_page_map();
tlb_flush();
// unlock tables
spinlock_irqsave_unlock(&task->page_lock);
return 0;
}
int set_page_flags(size_t viraddr, uint32_t npages, int flags)
{
task_t* task = per_core(current_task);
page_entry_t* first[PAGE_MAP_LEVELS];
page_entry_t* last[PAGE_MAP_LEVELS];
size_t bits = page_bits(flags);
size_t start = viraddr;
size_t end = start + npages * PAGE_SIZE;
void traverse(int level, page_entry_t* entry) {
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (entry < last[level] && entry >= first[level]) {
if ((*entry & PG_PRESENT) && !(*entry & PG_PSE)) {
if (level) {
if (flags & MAP_USER_SPACE)
*entry |= PG_USER;
#ifdef CONFIG_X86_64
if (flags & MAP_CODE)
*entry &= ~PG_XD;
#endif
// do "pre-order" traversal
traverse(level-1, get_child_entry(entry));
}
else
*entry = (*entry & PAGE_MASK) | bits;
tlb_flush_one_page(entry_to_virt(entry, level));
}
}
}
}
// check assertions
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return -EINVAL;
// calc page tree boundaries
int i;
for (i=0; i<PAGE_MAP_LEVELS; i++) {
first[i] = virt_to_entry(start, i);
last[i] = virt_to_entry(end - 1, i) + 1; // exclusive
}
// lock tables
if (start < KERNEL_SPACE)
spinlock_lock(&kslock);
if (end >= KERNEL_SPACE)
spinlock_irqsave_lock(&task->page_lock);
traverse(PAGE_MAP_LEVELS-1, current_map);
// unlock tables
if (start < KERNEL_SPACE)
spinlock_unlock(&kslock);
if (end >= KERNEL_SPACE)
spinlock_irqsave_unlock(&task->page_lock);
return 0;
}
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{
task_t* task = per_core(current_task);
page_entry_t* first[PAGE_MAP_LEVELS];
page_entry_t* last[PAGE_MAP_LEVELS];
// TODO: this behaviour should be deprecated
if (!viraddr) {
int vma_flags = VMA_HEAP;
if (flags & MAP_USER_SPACE)
vma_flags |= VMA_USER;
viraddr = vma_alloc(npages * PAGE_SIZE, vma_flags);
}
size_t bits = page_bits(flags);
size_t start = viraddr;
size_t end = start + npages * PAGE_SIZE;
int traverse(int level, page_entry_t* entry) {
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (entry < last[level] && entry >= first[level]) {
if (level) { // PGD, PDPT, PML4..
if (*entry & PG_PRESENT) {
if ((flags & MAP_USER_SPACE) && !(*entry & PG_USER)) {
/* We are altering entries which cover
* the kernel. So before changing them we need to
* make a private copy for the task */
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
return -ENOMEM;
atomic_int32_inc(&task->user_usage);
copy_page(phyaddr, *entry & PAGE_MASK);
*entry = phyaddr | (*entry & ~PAGE_MASK);
*entry &= ~PG_GLOBAL;
*entry |= PG_USER;
/* We just need to flush the table itself.
* TLB entries for the kernel remain valid
* because we've not changed them. */
tlb_flush_one_page(entry_to_virt(entry, 0));
}
}
else {
/* There's no page map table available
* which covers the region. Therefore we will create a
* new table. */
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
return -ENOMEM;
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
*entry = phyaddr | bits;
memset(get_child_entry(entry), 0x00, PAGE_SIZE); // fill with zeros
}
// do "pre-order" traversal if no hugepage
if (!(*entry & PG_PSE)) {
int ret = traverse(level-1, get_child_entry(entry));
if (ret < 0)
return ret;
}
}
else { // PGT
if ((*entry & PG_PRESENT) && !(flags & MAP_REMAP))
return -EINVAL;
*entry = phyaddr | bits;
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
if (flags & MAP_REMAP)
tlb_flush_one_page(entry_to_virt(entry, level));
phyaddr += PAGE_SIZE;
}
}
}
return 0;
}
kprintf("map_region: map %u pages from %#lx to %#lx with flags: %#x\n", npages, viraddr, phyaddr, flags); // TODO: remove
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return 0;
// calc page tree boundaries
int i;
for (i=0; i<PAGE_MAP_LEVELS; i++) {
first[i] = virt_to_entry(start, i);
last[i] = virt_to_entry(end - 1, i) + 1; // exclusive
}
// lock tables
if (start < KERNEL_SPACE)
spinlock_lock(&kslock);
if (end >= KERNEL_SPACE)
spinlock_irqsave_lock(&task->page_lock);
int ret = traverse(PAGE_MAP_LEVELS-1, current_map);
// unlock tables
if (start < KERNEL_SPACE)
spinlock_unlock(&kslock);
if (end >= KERNEL_SPACE)
spinlock_irqsave_unlock(&task->page_lock);
return (ret) ? 0 : viraddr;
}
int unmap_region(size_t viraddr, uint32_t npages)
{
task_t* task = per_core(current_task);
page_entry_t* first[PAGE_MAP_LEVELS];
page_entry_t* last[PAGE_MAP_LEVELS];
size_t start = viraddr;
size_t end = start + npages * PAGE_SIZE;
kprintf("unmap_region: unmap %u pages from %#lx\n", npages, viraddr); // TODO: remove
/** @return number of page table entries which are present */
int traverse(int level, page_entry_t* entry) {
int used = 0;
page_entry_t* stop = entry + PAGE_MAP_ENTRIES;
for (; entry != stop; entry++) {
if (entry < last[level] && entry >= first[level]) {
if (level) { // PGD, PDPT, PML4
if ((*entry & PG_PRESENT) && !(*entry & PG_PSE)) {
// do "post-order" traversal if table is present and no hugepage
if (traverse(level-1, get_child_entry(entry)))
used++;
else { // child table is empty => delete it
*entry &= ~PG_PRESENT;
tlb_flush_one_page(entry_to_virt(entry, 0));
if (*entry & PG_USER) {
if (put_page(*entry & PAGE_MASK))
atomic_int32_dec(&task->user_usage);
}
}
}
}
else { // PGT
*entry &= ~PG_PRESENT;
tlb_flush_one_page(entry_to_virt(entry, level));
if (*entry & PG_USER)
atomic_int32_dec(&task->user_usage);
}
}
else {
if (*entry & PG_PRESENT)
used++;
}
}
return used;
}
if (BUILTIN_EXPECT(!task || !task->page_map, 0))
return 0;
// calc page tree boundaries
int i;
for (i=0; i<PAGE_MAP_LEVELS; i++) {
first[i] = virt_to_entry(start, i);
last[i] = virt_to_entry(end - 1, i) + 1; // exclusive
}
// lock tables
if (start < KERNEL_SPACE)
spinlock_lock(&kslock);
if (end >= KERNEL_SPACE)
spinlock_irqsave_lock(&task->page_lock);
traverse(PAGE_MAP_LEVELS-1, current_map);
// unlock tables
if (start < KERNEL_SPACE)
spinlock_unlock(&kslock);
if (end >= KERNEL_SPACE) // must match the locking condition above
spinlock_irqsave_unlock(&task->page_lock);
return 0;
}
static void pagefault_handler(struct state *s)
{
task_t* task = per_core(current_task);
size_t viraddr = read_cr2();
// on demand userspace heap mapping
if ((task->heap) && (viraddr >= task->heap->start) && (viraddr < task->heap->end)) {
viraddr &= PAGE_MASK;
size_t phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0)) {
kprintf("out of memory: task = %u\n", task->id);
goto default_handler;
}
viraddr = map_region(viraddr, phyaddr, 1, MAP_USER_SPACE);
if (BUILTIN_EXPECT(!viraddr, 0)) {
kprintf("map_region: could not map %#lx to %#lx, task = %u\n", viraddr, phyaddr, task->id);
put_page(phyaddr);
goto default_handler;
}
memset((void*) viraddr, 0x00, PAGE_SIZE); // fill with zeros
return;
}
default_handler:
kprintf("Page Fault Exception (%d) at cs:ip = %#x:%#lx, core = %u, task = %u, addr = %#lx, error = %#x [ %s %s %s %s %s ]\n",
s->int_no, s->cs,
#ifdef CONFIG_X86_32
s->eip,
#elif defined(CONFIG_X86_64)
s->rip,
#endif
CORE_ID, task->id, viraddr, s->error,
(s->error & 0x4) ? "user" : "supervisor",
(s->error & 0x10) ? "instruction" : "data",
(s->error & 0x2) ? "write" : ((s->error & 0x10) ? "fetch" : "read"),
(s->error & 0x1) ? "protection" : "not present",
(s->error & 0x8) ? "reserved bit" : "\b");
// TODO: move this to something like print_registers()
#ifdef CONFIG_X86_32
kprintf("Register state: eflags = %#lx, eax = %#lx, ebx = %#lx, ecx = %#lx, edx = %#lx, edi = %#lx, esi = %#lx, ebp = %#llx, esp = %#lx\n",
s->eflags, s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp);
#elif defined(CONFIG_X86_64)
kprintf("Register state: rflags = %#lx, rax = %#lx, rbx = %#lx, rcx = %#lx, rdx = %#lx, rdi = %#lx, rsi = %#lx, rbp = %#llx, rsp = %#lx\n",
s->rflags, s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp);
#endif
irq_enable();
abort();
}
int arch_paging_init(void)
{
uint32_t i, npages;
// replace default pagefault handler
irq_uninstall_handler(14);
irq_install_handler(14, pagefault_handler);
// setup recursive paging
page_entry_t* boot_map = get_boot_page_map();
boot_map[PAGE_MAP_ENTRIES-1] = (size_t) boot_map | PG_TABLE;
/*
* In long mode the kernel is already mapped into the kernel space (see entry64.asm);
* this includes .data, .bss, .text, VGA, and the multiboot & multiprocessing (APIC) structures.
*/
#if MAX_CORES > 1
// reserve page for smp boot code
if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_NO_CACHE | MAP_REMAP)) {
kputs("could not reserve page for smp boot code\n");
return -ENOMEM;
}
#endif
#ifdef CONFIG_MULTIBOOT
#if 0
// map reserved memory regions into the kernel space
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr;
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
while (mmap < mmap_end) {
if (mmap->type != MULTIBOOT_MEMORY_AVAILABLE) {
npages = mmap->len / PAGE_SIZE;
if ((mmap->addr+mmap->len) % PAGE_SIZE)
npages++;
map_region(mmap->addr, mmap->addr, npages, MAP_NO_CACHE | MAP_REMAP);
}
mmap++;
}
}
#endif
/*
* Modules like the init ram disk are already loaded.
* Therefore, we map these modules into the kernel space.
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
npages = PAGE_FLOOR(mb_info->mods_count*sizeof(multiboot_module_t)) >> PAGE_BITS;
map_region((size_t) mmodule, (size_t) mmodule, npages, MAP_REMAP);
for(i=0; i<mb_info->mods_count; i++, mmodule++) {
// map physical address to the same virtual address
npages = PAGE_FLOOR(mmodule->mod_end - mmodule->mod_start) >> PAGE_BITS;
kprintf("Map module %s at %#x (%u pages)\n", (char*)(size_t) mmodule->cmdline, mmodule->mod_start, npages);
map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_REMAP);
}
}
#endif
// we turned on paging => now, we are able to register our task
register_task();
// APIC registers into the kernel address space
map_apic();
return 0;
}


@ -1,836 +0,0 @@
/*
* Copyright 2010 Stefan Lankes, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <metalsvm/stddef.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/mmu.h>
#include <metalsvm/vma.h>
#include <metalsvm/string.h>
#include <metalsvm/page.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/processor.h>
#include <metalsvm/tasks.h>
#include <metalsvm/errno.h>
#include <asm/irq.h>
#include <asm/multiboot.h>
#include <asm/apic.h>
#ifdef CONFIG_ROCKCREEK
#include <asm/RCCE_lib.h>
#include <asm/SCC_API.h>
#include <asm/svm.h>
#include <asm/icc.h>
#endif
/*
* Virtual Memory Layout of the standard configuration
* (1 GB kernel space)
*
* 0x00000000 - 0x000FFFFF: reserved for IO devices (16MB)
* 0x00100000 - 0x0DEADFFF: Kernel (size depends on the configuration) (221MB)
* 0x0DEAE000 - 0x3FFFEFFF: Kernel heap (801MB)
* 0x3FFFF000 - 0x3FFFFFFF: Page Tables are mapped in this region (4KB)
* (The last 256 entries belong to kernel space)
*/
/*
* Note that linker symbols are not variables, they have no memory allocated for
* maintaining a value, rather their address is their value.
*/
extern const void kernel_start;
extern const void kernel_end;
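/* Example: only the addresses of these symbols are meaningful, e.g.
 *   size_t ksize = (size_t) &kernel_end - (size_t) &kernel_start;
 * Dereferencing them would read arbitrary kernel bytes.
 */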
// boot task's page directory and page directory lock
static page_dir_t boot_pgd = {{[0 ... PGT_ENTRIES-1] = 0}};
static page_table_t pgt_container = {{[0 ... PGT_ENTRIES-1] = 0}};
static page_table_t boot_pgt[KERNEL_SPACE/(1024*PAGE_SIZE)];
static spinlock_t kslock = SPINLOCK_INIT;
static int paging_enabled = 0;
page_dir_t* get_boot_pgd(void)
{
return &boot_pgd;
}
/*
* TODO: We create a full copy of the current task. Copy-on-access would be the better solution.
*
* No PGD locking is needed because only create_pgd() uses this function and it
* already holds the PGD lock.
*/
inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_table_t* pgt, int* counter)
{
uint32_t i;
page_table_t* new_pgt;
size_t phyaddr;
if (BUILTIN_EXPECT(!pgt, 0))
return 0;
new_pgt = kmalloc(sizeof(page_table_t));
if (!new_pgt)
return 0;
memset(new_pgt, 0x00, sizeof(page_table_t));
if (counter)
(*counter)++;
for(i=0; i<PGT_ENTRIES; i++) {
if (pgt->entries[i] & PAGE_MASK) {
if (!(pgt->entries[i] & PG_USER)) {
// Kernel page => copy only page entries
new_pgt->entries[i] = pgt->entries[i];
continue;
}
phyaddr = get_page();
if (!phyaddr)
continue;
if (counter)
(*counter)++;
copy_page_physical((void*)phyaddr, (void*) (pgt->entries[i] & PAGE_MASK));
new_pgt->entries[i] = phyaddr | (pgt->entries[i] & 0xFFF);
atomic_int32_inc(&task->user_usage);
}
}
phyaddr = virt_to_phys((size_t)new_pgt);
return phyaddr;
}
int create_pgd(task_t* task, int copy)
{
page_dir_t* pgd;
page_table_t* pgt;
page_table_t* pgt_container;
uint32_t i;
uint32_t index1, index2;
size_t viraddr, phyaddr;
int counter = 0;
task_t* curr_task = per_core(current_task);
if (BUILTIN_EXPECT(!paging_enabled, 0))
return -EINVAL;
// we already know the virtual address of the "page table container"
// (see file header)
pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
// create new page directory for the new task
pgd = kmalloc(sizeof(page_dir_t));
if (!pgd)
return -ENOMEM;
memset(pgd, 0x00, sizeof(page_dir_t));
// create a new "page table container" for the new task
pgt = kmalloc(sizeof(page_table_t));
if (!pgt) {
kfree(pgd, sizeof(page_dir_t));
return -ENOMEM;
}
memset(pgt, 0x00, sizeof(page_table_t));
spinlock_lock(&kslock);
for(i=0; i<PGT_ENTRIES; i++) {
pgd->entries[i] = boot_pgd.entries[i];
// only kernel entries will be copied
if (pgd->entries[i] && !(pgd->entries[i] & PG_USER))
pgt->entries[i] = pgt_container->entries[i];
}
spinlock_unlock(&kslock);
// map page table container at the end of the kernel space
viraddr = (KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK;
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
// now, we create a self reference
pgd->entries[index1] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
pgt->entries[index2] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_PAGE;
task->pgd = pgd;
if (copy) {
spinlock_irqsave_lock(&curr_task->pgd_lock);
for (i=KERNEL_SPACE/(1024*PAGE_SIZE); i<1024; i++) {
if (!(curr_task->pgd->entries[i]))
continue;
if (!(curr_task->pgd->entries[i] & PG_USER))
continue;
phyaddr = copy_page_table(task, i, (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + i*PAGE_SIZE) & PAGE_MASK), &counter);
if (phyaddr) {
pgd->entries[i] = (phyaddr & PAGE_MASK) | (curr_task->pgd->entries[i] & 0xFFF);
pgt->entries[i] = (phyaddr & PAGE_MASK) | KERN_PAGE;
}
}
spinlock_irqsave_unlock(&curr_task->pgd_lock);
}
return counter;
}
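/* Worked example of the self-reference indices above
 * (KERNEL_SPACE = 1 GiB, PAGE_SIZE = 4 KiB):
 *   viraddr = KERNEL_SPACE - PAGE_SIZE  = 0x3FFFF000
 *   index1  = viraddr >> 22             = 255  (last PGD slot below 1 GiB)
 *   index2  = (viraddr >> 12) & 0x3FF   = 1023 (last entry of the container)
 */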
/*
* drops all page frames and the PGD of a user task
*/
int drop_pgd(void)
{
page_dir_t* pgd = per_core(current_task)->pgd;
size_t phy_pgd = virt_to_phys((size_t) pgd);
task_t* task = per_core(current_task);
uint32_t i;
if (BUILTIN_EXPECT(pgd == &boot_pgd, 0))
return -EINVAL;
spinlock_irqsave_lock(&task->pgd_lock);
for(i=0; i<PGT_ENTRIES; i++) {
if (pgd->entries[i] & PG_USER) {
put_page(pgd->entries[i] & PAGE_MASK);
pgd->entries[i] = 0;
}
}
// freeing the page directory
put_page(phy_pgd);
task->pgd = NULL;
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
size_t virt_to_phys(size_t viraddr)
{
task_t* task = per_core(current_task);
uint32_t index1, index2;
page_table_t* pgt;
size_t ret = 0;
if (!paging_enabled)
return viraddr;
if (BUILTIN_EXPECT(!task || !task->pgd, 0))
return 0;
spinlock_irqsave_lock(&task->pgd_lock);
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
if (!(task->pgd->entries[index1] & PAGE_MASK))
goto out;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2]))
goto out;
ret = pgt->entries[index2] & PAGE_MASK; // determine page frame
ret = ret | (viraddr & 0xFFF); // add page offset
out:
//kprintf("vir %p to phy %p\n", viraddr, ret);
spinlock_irqsave_unlock(&task->pgd_lock);
return ret;
}
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{
task_t* task = per_core(current_task);
page_table_t* pgt;
size_t index, i;
size_t ret;
if (BUILTIN_EXPECT(!task || !task->pgd, 0))
return 0;
if (BUILTIN_EXPECT(!paging_enabled && (viraddr != phyaddr), 0))
return 0;
if (flags & MAP_KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
if (!viraddr) {
viraddr = vm_alloc(npages, flags);
if (BUILTIN_EXPECT(!viraddr, 0)) {
kputs("map_adress: found no valid virtual address\n");
ret = 0;
goto out;
}
}
ret = viraddr;
//kprintf("map %d pages from %p to %p\n", npages, phyaddr, ret);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE, phyaddr+=PAGE_SIZE) {
index = viraddr >> 22;
if (!(task->pgd->entries[index])) {
page_table_t* pgt_container;
pgt = (page_table_t*) get_pages(1);
if (BUILTIN_EXPECT(!pgt, 0)) {
kputs("map_address: out of memory\n");
ret = 0;
goto out;
}
// set the new page table into the directory
if (flags & MAP_USER_SPACE)
task->pgd->entries[index] = (uint32_t)pgt|USER_TABLE;
else
task->pgd->entries[index] = (uint32_t)pgt|KERN_TABLE;
// if paging is already enabled, we need to use the virtual address
if (paging_enabled)
// we already know the virtual address of the "page table container"
// (see file header)
pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK);
else
pgt_container = (page_table_t*) (task->pgd->entries[(KERNEL_SPACE - PAGE_SIZE) >> 22] & PAGE_MASK);
if (BUILTIN_EXPECT(!pgt_container, 0)) {
kputs("map_address: internal error\n");
ret = 0;
goto out;
}
// map the new table into the address space of the kernel space
pgt_container->entries[index] = ((size_t) pgt)|KERN_PAGE;
// clear the page table
if (paging_enabled)
memset((void*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK), 0x00, PAGE_SIZE);
else
memset(pgt, 0x00, PAGE_SIZE);
} else pgt = (page_table_t*) (task->pgd->entries[index] & PAGE_MASK);
/* convert physical address to virtual */
if (paging_enabled)
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK);
index = (viraddr >> 12) & 0x3FF;
if (pgt->entries[index] && !(flags & MAP_REMAP)) {
kprintf("0x%x is already mapped\n", viraddr);
ret = 0;
goto out;
}
if (flags & MAP_USER_SPACE)
pgt->entries[index] = USER_PAGE|(phyaddr & PAGE_MASK);
else
pgt->entries[index] = KERN_PAGE|(phyaddr & PAGE_MASK);
if (flags & MAP_NO_CACHE)
pgt->entries[index] |= PG_PCD;
#ifdef CONFIG_ROCKCREEK
if (flags & MAP_MPE)
pgt->entries[index] |= PG_MPE;
#endif
if (flags & MAP_SVM_STRONG)
#ifndef SVM_WB
pgt->entries[index] |= PG_SVM_STRONG|PG_PWT;
#else
pgt->entries[index] |= PG_SVM;
#endif
if (flags & MAP_SVM_LAZYRELEASE)
pgt->entries[index] |= PG_SVM_LAZYRELEASE|PG_PWT;
if (flags & MAP_SVM_INIT)
pgt->entries[index] |= PG_SVM_INIT;
if (flags & MAP_NO_ACCESS)
pgt->entries[index] &= ~PG_PRESENT;
if (flags & MAP_WT)
pgt->entries[index] |= PG_PWT;
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
tlb_flush_one_page(viraddr);
}
out:
if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return ret;
}
int change_page_permissions(size_t start, size_t end, uint32_t flags)
{
uint32_t index1, index2, newflags;
size_t viraddr = start & 0xFFFFF000;
size_t phyaddr;
page_table_t* pgt;
page_dir_t* pgd;
task_t* task = per_core(current_task);
if (BUILTIN_EXPECT(!paging_enabled, 0))
return -EINVAL;
pgd = per_core(current_task)->pgd;
if (BUILTIN_EXPECT(!pgd, 0))
return -EINVAL;
spinlock_irqsave_lock(&task->pgd_lock);
while (viraddr < end)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
while ((viraddr < end) && (index2 < 1024)) {
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (pgt && pgt->entries[index2]) {
phyaddr = pgt->entries[index2] & PAGE_MASK;
newflags = pgt->entries[index2] & 0xFFF; // get old flags
if (!(newflags & PG_SVM_INIT)) {
if ((newflags & PG_SVM_STRONG) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS)))
newflags |= PG_PRESENT;
else if ((newflags & PG_SVM_STRONG) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS))
newflags &= ~PG_PRESENT;
}
// update flags
if (!(flags & VMA_WRITE)) {
newflags &= ~PG_RW;
#ifdef CONFIG_ROCKCREEK
if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE))
newflags &= ~PG_MPE;
#endif
} else {
newflags |= PG_RW;
#ifdef CONFIG_ROCKCREEK
if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE))
newflags |= PG_MPE;
#endif
}
pgt->entries[index2] = (newflags & 0xFFF) | (phyaddr & PAGE_MASK);
tlb_flush_one_page(viraddr);
}
index2++;
viraddr += PAGE_SIZE;
}
}
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
/*
* Use the first fit algorithm to find a valid address range
*
* TODO: O(n) => bad performance, we need a better approach
*/
size_t vm_alloc(uint32_t npages, uint32_t flags)
{
task_t* task = per_core(current_task);
uint32_t index1, index2, j;
size_t viraddr, i, ret = 0;
size_t start, end;
page_table_t* pgt;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return 0;
if (flags & MAP_KERNEL_SPACE) {
start = (((size_t) &kernel_end) + PAGE_SIZE) & PAGE_MASK;
end = (KERNEL_SPACE - 2*PAGE_SIZE) & PAGE_MASK; // we need 1 PAGE for our PGTs
} else {
start = KERNEL_SPACE & PAGE_MASK;
end = PAGE_MASK;
}
if (BUILTIN_EXPECT(!npages, 0))
return 0;
if (flags & MAP_KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
viraddr = i = start;
j = 0;
do {
index1 = i >> 22;
index2 = (i >> 12) & 0x3FF;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2])) {
i+=PAGE_SIZE;
j++;
} else {
// restart search
j = 0;
viraddr = i + PAGE_SIZE;
i = i + PAGE_SIZE;
}
} while((j < npages) && (i<=end));
if ((j >= npages) && (viraddr < end))
ret = viraddr;
if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return ret;
}
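/* Minimal usage sketch (error handling elided): map_region() already calls
 * vm_alloc() itself when viraddr == 0, so a typical caller only provides
 * the physical frames:
 *
 * size_t phyaddr = get_pages(4);
 * size_t viraddr = map_region(0, phyaddr, 4, MAP_KERNEL_SPACE);
 */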
int unmap_region(size_t viraddr, uint32_t npages)
{
task_t* task = per_core(current_task);
uint32_t i;
uint32_t index1, index2;
page_table_t* pgt;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt)
continue;
pgt->entries[index2] &= ~PG_PRESENT;
if (viraddr > KERNEL_SPACE)
atomic_int32_dec(&task->user_usage);
tlb_flush_one_page(viraddr);
}
if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
int vm_free(size_t viraddr, uint32_t npages)
{
task_t* task = per_core(current_task);
uint32_t i;
uint32_t index1, index2;
page_table_t* pgt;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt)
continue;
pgt->entries[index2] = 0;
tlb_flush_one_page(viraddr);
}
if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
int print_paging_tree(size_t viraddr)
{
task_t* task = per_core(current_task);
uint32_t index1, index2;
page_dir_t* pgd = NULL;
page_table_t* pgt = NULL;
if (BUILTIN_EXPECT(!viraddr, 0))
return -EINVAL;
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
spinlock_irqsave_lock(&task->pgd_lock);
kprintf("Paging dump of address 0x%x\n", viraddr);
pgd = task->pgd;
kprintf("\tPage directory entry %u: ", index1);
if (pgd) {
kprintf("0x%0x\n", pgd->entries[index1]);
pgt = (page_table_t*) (pgd->entries[index1] & PAGE_MASK);
} else
kputs("invalid page directory\n");
/* convert physical address to virtual */
if (paging_enabled && pgt)
pgt = (page_table_t*) (KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE);
kprintf("\tPage table entry %u: ", index2);
if (pgt)
kprintf("0x%x\n", pgt->entries[index2]);
else
kputs("invalid page table\n");
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
static void pagefault_handler(struct state *s)
{
task_t* task = per_core(current_task);
page_dir_t* pgd = task->pgd;
page_table_t* pgt = NULL;
size_t viraddr = read_cr2();
size_t phyaddr;
#ifdef CONFIG_ROCKCREEK
uint32_t index1, index2;
#endif
if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
viraddr = viraddr & PAGE_MASK;
phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
goto default_handler;
if (map_region(viraddr, phyaddr, 1, MAP_USER_SPACE) == viraddr) {
memset((void*) viraddr, 0x00, PAGE_SIZE);
return;
}
kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr);
put_page(phyaddr);
}
#ifdef CONFIG_ROCKCREEK
// does our SVM system need to handle this page fault?
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
if (!pgd || !(pgd->entries[index1] & PAGE_MASK))
goto default_handler;
pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (!pgt || !(pgt->entries[index2]))
goto default_handler;
if (pgt->entries[index2] & PG_SVM_INIT) {
if (BUILTIN_EXPECT(!svm_alloc_page(viraddr, pgt), 1))
return;
else
goto default_handler;
}
if (pgt->entries[index2] & PG_SVM_STRONG)
if (BUILTIN_EXPECT(!svm_access_request(viraddr), 1))
return;
#endif
default_handler:
kprintf("PAGE FAULT: Task %u got page fault at %p (irq %d, cs:eip 0x%x:0x%x)\n", task->id, viraddr, s->int_no, s->cs, s->eip);
kprintf("Register state: eax = 0x%x, ebx = 0x%x, ecx = 0x%x, edx = 0x%x, edi = 0x%x, esi = 0x%x, ebp = 0x%x, esp = 0x%xi, ds = 0x%x, es = 0x%x\n",
s->eax, s->ebx, s->ecx, s->edx, s->edi, s->esi, s->ebp, s->esp, s->ds, s->es);
irq_enable();
abort();
}
int arch_paging_init(void)
{
uint32_t i, npages, index1, index2;
page_table_t* pgt;
size_t viraddr;
// uninstall default handler and install our own
irq_uninstall_handler(14);
irq_install_handler(14, pagefault_handler);
// Create a page table to reference to the other page tables
pgt = &pgt_container;
// map this table at the end of the kernel space
viraddr = KERNEL_SPACE - PAGE_SIZE;
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
// now, we create a self reference
per_core(current_task)->pgd->entries[index1] = (((size_t) pgt) & PAGE_MASK)|KERN_TABLE;
pgt->entries[index2] = ((size_t) pgt & 0xFFFFF000)|KERN_PAGE;
// create the other PGTs for the kernel space
for(i=0; i<KERNEL_SPACE/(1024*PAGE_SIZE)-1; i++) {
size_t phyaddr = (size_t) (boot_pgt+i);
memset((void*) phyaddr, 0x00, sizeof(page_table_t));
per_core(current_task)->pgd->entries[i] = (phyaddr & PAGE_MASK)|KERN_TABLE;
pgt->entries[i] = (phyaddr & PAGE_MASK)|KERN_PAGE;
}
/*
* Set the page table and page directory entries for the kernel. We map the kernel's physical address
* to the same virtual address.
*/
npages = ((size_t) &kernel_end - (size_t) &kernel_start) >> PAGE_SHIFT;
if ((size_t)&kernel_end & (PAGE_SIZE-1))
npages++;
map_region((size_t)&kernel_start, (size_t)&kernel_start, npages, MAP_KERNEL_SPACE);
#if MAX_CORES > 1
// Reserve page for smp boot code
if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE)) {
kputs("could not reserve page for smp boot code\n");
return -ENOMEM;
}
#endif
#ifdef CONFIG_VGA
// map the video memory into the kernel space
map_region(VIDEO_MEM_ADDR, VIDEO_MEM_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE);
#endif
#ifdef CONFIG_MULTIBOOT
/*
* of course, mb_info has to map into the kernel space
*/
if (mb_info)
map_region((size_t) mb_info & PAGE_MASK, (size_t) mb_info & PAGE_MASK, 1, MAP_KERNEL_SPACE);
#if 0
/*
* Map reserved memory regions into the kernel space
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr;
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
while (mmap < mmap_end) {
if (mmap->type != MULTIBOOT_MEMORY_AVAILABLE) {
npages = mmap->len / PAGE_SIZE;
if ((mmap->addr+mmap->len) % PAGE_SIZE)
npages++;
map_region(mmap->addr, mmap->addr, npages, MAP_KERNEL_SPACE|MAP_NO_CACHE);
}
mmap++;
}
}
#endif
/*
* Modules like the init ram disk are already loaded.
* Therefore, we map these modules into the kernel space.
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
npages = mb_info->mods_count * sizeof(multiboot_module_t) >> PAGE_SHIFT;
if (mb_info->mods_count * sizeof(multiboot_module_t) & (PAGE_SIZE-1))
npages++;
map_region((size_t) mb_info->mods_addr, (size_t) mb_info->mods_addr, npages, MAP_KERNEL_SPACE);
for(i=0; i<mb_info->mods_count; i++, mmodule++) {
// map physical address to the same virtual address
npages = (mmodule->mod_end - mmodule->mod_start) >> PAGE_SHIFT;
if (mmodule->mod_end & (PAGE_SIZE-1))
npages++;
kprintf("Map module %s at 0x%x (%u pages)\n", (char*) mmodule->cmdline, mmodule->mod_start, npages);
map_region((size_t) mmodule->mod_start, (size_t) mmodule->mod_start, npages, MAP_KERNEL_SPACE);
}
}
#endif
#ifdef CONFIG_ROCKCREEK
// map SCC's bootinfo
viraddr = map_region(SCC_BOOTINFO, SCC_BOOTINFO, 1, MAP_KERNEL_SPACE);
kprintf("Map SCC's bootinfos at 0x%x\n", viraddr);
// map SCC's configuration registers
viraddr = map_region(CRB_X0_Y0, CRB_X0_Y0, (CRB_OWN-CRB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
kprintf("Map configuration registers at 0x%x\n", viraddr);
// map SCC's message passing buffers
viraddr = map_region(MPB_X0_Y0, MPB_X0_Y0, (MPB_OWN-MPB_X0_Y0+16*1024*1024) >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_MPE);
kprintf("Map message passing buffers at 0x%x\n", viraddr);
// map the FPGA registers
viraddr = map_region(FPGA_BASE, FPGA_BASE, 0x10000 >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE);
kprintf("Map FPGA regsiters at 0x%x\n", viraddr);
#endif
/* enable paging */
write_cr3((uint32_t) &boot_pgd);
i = read_cr0();
i = i | (1 << 31);
write_cr0(i);
paging_enabled = 1;
#ifdef CONFIG_ROCKCREEK
// map the initial ramdisk
npages = bootinfo->size >> PAGE_SHIFT;
if (bootinfo->size & (PAGE_SIZE-1))
npages++;
viraddr = map_region(0, bootinfo->addr, npages, MAP_KERNEL_SPACE);
kprintf("Map initrd from 0x%x to 0x%x (size %u bytes)\n", bootinfo->addr, viraddr, bootinfo->size);
bootinfo->addr = viraddr;
#endif
/*
* we turned on paging
* => now, we are able to register our task
*/
register_task();
// APIC registers into the kernel address space
map_apic();
return 0;
}


@ -1,650 +0,0 @@
/*
* Copyright 2012 Stefan Lankes, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
#include <metalsvm/stddef.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/mmu.h>
#include <metalsvm/vma.h>
#include <metalsvm/string.h>
#include <metalsvm/page.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/processor.h>
#include <metalsvm/tasks.h>
#include <metalsvm/errno.h>
#include <asm/irq.h>
#include <asm/multiboot.h>
#include <asm/apic.h>
#ifdef CONFIG_ROCKCREEK
#include <asm/RCCE_lib.h>
#include <asm/SCC_API.h>
#include <asm/svm.h>
#include <asm/icc.h>
#endif
/*
* Virtual Memory Layout of the standard configuration
* (1 GB kernel space)
*
* 0x00000000 - 0x000FFFFF: reserved for IO devices (16MB)
* 0x00100000 - 0x0DEADFFF: Kernel (size depends on the configuration) (221MB)
* 0x0DEAE000 - 0x3FFFFFFF: Kernel heap
*
*/
/*
* Note that linker symbols are not variables, they have no memory allocated for
* maintaining a value, rather their address is their value.
*/
extern const void kernel_start;
extern const void kernel_end;
// boot task's page directory and page directory lock
extern page_dir_t boot_pgd;
static spinlock_t kslock = SPINLOCK_INIT;
static int paging_enabled = 0;
page_dir_t* get_boot_pgd(void)
{
return &boot_pgd;
}
int create_pgd(task_t* task, int copy)
{
// Currently, we support only kernel tasks
// => all tasks are able to use the same pgd
if (BUILTIN_EXPECT(!paging_enabled, 0))
return -EINVAL;
task->pgd = get_boot_pgd();
return 0;
}
/*
* drops all page frames and the PGD of a user task
*/
int drop_pgd(void)
{
#if 0
page_dir_t* pgd = per_core(current_task)->pgd;
size_t phy_pgd = virt_to_phys((size_t) pgd);
task_t* task = per_core(current_task);
uint32_t i;
if (BUILTIN_EXPECT(pgd == &boot_pgd, 0))
return -EINVAL;
spinlock_lock(&task->pgd_lock);
for(i=0; i<1024; i++) {
if (pgd->entries[i] & PG_USER) {
put_page(pgd->entries[i] & PAGE_MASK);
pgd->entries[i] = 0;
}
}
// freeing the page directory
put_page(phy_pgd);
task->pgd = NULL;
spinlock_unlock(&task->pgd_lock);
#endif
return 0;
}
size_t virt_to_phys(size_t viraddr)
{
task_t* task = per_core(current_task);
uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF;
uint16_t idx_dirp = (viraddr >> 30) & 0x1FF;
uint16_t idx_dir = (viraddr >> 21) & 0x1FF;
uint16_t idx_table = (viraddr >> 12) & 0x1FF;
page_table_t* pgt;
size_t ret = 0;
if (!paging_enabled)
return viraddr;
if (BUILTIN_EXPECT(!task || !task->pgd, 0))
return 0;
spinlock_irqsave_lock(&task->pgd_lock);
// Currently, we allocate pages only in kernel space.
// => physical address of the page table is identical to the virtual address
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt)
goto out;
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt)
goto out;
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt)
goto out;
ret = (size_t) (pgt->entries[idx_table] & PAGE_MASK);
if (!ret)
goto out;
ret = ret | (viraddr & 0xFFF); // add page offset
out:
//kprintf("vir %p to phy %p\n", viraddr, ret);
spinlock_irqsave_unlock(&task->pgd_lock);
return ret;
}
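/* Worked example of the 9-bit index extraction above for the (arbitrary)
 * address viraddr = 0x00007F8012345000:
 *   idx_pd4   = (viraddr >> 39) & 0x1FF = 0x0FF
 *   idx_dirp  = (viraddr >> 30) & 0x1FF = 0x000
 *   idx_dir   = (viraddr >> 21) & 0x1FF = 0x091
 *   idx_table = (viraddr >> 12) & 0x1FF = 0x145
 */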
size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags)
{
task_t* task = per_core(current_task);
page_table_t* pgt;
size_t i, ret;
if (BUILTIN_EXPECT(!task || !task->pgd, 0))
return 0;
if (BUILTIN_EXPECT(!paging_enabled && (viraddr != phyaddr), 0))
return 0;
if (flags & MAP_KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
if (!viraddr) {
viraddr = vm_alloc(npages, flags);
if (BUILTIN_EXPECT(!viraddr, 0)) {
kputs("map_region: found no valid virtual address\n");
ret = 0;
goto out;
}
}
ret = viraddr;
for(i=0; i<npages; i++, viraddr+=PAGE_SIZE, phyaddr+=PAGE_SIZE) {
uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF;
uint16_t idx_dirp = (viraddr >> 30) & 0x1FF;
uint16_t idx_dir = (viraddr >> 21) & 0x1FF;
uint16_t idx_table = (viraddr >> 12) & 0x1FF;
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt) {
kputs("map_region: out of memory\n");
ret = 0;
goto out;
}
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt) {
kputs("map_region: out of memory\n");
ret = 0;
goto out;
}
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt) {
kputs("map_region: out of memory\n");
ret = 0;
goto out;
}
/* convert physical address to virtual */
// Currently, we allocate pages only in kernel space.
// => physical address of the page table is identical to the virtual address
//if (paging_enabled)
// pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK);
if (pgt->entries[idx_table] && !(flags & MAP_REMAP)) {
kprintf("0x%x is already mapped\n", viraddr);
ret = 0;
goto out;
}
if (flags & MAP_USER_SPACE)
pgt->entries[idx_table] = USER_PAGE|(phyaddr & PAGE_MASK);
else
pgt->entries[idx_table] = KERN_PAGE|(phyaddr & PAGE_MASK);
if (flags & MAP_NO_CACHE)
pgt->entries[idx_table] |= PG_PCD;
if (flags & MAP_NO_ACCESS)
pgt->entries[idx_table] &= ~PG_PRESENT;
if (flags & MAP_WT)
pgt->entries[idx_table] |= PG_PWT;
if (flags & MAP_USER_SPACE)
atomic_int32_inc(&task->user_usage);
tlb_flush_one_page(viraddr);
}
out:
if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return ret;
}
int change_page_permissions(size_t start, size_t end, uint32_t flags)
{
#if 0
uint32_t index1, index2, newflags;
size_t viraddr = start & PAGE_MASK;
size_t phyaddr;
page_table_t* pgt;
page_dir_t* pgd;
task_t* task = per_core(current_task);
if (BUILTIN_EXPECT(!paging_enabled, 0))
return -EINVAL;
pgd = per_core(current_task)->pgd;
if (BUILTIN_EXPECT(!pgd, 0))
return -EINVAL;
spinlock_lock(&task->pgd_lock);
while (viraddr < end)
{
index1 = viraddr >> 22;
index2 = (viraddr >> 12) & 0x3FF;
while ((viraddr < end) && (index2 < 1024)) {
pgt = (page_table_t*) (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK);
if (pgt && pgt->entries[index2]) {
phyaddr = pgt->entries[index2] & PAGE_MASK;
newflags = pgt->entries[index2] & 0xFFF; // get old flags
if (!(newflags & PG_SVM_INIT)) {
if ((newflags & PG_SVM_STRONG) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS)))
newflags |= PG_PRESENT;
else if ((newflags & PG_SVM_STRONG) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS))
newflags &= ~PG_PRESENT;
}
// update flags
if (!(flags & VMA_WRITE)) {
newflags &= ~PG_RW;
#ifdef CONFIG_ROCKCREEK
if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE))
newflags &= ~PG_MPE;
#endif
} else {
newflags |= PG_RW;
#ifdef CONFIG_ROCKCREEK
if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE))
newflags |= PG_MPE;
#endif
}
pgt->entries[index2] = (newflags & 0xFFF) | (phyaddr & PAGE_MASK);
tlb_flush_one_page(viraddr);
}
index2++;
viraddr += PAGE_SIZE;
}
}
spinlock_unlock(&task->pgd_lock);
#endif
return -EINVAL;
}
/*
* Use the first fit algorithm to find a valid address range
*
* TODO: O(n) => bad performance, we need a better approach
*/
size_t vm_alloc(uint32_t npages, uint32_t flags)
{
task_t* task = per_core(current_task);
size_t viraddr, i, j, ret = 0;
size_t start, end;
page_table_t* pgt;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return 0;
if (flags & MAP_KERNEL_SPACE) {
start = (((size_t) &kernel_end) + 10*PAGE_SIZE) & PAGE_MASK;
end = (KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK;
} else {
start = KERNEL_SPACE & PAGE_MASK;
end = PAGE_MASK;
}
if (BUILTIN_EXPECT(!npages, 0))
return 0;
if (flags & MAP_KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
viraddr = i = start;
j = 0;
do {
uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF;
uint16_t idx_dirp = (viraddr >> 30) & 0x1FF;
uint16_t idx_dir = (viraddr >> 21) & 0x1FF;
uint16_t idx_table = (viraddr >> 12) & 0x1FF;
// Currently, we allocate pages only in kernel space.
// => physical address of the page table is identical to the virtual address
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt) {
i += (size_t)PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
j += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt) {
i += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
j += PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt) {
i += PGT_ENTRIES*PAGE_SIZE;
j += PGT_ENTRIES;
continue;
}
if (!(pgt->entries[idx_table])) {
i += PAGE_SIZE;
j++;
} else {
// restart search
j = 0;
viraddr = i + PAGE_SIZE;
i = i + PAGE_SIZE;
}
} while((j < npages) && (i<=end));
if ((j >= npages) && (viraddr < end))
ret = viraddr;
if (flags & MAP_KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return ret;
}
int unmap_region(size_t viraddr, uint32_t npages)
{
task_t* task = per_core(current_task);
page_table_t* pgt;
size_t i;
uint16_t idx_pd4, idx_dirp;
uint16_t idx_dir, idx_table;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
i = 0;
while(i<npages)
{
idx_pd4 = (viraddr >> 39) & 0x1FF;
idx_dirp = (viraddr >> 30) & 0x1FF;
idx_dir = (viraddr >> 21) & 0x1FF;
idx_table = (viraddr >> 12) & 0x1FF;
// Currently, we allocate pages only in kernel space.
// => physical address of the page table is identical to the virtual address
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt) {
viraddr += (size_t) PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt) {
viraddr += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt) {
viraddr += PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES;
continue;
}
if (pgt->entries[idx_table])
pgt->entries[idx_table] &= ~PG_PRESENT;
if (viraddr > KERNEL_SPACE)
atomic_int32_dec(&task->user_usage);
tlb_flush_one_page(viraddr); // flush before viraddr is advanced
viraddr += PAGE_SIZE;
i++;
}
if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
int vm_free(size_t viraddr, uint32_t npages)
{
task_t* task = per_core(current_task);
page_table_t* pgt;
size_t i;
uint16_t idx_pd4, idx_dirp;
uint16_t idx_dir, idx_table;
if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0))
return -EINVAL;
if (viraddr <= KERNEL_SPACE)
spinlock_lock(&kslock);
else
spinlock_irqsave_lock(&task->pgd_lock);
i = 0;
while(i<npages)
{
idx_pd4 = (viraddr >> 39) & 0x1FF;
idx_dirp = (viraddr >> 30) & 0x1FF;
idx_dir = (viraddr >> 21) & 0x1FF;
idx_table = (viraddr >> 12) & 0x1FF;
// Currently, we allocate pages only in kernel space.
// => physical address of the page table is identical to the virtual address
pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK);
if (!pgt) {
viraddr += (size_t) PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK);
if (!pgt) {
viraddr += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES*PGT_ENTRIES;
continue;
}
pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK);
if (!pgt) {
viraddr += PGT_ENTRIES*PAGE_SIZE;
i += PGT_ENTRIES;
continue;
}
if (pgt->entries[idx_table])
pgt->entries[idx_table] = 0;
tlb_flush_one_page(viraddr); // flush before viraddr is advanced
viraddr += PAGE_SIZE;
i++;
}
if (viraddr <= KERNEL_SPACE)
spinlock_unlock(&kslock);
else
spinlock_irqsave_unlock(&task->pgd_lock);
return 0;
}
static void pagefault_handler(struct state *s)
{
task_t* task = per_core(current_task);
//page_dir_t* pgd = task->pgd;
//page_table_t* pgt = NULL;
size_t viraddr = read_cr2();
//size_t phyaddr;
#if 0
if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) {
viraddr = viraddr & PAGE_MASK;
phyaddr = get_page();
if (BUILTIN_EXPECT(!phyaddr, 0))
goto default_handler;
if (map_region(viraddr, phyaddr, 1, MAP_USER_SPACE) == viraddr) {
memset((void*) viraddr, 0x00, PAGE_SIZE);
return;
}
kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr);
put_page(phyaddr);
}
#endif
//default_handler:
kprintf("PAGE FAULT: Task %u got page fault at %p (irq %llu, cs:rip 0x%llx:0x%llx)\n", task->id, viraddr, s->int_no, s->cs, s->rip);
kprintf("Register state: rax = 0x%llx, rbx = 0x%llx, rcx = 0x%llx, rdx = 0x%llx, rdi = 0x%llx, rsi = 0x%llx, rbp = 0x%llx, rsp = 0x%llx\n",
s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp);
irq_enable();
abort();
}
int arch_paging_init(void)
{
uint32_t i, npages;
// uninstall default handler and install our own
irq_uninstall_handler(14);
irq_install_handler(14, pagefault_handler);
// kernel is already mapped into the kernel space (see entry64.asm)
// this includes .data, .bss, .text, video memory and the multiboot structure
#if MAX_CORES > 1
// Reserve page for smp boot code
if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE)) {
kputs("could not reserve page for smp boot code\n");
return -ENOMEM;
}
#endif
#ifdef CONFIG_MULTIBOOT
#if 0
/*
* Map reserved memory regions into the kernel space
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr;
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
while (mmap < mmap_end) {
if (mmap->type != MULTIBOOT_MEMORY_AVAILABLE) {
npages = mmap->len / PAGE_SIZE;
if ((mmap->addr+mmap->len) % PAGE_SIZE)
npages++;
map_region(mmap->addr, mmap->addr, npages, MAP_KERNEL_SPACE|MAP_NO_CACHE);
}
mmap++;
}
}
#endif
/*
* Modules like the init ram disk are already loaded.
* Therefore, we map these modules into the kernel space.
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
npages = mb_info->mods_count * sizeof(multiboot_module_t) >> PAGE_SHIFT;
if (mb_info->mods_count * sizeof(multiboot_module_t) & (PAGE_SIZE-1))
npages++;
map_region((size_t) (mb_info->mods_addr), (size_t) (mb_info->mods_addr), npages, MAP_REMAP|MAP_KERNEL_SPACE);
for(i=0; i<mb_info->mods_count; i++, mmodule++) {
// map physical address to the same virtual address
npages = (mmodule->mod_end - mmodule->mod_start) >> PAGE_SHIFT;
if (mmodule->mod_end & (PAGE_SIZE-1))
npages++;
kprintf("Map module %s at 0x%x (%u pages)\n", (char*) mmodule->cmdline, mmodule->mod_start, npages);
map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_REMAP|MAP_KERNEL_SPACE);
}
}
#endif
/* signal that paging can be used */
paging_enabled = 1;
/*
* we turned on paging
* => now, we are able to register our task
*/
register_task();
// APIC registers into the kernel address space
map_apic();
return 0;
}


@ -20,7 +20,7 @@
#include <metalsvm/stddef.h>
#include <metalsvm/stdio.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/mmu.h>
#include <metalsvm/memory.h>
#include <metalsvm/tasks.h>
#include <metalsvm/page.h>
#include <metalsvm/errno.h>


@ -70,7 +70,7 @@ static ssize_t socket_write(fildes_t* file, uint8_t* buffer, size_t size)
return -ENOMEM;
memcpy(tmp, buffer, size);
ret = lwip_write(file->offset, tmp, size);
kfree(tmp, size);
kfree(tmp);
#endif
if (ret < 0)
ret = -errno;
@ -147,7 +147,7 @@ int socket_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node, sizeof(vfs_node_t));
kfree(new_node);
return -ENOMEM;
}


@ -83,15 +83,8 @@ static ssize_t stdio_read(fildes_t* file, uint8_t* buffer, size_t size)
static ssize_t stdio_write(fildes_t* file, uint8_t* buffer, size_t size)
{
int i;
for (i = 0; i<size; i++, buffer++) {
#ifdef CONFIG_VGA
vga_putchar(*buffer);
#elif defined(CONFIG_UART)
uart_putchar(*buffer);
#else
for (i = 0; i<size; i++, buffer++)
kputchar(*buffer);
#endif
}
file->offset += size;
return size;
@ -152,7 +145,7 @@ int null_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node, sizeof(vfs_node_t));
kfree(new_node);
return -ENOMEM;
}
@ -211,7 +204,7 @@ int stdin_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node, sizeof(vfs_node_t));
kfree(new_node);
return -ENOMEM;
}
@ -270,7 +263,7 @@ int stdout_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node, sizeof(vfs_node_t));
kfree(new_node);
return -ENOMEM;
}
@ -329,7 +322,7 @@ int stderr_init(vfs_node_t* node, const char* name)
} while(blist);
kfree(new_node, sizeof(vfs_node_t));
kfree(new_node);
return -ENOMEM;
}

fs/fs.c

@ -132,6 +132,30 @@ int close_fs(fildes_t* file)
return ret;
}
void list_fs(vfs_node_t* node, uint32_t depth)
{
int i = 0;
dirent_t* dirent = NULL;
fildes_t* file = kmalloc(sizeof(fildes_t));
file->offset = 0;
file->flags = 0;
while ((dirent = readdir_fs(node, i)) != 0) {
kprintf("%*c|- %s\n", 2*depth, ' ', dirent->name);
if (strcmp(dirent->name, ".") && strcmp(dirent->name, "..")) {
vfs_node_t *new_node = finddir_fs(node, dirent->name);
if (new_node) {
kprintf("%*c\\\n", 2*depth, ' ');
list_fs(new_node, depth + 1);
}
}
i++;
}
kfree(file);
}
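/* Usage sketch: dump the whole tree starting at the VFS root, e.g.
 *   list_fs(fs_root, 0);
 * (assuming the usual fs_root global for the mounted root node).
 */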
struct dirent* readdir_fs(vfs_node_t * node, uint32_t index)
{
struct dirent* ret = NULL;


@ -210,7 +210,7 @@ static int initrd_open(fildes_t* file, const char* name)
if (file->node->type == FS_FILE) {
if ((file->flags & O_CREAT) && (file->flags & O_EXCL))
return -EEXIST;
/* in the case of O_TRUNC kfree all the nodes */
if (file->flags & O_TRUNC) {
uint32_t i;
@ -221,8 +221,7 @@ static int initrd_open(fildes_t* file, const char* name)
/* the first blist pointer has to remain valid. */
for(i=0; i<MAX_DATABLOCKS && !data; i++) {
if (blist->data[i]) {
kfree(blist->data[i],
sizeof(data_block_t));
kfree(blist->data[i]);
}
}
if (blist->next) {
@ -234,12 +233,12 @@ static int initrd_open(fildes_t* file, const char* name)
do {
for(i=0; i<MAX_DATABLOCKS && !data; i++) {
if (blist->data[i]) {
kfree(blist->data[i], sizeof(data_block_t));
kfree(blist->data[i]);
}
}
lastblist = blist;
blist = blist->next;
kfree(lastblist, sizeof(block_list_t));
kfree(lastblist);
} while(blist);
}
@ -253,7 +252,7 @@ static int initrd_open(fildes_t* file, const char* name)
/* opendir was called: */
if (name[0] == '\0')
return 0;
/* open file was called: */
if (!(file->flags & O_CREAT))
return -ENOENT;
@ -264,11 +263,11 @@ static int initrd_open(fildes_t* file, const char* name)
vfs_node_t* new_node = kmalloc(sizeof(vfs_node_t));
if (BUILTIN_EXPECT(!new_node, 0))
return -EINVAL;
blist = &file->node->block_list;
dir_block_t* dir_block;
dirent_t* dirent;
memset(new_node, 0x00, sizeof(vfs_node_t));
new_node->type = FS_FILE;
new_node->read = &initrd_read;
@ -286,7 +285,7 @@ static int initrd_open(fildes_t* file, const char* name)
if (!dirent->vfs_node) {
dirent->vfs_node = new_node;
strncpy(dirent->name, (char*) name, MAX_FNAME);
goto exit_create_file; // there might be a better Solution ***************
goto exit_create_file; // TODO: there might be a better Solution
}
}
}
@ -425,9 +424,9 @@ static vfs_node_t* initrd_mkdir(vfs_node_t* node, const char* name)
blist = blist->next;
} while(blist);
kfree(dir_block, sizeof(dir_block_t));
kfree(dir_block);
out:
kfree(new_node, sizeof(vfs_node_t));
kfree(new_node);
return NULL;
}


@ -34,14 +34,18 @@ extern "C" {
#define PAGE_SHIFT 12
#define CACHE_LINE 64
#define MAILBOX_SIZE 32
#define TIMER_FREQ 100 /* in HZ */
#define CLOCK_TICK_RATE 1193182 /* 8254 chip's internal oscillator frequency */
#define TIMER_FREQ 100 // in HZ
#define CLOCK_TICK_RATE 1193182 // 8254 chip's internal oscillator frequency
#define INT_SYSCALL 0x80
#define KERNEL_SPACE (1*1024*1024*1024)
#define VIDEO_MEM_ADDR 0xB8000 // the video memory address
#define SMP_SETUP_ADDR 0x07000
#define UART_PORT 0x3F8 // 0x2F8 for SCC
#define BYTE_ORDER LITTLE_ENDIAN
#define BYTE_ORDER LITTLE_ENDIAN
// some fixed addresses
#define SMP_SETUP_ADDR 0x07000
#define VIDEO_MEM_ADDR 0xB8000 // identity paged
#define LAPIC_ADDR 0xB9000 // fixed mapping from 0xFEE00000
#define IOAPIC_ADDR 0xBA000 // fixed mapping from 0xFEC00000
/*
* address space / (page_size * sizeof(uint8_t))
@ -52,7 +56,7 @@ extern "C" {
#define CONFIG_PCI
#define CONFIG_LWIP
#define CONFIG_VGA
//#define CONFIG_UART
#define CONFIG_UART
#define CONFIG_KEYBOARD
#define CONFIG_MULTIBOOT
//#define CONFIG_ROCKCREEK
@ -72,7 +76,7 @@ extern "C" {
//#define SHMADD
#define SHMDBG
//#define SHMADD_CACHEABLE
#define SCC_BOOTINFO 0x80000
#define SCC_BOOTINFO 0x80000
#define BUILTIN_EXPECT(exp, b) __builtin_expect((exp), (b))
//#define BUILTIN_EXPECT(exp, b) (exp)


@ -242,6 +242,9 @@ int open_fs(fildes_t* file, const char* fname);
/** @brief Yet to be documented */
int close_fs(fildes_t * file);
/** @brief List a filesystem hierarchically */
void list_fs(vfs_node_t* node, uint32_t depth);
/** @brief Get dir entry at index
* @param node VFS node to get dir entry from
* @param index Index position of desired dir entry

include/metalsvm/malloc.h (new file)

@ -0,0 +1,76 @@
/*
* Copyright 2010 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
#ifndef __MALLOC_H__
#define __MALLOC_H__
#include <metalsvm/stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/// Binary exponent of maximal size for kmalloc()
#define BUDDY_MAX 32 // 4 GB
/// Binary exponent of minimal buddy size
#define BUDDY_MIN 3 // 8 Byte >= sizeof(buddy_t)
/// Binary exponent of the size which we allocate with buddy_fill()
#define BUDDY_ALLOC 16 // 64 KByte = 16 * PAGE_SIZE
#define BUDDY_LISTS (BUDDY_MAX-BUDDY_MIN+1)
#define BUDDY_MAGIC 0xBABE
union buddy;
/** @brief Buddy
*
* Every free memory block is stored in a linked list according to its size.
* We can use this free memory to store this buddy_t union, which represents
* this block (the buddy_t union is aligned to the front).
* Therefore the address of the buddy_t union is equal to the address
* of the underlying free memory block.
*
* Every allocated memory block is prefixed with its binary size exponent and
* a known magic number. This prefix is hidden from the user because it is located
* before the actual memory address returned by kmalloc()
*/
typedef union buddy {
/// Pointer to the next buddy in the linked list.
union buddy* next;
struct {
/// The binary exponent of the block size
uint8_t exponent;
/// Must be equal to BUDDY_MAGIC for a valid memory block
uint16_t magic;
} prefix;
} buddy_t;
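/* Worked example of the prefix scheme (prefix size assumed 3-4 bytes):
 * kmalloc(100) needs 100 bytes of payload plus the prefix, i.e. at most
 * 104 bytes; the next power of two is 2^7 = 128, so exponent = 7 and a
 * 128 byte buddy is used. kfree() steps back over the prefix, verifies
 * BUDDY_MAGIC and reads the exponent to return the block to the matching
 * free list.
 */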
/** @brief Dump free buddies */
void buddy_dump(void);
#ifdef __cplusplus
}
#endif
#endif


@ -31,7 +31,6 @@
#include <metalsvm/stddef.h>
#include <asm/atomic.h>
//#include <asm/mmu.h>
#ifdef __cplusplus
extern "C" {
@ -50,33 +49,47 @@ extern atomic_int32_t total_available_pages;
*/
int mmu_init(void);
/** @brief get continuous pages
/** @brief Get continuous pages
*
* This function finds a continuous page region (first fit algorithm)
*
* @param no_pages Desired number of pages
* Use a first-fit algorithm to find a suitable, continuous physical memory region
*
* @param npages Desired number of pages
* @return
* - physical address on success
* - 0 on failure
*/
size_t get_pages(uint32_t no_pages);
size_t get_pages(size_t npages);
/** @brief get a single page
/** @brief Get a single page
*
* Convenience function: uses get_pages(1);
*/
static inline size_t get_page(void) { return get_pages(1); }
/** @brief Put back a page after use
/** @brief Put back a sequence of continuous pages
*
* @param phyaddr Physical address to put back
* @param phyaddr Physical address of the first page
* @param npages Number of pages
*
* @return
* - 0 on success
* - -EINVAL (-22) on failure
* @return number of pages which were marked as used before calling
*/
int put_page(size_t phyaddr);
int put_pages(size_t phyaddr, size_t npages);
/** @brief Put a single page
*
* Convenience function: uses put_pages(phyaddr, 1);
*/
static inline int put_page(size_t phyaddr) { return put_pages(phyaddr, 1); }
/** @brief Copy a physical page frame
*
* @param psrc physical address of source page frame
* @param pdest physical address of destination page frame
* @return
* - 0 on success
* - -1 on failure
*/
int copy_page(size_t pdest, size_t psrc);
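/* Usage sketch of the physical page API (error handling elided):
 *
 * size_t src = get_page();
 * size_t dup = get_page();
 * copy_page(dup, src); // destination first, like memcpy()
 * put_page(dup);
 * put_page(src);
 */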
#ifdef __cplusplus
}


@ -29,10 +29,7 @@
#include <metalsvm/stddef.h>
#include <asm/page.h>
/**
* Sets up the environment, page directories etc and
* enables paging.
*/
/** @brief Sets up the environment, page directories etc and enables paging. */
static inline int paging_init(void) { return arch_paging_init(); }
#endif

View file

@ -28,14 +28,10 @@
extern "C" {
#endif
#define NULL ((void*) 0)
typedef unsigned int tid_t;
#define PAGE_SIZE (1 << PAGE_SHIFT)
#define PAGE_MASK ~(PAGE_SIZE - 1)
#define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
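As a worked example, assuming PAGE_SHIFT is 12 (4 KiB pages), these macros evaluate to:

// PAGE_SIZE          == 0x1000
// PAGE_MASK          == ~0xFFF (on 32 bit: 0xFFFFF000)
// PAGE_ALIGN(0x1234) == (0x1234 + 0xFFF) & PAGE_MASK == 0x2000
// PAGE_ALIGN(0x2000) == 0x2000	(aligned addresses stay unchanged)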
#if MAX_CORES == 1
#define per_core(name) name
#define DECLARE_PER_CORE(type, name) extern type name;
@ -66,10 +62,10 @@ typedef unsigned int tid_t;
irq_nested_enable(flags);\
return ret; \
}
#define CORE_ID smp_id()
#endif
/* needed to find the task, which is currently running on this core */
// needed to find the task, which is currently running on this core
struct task;
DECLARE_PER_CORE(struct task*, current_task);

View file

@ -29,72 +29,66 @@
#ifndef __STDLIB_H__
#define __STDLIB_H__
#include <metalsvm/config.h>
#include <metalsvm/tasks_types.h>
#include <asm/stddef.h>
#include <metalsvm/stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
#define MAP_KERNEL_SPACE (1 << 0)
#define MAP_USER_SPACE (1 << 1)
#define MAP_PAGE_TABLE (1 << 2)
#define MAP_NO_CACHE (1 << 3)
#define MAP_WT (1 << 5)
#define MAP_CODE (1 << 6)
#define MAP_READONLY (1 << 7)
#ifdef CONFIG_ROCKCREEK
#define MAP_MPE (1 << 8)
#endif
#define MAP_SVM_STRONG (1 << 9)
#define MAP_SVM_LAZYRELEASE (1 << 10)
#define MAP_SVM_INIT (1 << 11)
#define MAP_NO_ACCESS (1 << 12)
#define MAP_REMAP (1 << 13)
#define MAP_NO_ACCESS (1 << 0)
#define MAP_READ_ONLY (1 << 1)
#define MAP_USER_SPACE (1 << 2)
#define MAP_CODE (1 << 3)
#define MAP_WT (1 << 4)
#define MAP_NO_CACHE (1 << 5)
#define MAP_MPE (1 << 6)
#define MAP_SVM_STRONG (1 << 7)
#define MAP_SVM_LAZYRELEASE (1 << 8)
#define MAP_SVM_INIT (1 << 9)
#define MAP_KERNEL_SPACE (0 << 2) // legacy compatibility
#define MAP_REMAP (1 << 12)
//#define MAP_NON_CONTINUOUS (1 << 13) // TODO
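A short sketch of how these flags are meant to be combined, using palloc() from this header (an illustration, not code from the repository):

// two pages of kernel memory, mapped executable but write-protected
void* code_buf = palloc(2*PAGE_SIZE, MAP_CODE|MAP_READ_ONLY);

// one uncached page, e.g. for memory-mapped I/O style accesses
void* io_buf = palloc(PAGE_SIZE, MAP_NO_CACHE);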
void NORETURN abort(void);
/** @brief Kernel's memory allocator function.
/** @brief General page allocator function
*
* This will just call mem_allocation with
* the flags MAP_KERNEL_SPACE and MAP_HEAP.
*
* @return Pointer to the new memory range
*/
void* kmalloc(size_t);
/** @brief Kernel's more general memory allocator function.
*
* This function lets you choose flags for the newly allocated memory.
* This function allocates and maps whole pages.
* To avoid fragmentation you should use kmalloc() and kfree()!
*
* @param sz Desired size of the new memory
* @param flags Flags to specify
 * @param flags Flags for map_region() and vma_add()
*
* @return Pointer to the new memory range
*/
void* mem_allocation(size_t sz, uint32_t flags);
void* palloc(size_t sz, uint32_t flags);
/** @brief Free memory
/** @brief Free general kernel memory
*
* The kernel malloc doesn't track how
* much memory was allocated for which pointer,
 * palloc() doesn't track how much memory was allocated for which pointer,
* so you have to specify how much memory shall be freed.
*/
void kfree(void*, size_t);
/** @brief Create a new stack for a new task
*
* @return start address of the new stack
 * @param sz The size which should be freed
*/
void* create_stack(void);
void pfree(void* addr, size_t sz);
/** @brief Delete stack of a finished task
/** @brief The memory allocator function
*
* @param addr Pointer to the stack
* @return 0 on success
* This allocator uses a buddy system to manage free memory.
*
* @return Pointer to the new memory range
*/
int destroy_stack(task_t* addr);
void* kmalloc(size_t sz);
/** @brief The memory free function
*
 * Releases memory allocated by kmalloc()
 *
 * @param addr The address of the memory block allocated by kmalloc()
*/
void kfree(void* addr);
/** @brief String to long
*
@ -113,7 +107,7 @@ unsigned long strtoul(const char* nptr, char** endptr, int base);
*/
static inline int atoi(const char *str)
{
return (int)strtol(str, (char **)NULL, 10);
return (int)strtol(str, (char **) NULL, 10);
}
#ifdef __cplusplus

View file

@ -147,9 +147,7 @@ tid_t wait(int32_t* result);
*/
void update_load(void);
/** @brief Print the current cpu load
*
*/
/** @brief Print the current cpu load */
void dump_load(void);
#if MAX_CORES > 1
@ -201,9 +199,7 @@ int block_current_task(void);
*/
int set_timer(uint64_t deadline);
/** @brief check is a timer is expired
*
*/
/** @brief Check if a timer has expired */
void check_timers(void);
/** @brief Abort current task */

View file

@ -36,6 +36,7 @@
#include <metalsvm/mailbox_types.h>
#include <asm/tasks_types.h>
#include <asm/atomic.h>
#include <asm/page.h>
#ifdef __cplusplus
extern "C" {
@ -62,7 +63,6 @@ extern "C" {
#define TASK_L2 (1 << 3)
typedef int (*entry_point_t)(void*);
struct page_dir;
/** @brief The task_t structure */
typedef struct task {
@ -86,12 +86,12 @@ typedef struct task {
struct task* prev;
/// last core id on which the task was running
uint32_t last_core;
/// usage in number of pages
/// usage in number of pages (including page map tables)
atomic_int32_t user_usage;
/// avoids concurrent access to the page directory
spinlock_irqsave_t pgd_lock;
/// pointer to the page directory
struct page_dir* pgd;
/// locks access to all page maps with PG_USER flag set
spinlock_irqsave_t page_lock;
/// virtual address of page map for CR3
page_entry_t* page_map;
/// lock for the VMA_list
spinlock_t vma_lock;
/// list of VMAs
@ -100,14 +100,12 @@ typedef struct task {
filp_t* fildes_table;
/// starting time/tick of the task
uint64_t start_tick;
/// start address of the heap
size_t start_heap;
/// end address of the heap
size_t end_heap;
/// the userspace heap
vma_t* heap;
/// LwIP error code
int lwip_err;
/// mail inbox
mailbox_wait_msg_t inbox;
/// mail outbox array
mailbox_wait_msg_t* outbox[MAX_TASKS];
/// FPU state

View file

@ -19,6 +19,7 @@
/**
* @author Stefan Lankes
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
* @file include/metalsvm/vma.h
 * @brief VMA related structure and functions
*/
@ -27,56 +28,125 @@
#define __VMA_H__
#include <metalsvm/stddef.h>
#include <asm/page.h>
#ifdef __cplusplus
extern "C" {
#endif
/// Read access to this VMA is allowed
#define VMA_READ (1 << 0)
/// Write access to this VMA is allowed
#define VMA_WRITE (1 << 1)
/// Instructions fetches in this VMA are allowed
#define VMA_EXECUTE (1 << 2)
/// This VMA is cacheable
#define VMA_CACHEABLE (1 << 3)
#define VMA_NOACCESS (1 << 4)
/// This VMA is not accessible
#define VMA_NO_ACCESS (1 << 4)
/// This VMA should be part of the userspace
#define VMA_USER (1 << 5)
/// A collection of flags used for the kernel heap (kmalloc)
#define VMA_HEAP (VMA_READ|VMA_WRITE|VMA_CACHEABLE)
// boundaries for VAS allocation
#define VMA_KERN_MIN 0xC0000
#define VMA_KERN_MAX KERNEL_SPACE
#define VMA_USER_MIN KERNEL_SPACE
// last three top level entries are reserved
#ifdef CONFIG_X86_32
#define VMA_USER_MAX 0xFF400000
#elif defined (CONFIG_X86_64)
#define VMA_USER_MAX 0xFFFFFE8000000000
#endif
struct vma;
/** @brief VMA structure definition */
/** @brief VMA structure definition
*
* Each item in this linked list marks a used part of the virtual address space.
 * It is used by vma_alloc() to find holes between them.
*/
typedef struct vma {
/// Start address of the memory area
size_t start;
/// End address of the memory area
size_t end;
/// Type flags field
uint32_t type;
uint32_t flags;
/// Pointer of next VMA element in the list
struct vma* next;
/// Pointer to previous VMA element in the list
struct vma* prev;
} vma_t;
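A minimal sketch of the hole search this list enables (simplified; the real vma_alloc() below additionally respects the VMA_KERN_*/VMA_USER_* boundaries and takes the list lock):

size_t size = 0x4000;		// requested hole size (example value)
size_t start = VMA_KERN_MIN;	// begin of the allowed region
vma_t* vma = vma_list;		// assumption: head of the sorted VMA list
while (vma && (vma->start - start < size)) {
	start = vma->end;	// hole too small -> continue behind this VMA
	vma = vma->next;
}
// start now points to a hole of at least 'size' bytes (or past the last VMA)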
/** @brief Add a new virtual memory region to the list of VMAs
/** @brief Initialize the kernelspace VMA list
*
* @param task Pointer to the task_t structure of the task
* @param start Start address of the new region
* @param end End address of the new region
* @param type Type flags the new region shall have
* Reserves several system-relevant virtual memory regions:
* - SMP boot page (SMP_SETUP_ADDR)
* - VGA video memory (VIDEO_MEM_ADDR)
* - The kernel (kernel_start - kernel_end)
* - Multiboot structure (mb_info)
* - Multiboot mmap (mb_info->mmap_*)
* - Multiboot modules (mb_info->mods_*)
* - Init Ramdisk
*
* @return
* - 0 on success
* - <0 on failure
*/
int vma_init();
/** @brief Add a new virtual memory area to the list of VMAs
*
* @param start Start address of the new area
* @param end End address of the new area
* @param flags Type flags the new area shall have
*
* @return
* - 0 on success
 * - -EINVAL (-22) or -ENOMEM (-12) on failure
*/
int vma_add(struct task* task, size_t start, size_t end, uint32_t type);
int vma_add(size_t start, size_t end, uint32_t flags);
/** @brief Dump information about this task's VMAs into the terminal.
/** @brief Search for a free memory area
*
* This will print out Start, end and flags for each VMA in the task's list
 * @param size Size of the requested VMA in bytes
 * @param flags Type flags the new area shall have
 * @return
 * - 0 on failure
 * - the start address of a free area
*/
size_t vma_alloc(size_t size, uint32_t flags);
/** @brief Free an allocated memory area
*
 * @param start Start address of the area to be freed
 * @param end End address of the area to be freed
* @return
* - 0 on success
* - -EINVAL (-22) on failure
*/
int vma_dump(struct task* task);
int vma_free(size_t start, size_t end);
/** @brief Free all virtual memory areas
*
* @return
* - 0 on success
*/
int drop_vma_list(struct task* task);
/** @brief Copy the VMA list of the current task to task
*
 * @param src The task whose VMA list is copied
 * @param dest The task where the list should be copied to
* @return
* - 0 on success
*/
int copy_vma_list(struct task* src, struct task* dest);
/** @brief Dump information about this task's VMAs into the terminal. */
void vma_dump();
#ifdef __cplusplus
}

View file

@ -63,7 +63,7 @@ extern const void bss_end;
int lowlevel_init(void)
{
// initialize .bss section
memset((void*)&bss_start, 0x00, ((size_t) &bss_end - (size_t) &bss_start));
memset((char*) &bss_start, 0x00, (char*) &bss_end - (char*) &bss_start);
koutput_init();
@ -238,49 +238,6 @@ int network_shutdown(void)
return 0;
}
#if 0
static void list_fs(vfs_node_t* node, uint32_t depth)
{
int j, i = 0;
dirent_t* dirent = NULL;
fildes_t* file = kmalloc(sizeof(fildes_t));
file->offset = 0;
file->flags = 0;
while ((dirent = readdir_fs(node, i)) != 0) {
for(j=0; j<depth; j++)
kputs(" ");
kprintf("%s\n", dirent->name);
if (strcmp(dirent->name, ".") && strcmp(dirent->name, "..")) {
vfs_node_t *new_node = finddir_fs(node, dirent->name);
if (new_node) {
if (new_node->type == FS_FILE) {
char buff[16] = {[0 ... 15] = 0x00};
file->node = new_node;
file->offset = 0;
file->flags = 0;
read_fs(file, (uint8_t*)buff, 8);
for(j=0; j<depth+1; j++)
kputs(" ");
kprintf("content: %s\n", buff);
} else list_fs(new_node, depth+1);
}
}
i++;
}
kfree(file, sizeof(fildes_t));
}
static void list_root(void) {
kprintf("List of the file system:\n/\n");
list_fs(fs_root, 1);
}
#endif
int initd(void* arg)
{
#ifdef CONFIG_LWIP
@ -317,9 +274,13 @@ int initd(void* arg)
#endif
#endif
// list_root();
#if 1
kputs("Filesystem:\n");
list_fs(fs_root, 1);
#endif
test_init();
return 0;
}

View file

@ -21,7 +21,7 @@
#include <metalsvm/stdio.h>
#include <metalsvm/string.h>
#include <metalsvm/time.h>
#include <metalsvm/mmu.h>
#include <metalsvm/memory.h>
#include <metalsvm/tasks.h>
#include <metalsvm/processor.h>
#include <metalsvm/errno.h>
@ -29,6 +29,7 @@
#include <metalsvm/fs.h>
#include <asm/irq.h>
#include <asm/irqflags.h>
#include <asm/page.h>
#include <asm/kb.h>
#ifdef CONFIG_ROCKCREEK
#include <asm/icc.h>
@ -71,8 +72,9 @@ int main(void)
pushbg(COL_BLUE);
kprintf("This is MetalSVM %s Build %u, %u\n",
METALSVM_VERSION, &__BUILD_DATE, &__BUILD_TIME);
popbg();
system_init();
irq_init();
timer_init();
@ -85,7 +87,7 @@ int main(void)
icc_init();
svm_init();
#endif
initrd_init();
irq_enable();
@ -101,9 +103,10 @@ int main(void)
disable_timer_irq();
#endif
sleep(5);
sleep(2);
create_kernel_task(&id, initd, NULL, NORMAL_PRIO);
kprintf("Create initd with id %u\n", id);
reschedule();
while(1) {

View file

@ -105,11 +105,11 @@ static int sys_open(const char* name, int flags, int mode)
/* file doesn't exist! */
if (check < 0) {
/* tidy up the fildescriptor */
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
kfree(curr_task->fildes_table[fd]);
curr_task->fildes_table[fd] = NULL;
return check;
}
return fd;
}
@ -196,7 +196,7 @@ static int sys_socket(int domain, int type, int protocol)
/* file doesn't exist! */
if (curr_task->fildes_table[fd]->node == NULL) {
/* tidy up the fildescriptor */
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
kfree(curr_task->fildes_table[fd]);
curr_task->fildes_table[fd] = NULL;
return -ENOENT;
}
@ -236,11 +236,11 @@ static int sys_accept(int s, struct sockaddr* addr, socklen_t* addrlen)
curr_task->fildes_table[fd]->offset = sock2;
curr_task->fildes_table[fd]->count = 1;
curr_task->fildes_table[fd]->node = findnode_fs("/dev/socket");
/* file doesn't exist! */
if (curr_task->fildes_table[fd]->node == NULL) {
/* tidy up the fildescriptor */
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
kfree(curr_task->fildes_table[fd]);
curr_task->fildes_table[fd] = NULL;
return -ENOENT;
}
@ -273,7 +273,7 @@ static int sys_close(int fd)
/* close command failed -> return check = errno */
if (BUILTIN_EXPECT(check < 0, 0))
return check;
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
kfree(curr_task->fildes_table[fd]);
curr_task->fildes_table[fd] = NULL;
} else {
curr_task->fildes_table[fd]->count--;
@ -356,7 +356,7 @@ static int sys_dup(int fd)
* free the memory which was allocated in get_fildes()
* cause will link it to another existing memory space
*/
kfree(curr_task->fildes_table[new_fd], sizeof(fildes_t));
kfree(curr_task->fildes_table[new_fd]);
/* and link it to another existing memory space */
curr_task->fildes_table[new_fd] = curr_task->fildes_table[fd];
@ -381,7 +381,7 @@ static int sys_dup2(int fd, int fd2)
/* If fd and fd2 are equal, then dup2() just returns fd2 */
if (fd == fd2)
return fd2;
/*
* if descriptor fd2 is already in use, it is first deallocated
* as if a close(2) call had been done first
@ -398,30 +398,32 @@ static int sys_dup2(int fd, int fd2)
static int sys_sbrk(int incr)
{
task_t* task = per_core(current_task);
vma_t* tmp = NULL;
vma_t* heap = task->heap;
int ret;
spinlock_lock(&task->vma_lock);
tmp = task->vma_list;
while(tmp && !((task->end_heap >= tmp->start) && (task->end_heap <= tmp->end)))
tmp = tmp->next;
if (BUILTIN_EXPECT(!heap, 0)) {
kprintf("sys_sbrk: missing heap!\n");
abort();
}
ret = (int) task->end_heap;
task->end_heap += incr;
if (task->end_heap < task->start_heap)
task->end_heap = task->start_heap;
// resize virtual memory area
if (tmp && (tmp->end <= task->end_heap))
tmp->end = task->end_heap;
ret = heap->end;
heap->end += incr;
if (heap->end < heap->start)
heap->end = heap->start;
// allocation and mapping of new pages for the heap
// is catched by the pagefault handler
kprintf("sys_sbrk: task = %d, heap->start = %#lx, heap->end = %#lx, incr = %i\n", task->id, heap->start, heap->end, incr); // TOD0: remove
spinlock_unlock(&task->vma_lock);
return ret;
}
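Seen from userspace, the semantics match the classic sbrk() (a hedged sketch, assuming the libc wrapper dispatches to sys_sbrk()):

char* old_brk = sbrk(4096);	// grow the heap by one page
char* cur_brk = sbrk(0);	// query the current break
// cur_brk == old_brk + 4096; the new page is mapped lazily by the
// pagefault handler on first access, as noted in the code above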
int syscall_handler(uint32_t sys_nr, ...)
int syscall_handler(size_t sys_nr, ...)
{
int ret = -EINVAL;
va_list vl;
@ -500,7 +502,7 @@ int syscall_handler(uint32_t sys_nr, ...)
break;
case __NR_wait: {
int32_t* status = va_arg(vl, int32_t*);
ret = wait(status);
break;
}
@ -549,7 +551,7 @@ int syscall_handler(uint32_t sys_nr, ...)
ret = -ENOTSOCK;
break;
}
//kprintf("lwip_connect: %p with lenght %i and Socket %i", name, namelen, per_core(current_task)->fildes_table[fd].offset);
//kprintf("lwip_connect: %p with lenght %i and Socket %i", name, namelen, per_core(current_task)->fildes_table[fd].offset); // TODO: remove
ret = lwip_connect(per_core(current_task)->fildes_table[fd]->offset, name, namelen);
@ -601,7 +603,7 @@ int syscall_handler(uint32_t sys_nr, ...)
}
#endif
default:
kputs("invalid system call\n");
kprintf("syscall_handler: invalid system call %u\n", sys_nr);
ret = -ENOSYS;
break;
};

View file

@ -30,7 +30,7 @@
#include <metalsvm/stdlib.h>
#include <metalsvm/string.h>
#include <metalsvm/errno.h>
#include <metalsvm/mmu.h>
#include <metalsvm/memory.h>
#include <metalsvm/page.h>
#include <metalsvm/tasks.h>
#include <metalsvm/processor.h>
@ -47,26 +47,27 @@
* A task's id will be its position in this array.
*/
static task_t task_table[MAX_TASKS] = { \
[0] = {0, TASK_IDLE, NULL, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_IRQSAVE_INIT, NULL, SPINLOCK_INIT, NULL, NULL, 0, 0, 0, 0}, \
[1 ... MAX_TASKS-1] = {0, TASK_INVALID, NULL, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_IRQSAVE_INIT, NULL, SPINLOCK_INIT, NULL, NULL, 0, 0, 0, 0}};
[0] = {0, TASK_IDLE, NULL, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_IRQSAVE_INIT, 0, SPINLOCK_INIT, NULL, NULL, 0, NULL}, \
[1 ... MAX_TASKS-1] = {0, TASK_INVALID, NULL, NULL, 0, 0, 0, NULL, NULL, 0, ATOMIC_INIT(0), SPINLOCK_IRQSAVE_INIT, 0, SPINLOCK_INIT, NULL, NULL, 0, NULL}
};
static spinlock_irqsave_t table_lock = SPINLOCK_IRQSAVE_INIT;
#ifndef CONFIG_TICKLESS
#if MAX_CORES > 1
static runqueue_t runqueues[MAX_CORES] = { \
[0] = {task_table+0, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}, \
[1 ... MAX_CORES-1] = {NULL, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#else
static runqueue_t runqueues[1] = { \
[0] = {task_table+0, NULL, 0, {[0 ... 2] = 0}, TIMER_FREQ/5, TIMER_FREQ/2, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#endif
#else
#if MAX_CORES > 1
static runqueue_t runqueues[MAX_CORES] = { \
[0] = {task_table+0, NULL, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}, \
[1 ... MAX_CORES-1] = {NULL, NULL, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#else
static runqueue_t runqueues[1] = { \
[0] = {task_table+0, NULL, 0, 0, {[0 ... MAX_PRIO-1] = {NULL, NULL}}, {NULL, NULL}, SPINLOCK_IRQSAVE_INIT}};
#endif
#endif
@ -78,6 +79,7 @@ DEFINE_PER_CORE(task_t*, current_task, task_table+0);
extern const void boot_stack;
/** @brief helper function for the assembly code to determine the current task
*
* @return Pointer to the task_t structure of current task
*/
task_t* get_current_task(void) {
@ -96,6 +98,32 @@ uint32_t get_highest_priority(void)
return msb(runqueues[CORE_ID].prio_bitmap);
}
/** @brief Create a new stack for a new task
*
* @return start address of the new stack
*/
static void* create_stack(void)
{
return palloc(KERNEL_STACK_SIZE, MAP_KERNEL_SPACE);
}
/** @brief Delete stack of a finished task
*
* @param addr Pointer to the stack
* @return
* - 0 on success
* - -EINVAL on failure
*/
static int destroy_stack(task_t* task)
{
if (BUILTIN_EXPECT(!task || !task->stack, 0))
return -EINVAL;
pfree(task->stack, KERNEL_STACK_SIZE);
return 0;
}
int multitasking_init(void) {
if (BUILTIN_EXPECT(task_table[0].status != TASK_IDLE, 0)) {
kputs("Task 0 is not an idle task\n");
@ -104,7 +132,7 @@ int multitasking_init(void) {
mailbox_wait_msg_init(&task_table[0].inbox);
memset(task_table[0].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[0].pgd = get_boot_pgd();
task_table[0].page_map = get_boot_page_map();
task_table[0].flags = TASK_DEFAULT_FLAGS;
task_table[0].prio = IDLE_PRIO;
task_table[0].stack = (void*) &boot_stack;
@ -128,7 +156,7 @@ size_t get_idle_task(uint32_t id)
atomic_int32_set(&task_table[id].user_usage, 0);
mailbox_wait_msg_init(&task_table[id].inbox);
memset(task_table[id].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[id].pgd = get_boot_pgd();
task_table[id].page_map = get_boot_page_map();
current_task[id].var = task_table+id;
runqueues[id].idle = task_table+id;
@ -193,10 +221,8 @@ static void wakeup_blocked_tasks(int result)
spinlock_irqsave_unlock(&table_lock);
}
/** @brief A procedure to be called by
* procedures which are called by exiting tasks. */
/** @brief A procedure to be called by procedures which are called by exiting tasks. */
static void NORETURN do_exit(int arg) {
vma_t* tmp;
task_t* curr_task = per_core(current_task);
uint32_t flags, core_id, fd, status;
@ -204,17 +230,17 @@ static void NORETURN do_exit(int arg) {
for (fd = 0; fd < NR_OPEN; fd++) {
if(curr_task->fildes_table[fd] != NULL) {
/*
* delete a descriptor from the per-process object
* reference table. If this is not the last reference to the underlying
* object, the object will be ignored.
*/
* Delete a descriptor from the per-process object
* reference table. If this is not the last reference to the underlying
* object, the object will be ignored.
*/
if (curr_task->fildes_table[fd]->count == 1) {
/* try to close the file */
// try to close the file
status = close_fs(curr_task->fildes_table[fd]);
/* close command failed -> return check = errno */
// close command failed -> return check = errno
if (BUILTIN_EXPECT(status < 0, 0))
kprintf("Task %u was not able to close file descriptor %i. close_fs returned %d", curr_task->id, fd, -status);
kfree(curr_task->fildes_table[fd], sizeof(fildes_t));
kfree(curr_task->fildes_table[fd]);
curr_task->fildes_table[fd] = NULL;
} else {
curr_task->fildes_table[fd]->count--;
@ -222,37 +248,26 @@ static void NORETURN do_exit(int arg) {
}
}
}
//finally the table has to be cleared.
kfree(curr_task->fildes_table, sizeof(filp_t)*NR_OPEN);
kfree(curr_task->fildes_table); // finally the table has to be cleared
}
kprintf("Terminate task: %u, return value %d\n", curr_task->id, arg);
wakeup_blocked_tasks(arg);
//vma_dump(curr_task);
spinlock_lock(&curr_task->vma_lock);
drop_vma_list(curr_task);
drop_page_map();
// remove memory regions
while((tmp = curr_task->vma_list) != NULL) {
kfree((void*) tmp->start, tmp->end - tmp->start + 1);
curr_task->vma_list = tmp->next;
kfree((void*) tmp, sizeof(vma_t));
}
spinlock_unlock(&curr_task->vma_lock);
drop_pgd(); // delete page directory and its page tables
#if 0
#if 1
if (atomic_int32_read(&curr_task->user_usage))
kprintf("Memory leak! Task %d did not release %d pages\n",
curr_task->id, atomic_int32_read(&curr_task->user_usage));
#endif
flags = irq_nested_disable();
curr_task->status = TASK_FINISHED;
// decrease the number of active tasks
flags = irq_nested_disable();
core_id = CORE_ID;
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].nr_tasks--;
@ -262,9 +277,7 @@ static void NORETURN do_exit(int arg) {
reschedule();
kprintf("Kernel panic: scheduler on core %d found no valid task\n", CORE_ID);
while(1) {
HALT;
}
while(1) HALT;
}
/** @brief A procedure to be called by kernel tasks */
@ -300,6 +313,7 @@ void NORETURN abort(void) {
static int create_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio, uint32_t core_id)
{
task_t* curr_task;
task_t* new_task = NULL;
int ret = -ENOMEM;
uint32_t i;
@ -319,64 +333,76 @@ static int create_task(tid_t* id, entry_point_t ep, void* arg, uint8_t prio, uin
#endif
{
core_id = CORE_ID;
kprintf("Inavlid core id! Set id to %u!\n", core_id);
kprintf("create_task: invalid core id! Set id to %u!\n", core_id);
}
curr_task = per_core(current_task);
// search free entry in task table
for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) {
atomic_int32_set(&task_table[i].user_usage, 0);
ret = create_pgd(task_table+i, 0);
if (ret < 0) {
ret = -ENOMEM;
goto create_task_out;
}
task_table[i].id = i;
task_table[i].status = TASK_READY;
task_table[i].last_stack_pointer = NULL;
task_table[i].stack = create_stack();
task_table[i].flags = TASK_DEFAULT_FLAGS;
task_table[i].prio = prio;
task_table[i].last_core = 0;
spinlock_init(&task_table[i].vma_lock);
task_table[i].vma_list = NULL;
task_table[i].fildes_table = NULL;
mailbox_wait_msg_init(&task_table[i].inbox);
memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[i].outbox[curr_task->id] = &curr_task->inbox;
if (id)
*id = i;
ret = create_default_frame(task_table+i, ep, arg);
task_table[i].start_heap = 0;
task_table[i].end_heap = 0;
task_table[i].lwip_err = 0;
task_table[i].start_tick = get_clock_tick();
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[prio-1].first) {
task_table[i].next = task_table[i].prev = NULL;
runqueues[core_id].queue[prio-1].first = task_table+i;
runqueues[core_id].queue[prio-1].last = task_table+i;
} else {
task_table[i].prev = runqueues[core_id].queue[prio-1].last;
task_table[i].next = NULL;
runqueues[core_id].queue[prio-1].last->next = task_table+i;
runqueues[core_id].queue[prio-1].last = task_table+i;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
new_task = &task_table[i];
break;
}
}
create_task_out:
if (BUILTIN_EXPECT(!new_task, 0)) {
ret = -ENOMEM;
goto out;
}
atomic_int32_set(&new_task->user_usage, 0);
ret = copy_page_map(new_task, 0);
if (ret < 0) {
ret = -ENOMEM;
goto out;
}
new_task->id = i;
new_task->status = TASK_READY;
new_task->last_stack_pointer = NULL;
new_task->flags = TASK_DEFAULT_FLAGS;
new_task->prio = prio;
new_task->last_core = 0;
spinlock_init(&new_task->vma_lock);
new_task->vma_list = NULL;
new_task->fildes_table = NULL;
new_task->stack = create_stack();
if (BUILTIN_EXPECT(!new_task->stack, 0)) {
ret = -ENOMEM;
goto out;
}
mailbox_wait_msg_init(&new_task->inbox);
memset(new_task->outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
new_task->outbox[curr_task->id] = &curr_task->inbox;
if (id)
*id = i;
ret = create_default_frame(new_task, ep, arg);
new_task->lwip_err = 0;
new_task->start_tick = get_clock_tick();
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[prio-1].first) {
new_task->next = new_task->prev = NULL;
runqueues[core_id].queue[prio-1].first = new_task;
runqueues[core_id].queue[prio-1].last = new_task;
}
else {
new_task->prev = runqueues[core_id].queue[prio-1].last;
new_task->next = NULL;
runqueues[core_id].queue[prio-1].last->next = new_task;
runqueues[core_id].queue[prio-1].last = new_task;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
out:
spinlock_irqsave_unlock(&table_lock);
return ret;
@ -387,109 +413,103 @@ int sys_fork(void)
int ret = -ENOMEM;
unsigned int i, core_id, fd_i;
task_t* parent_task = per_core(current_task);
vma_t** child;
vma_t* parent;
vma_t* tmp;
task_t* child_task = NULL;
spinlock_lock(&parent_task->vma_lock);
spinlock_irqsave_lock(&table_lock);
core_id = CORE_ID;
// search free entry in task_table
for(i=0; i<MAX_TASKS; i++) {
if (task_table[i].status == TASK_INVALID) {
atomic_int32_set(&task_table[i].user_usage, 0);
ret = create_pgd(task_table+i, 1);
if (ret < 0) {
ret = -ENOMEM;
goto create_task_out;
}
task_table[i].id = i;
task_table[i].last_stack_pointer = NULL;
task_table[i].stack = create_stack();
spinlock_init(&task_table[i].vma_lock);
// copy VMA list
child = &task_table[i].vma_list;
parent = parent_task->vma_list;
tmp = NULL;
while(parent) {
*child = (vma_t*) kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!child, 0))
break;
(*child)->start = parent->start;
(*child)->end = parent->end;
(*child)->type = parent->type;
(*child)->prev = tmp;
(*child)->next = NULL;
parent = parent->next;
tmp = *child;
child = &((*child)->next);
}
/* init fildes_table */
task_table[i].fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
memcpy(task_table[i].fildes_table, parent_task->fildes_table, sizeof(filp_t)*NR_OPEN);
for (fd_i = 0; fd_i < NR_OPEN; fd_i++)
if ((task_table[i].fildes_table[fd_i]) != NULL)
task_table[i].fildes_table[fd_i]->count++;
mailbox_wait_msg_init(&task_table[i].inbox);
memset(task_table[i].outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
task_table[i].outbox[parent_task->id] = &parent_task->inbox;
task_table[i].flags = parent_task->flags;
memcpy(&(task_table[i].fpu), &(parent_task->fpu), sizeof(union fpu_state));
task_table[i].start_tick = get_clock_tick();
task_table[i].start_heap = 0;
task_table[i].end_heap = 0;
task_table[i].lwip_err = 0;
task_table[i].prio = parent_task->prio;
task_table[i].last_core = parent_task->last_core;
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[parent_task->prio-1].first) {
task_table[i].next = task_table[i].prev = NULL;
runqueues[core_id].queue[parent_task->prio-1].first = task_table+i;
runqueues[core_id].queue[parent_task->prio-1].last = task_table+i;
} else {
task_table[i].prev = runqueues[core_id].queue[parent_task->prio-1].last;
task_table[i].next = NULL;
runqueues[core_id].queue[parent_task->prio-1].last->next = task_table+i;
runqueues[core_id].queue[parent_task->prio-1].last = task_table+i;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
ret = arch_fork(task_table+i);
if (parent_task != per_core(current_task)) {
// Oh, the current task is the new child task!
// Leave the function without releasing the locks
// because the locks are already released
// by the parent task!
return 0;
}
if (!ret) {
task_table[i].status = TASK_READY;
ret = i;
}
child_task = &task_table[i];
break;
}
}
create_task_out:
if (BUILTIN_EXPECT(!child_task, 0)) {
ret = -ENOMEM;
goto out;
}
kprintf("sys_fork: parent id = %u, child id = %u\n", parent_task->id , child_task->id); // TODO: remove
atomic_int32_set(&child_task->user_usage, 0);
ret = copy_page_map(child_task, 1);
if (BUILTIN_EXPECT(ret < 0, 0)) {
ret = -ENOMEM;
goto out;
}
ret = copy_vma_list(parent_task, child_task);
if (BUILTIN_EXPECT(ret < 0, 0)) {
ret = -ENOMEM;
goto out;
}
child_task->id = i;
child_task->last_stack_pointer = NULL;
child_task->stack = create_stack();
if (BUILTIN_EXPECT(!child_task->stack, 0)) {
ret = -ENOMEM;
goto out;
}
// init fildes_table
child_task->fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
memcpy(child_task->fildes_table, parent_task->fildes_table, sizeof(filp_t)*NR_OPEN);
for (fd_i=0; fd_i<NR_OPEN; fd_i++) {
if ((child_task->fildes_table[fd_i]) != NULL)
child_task->fildes_table[fd_i]->count++;
}
// init mailbox
mailbox_wait_msg_init(&child_task->inbox);
memset(child_task->outbox, 0x00, sizeof(mailbox_wait_msg_t*)*MAX_TASKS);
child_task->outbox[parent_task->id] = &parent_task->inbox;
child_task->flags = parent_task->flags;
memcpy(&child_task->fpu, &parent_task->fpu, sizeof(union fpu_state));
child_task->start_tick = get_clock_tick();
child_task->lwip_err = 0;
child_task->prio = parent_task->prio;
child_task->last_core = parent_task->last_core;
// add task in the runqueue
spinlock_irqsave_lock(&runqueues[core_id].lock);
runqueues[core_id].prio_bitmap |= (1 << parent_task->prio);
runqueues[core_id].nr_tasks++;
if (!runqueues[core_id].queue[parent_task->prio-1].first) {
child_task->next = child_task->prev = NULL;
runqueues[core_id].queue[parent_task->prio-1].first = child_task;
runqueues[core_id].queue[parent_task->prio-1].last = child_task;
}
else {
child_task->prev = runqueues[core_id].queue[parent_task->prio-1].last;
child_task->next = NULL;
runqueues[core_id].queue[parent_task->prio-1].last->next = child_task;
runqueues[core_id].queue[parent_task->prio-1].last = child_task;
}
spinlock_irqsave_unlock(&runqueues[core_id].lock);
ret = arch_fork(child_task);
if (parent_task != per_core(current_task))
/*
* Oh, the current task is the new child task!
* Leave the function without releasing the locks
* because the locks are already released by the parent task!
*/
return 0;
if (!ret) {
child_task->status = TASK_READY;
ret = i;
}
out:
spinlock_irqsave_unlock(&table_lock);
spinlock_unlock(&parent_task->vma_lock);
return ret;
}
@ -515,7 +535,7 @@ static int kernel_entry(void* args)
ret = kernel_args->func(kernel_args->args);
kfree(kernel_args, sizeof(kernel_args_t));
kfree(kernel_args);
return ret;
}
@ -561,16 +581,15 @@ static int load_task(load_args_t* largs)
{
uint32_t i, offset, idx, fd_i;
uint32_t addr, npages, flags;
size_t stack = 0;
size_t stack = 0, heap = 0;
elf_header_t header;
elf_program_header_t prog_header;
//elf_section_header_t sec_header;
///!!! kfree is missing!
fildes_t *file = kmalloc(sizeof(fildes_t));
fildes_t *file = kmalloc(sizeof(fildes_t)); // TODO: kfree is missing!
file->offset = 0;
file->flags = 0;
// TODO: init the whole fildes_t struct!
task_t* curr_task = per_core(current_task);
int err;
@ -581,22 +600,22 @@ static int load_task(load_args_t* largs)
if (!file->node)
return -EINVAL;
/* init fildes_table */
// init fildes_table
spinlock_irqsave_lock(&table_lock);
if (!task_table[curr_task->id].fildes_table) {
task_table[curr_task->id].fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
if (BUILTIN_EXPECT(!task_table[curr_task->id].fildes_table, 0)) {
if (!curr_task->fildes_table) {
curr_task->fildes_table = kmalloc(sizeof(filp_t)*NR_OPEN);
if (BUILTIN_EXPECT(!curr_task->fildes_table, 0)) {
spinlock_irqsave_unlock(&table_lock);
return -ENOMEM;
}
memset(task_table[curr_task->id].fildes_table, 0x00, sizeof(filp_t)*NR_OPEN);
memset(curr_task->fildes_table, 0x00, sizeof(filp_t)*NR_OPEN);
for (fd_i = 0; fd_i < 3; fd_i++) {
task_table[curr_task->id].fildes_table[fd_i] = kmalloc(sizeof(fildes_t));
task_table[curr_task->id].fildes_table[fd_i]->count = 1;
curr_task->fildes_table[fd_i] = kmalloc(sizeof(fildes_t));
curr_task->fildes_table[fd_i]->count = 1;
}
task_table[curr_task->id].fildes_table[0]->node = findnode_fs("/dev/stdin");
task_table[curr_task->id].fildes_table[1]->node = findnode_fs("/dev/stdout");
task_table[curr_task->id].fildes_table[2]->node = findnode_fs("/dev/stderr");
curr_task->fildes_table[0]->node = findnode_fs("/dev/stdin");
curr_task->fildes_table[1]->node = findnode_fs("/dev/stdout");
curr_task->fildes_table[2]->node = findnode_fs("/dev/stderr");
}
spinlock_irqsave_unlock(&table_lock);
@ -617,43 +636,43 @@ static int load_task(load_args_t* largs)
if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_32, 0))
goto invalid;
#else
#elif defined(CONFIG_X86_64)
if (BUILTIN_EXPECT(header.machine != ELF_EM_X86_64, 0))
goto invalid;
if (BUILTIN_EXPECT(header.ident._class != ELF_CLASS_64, 0))
goto invalid;
#else
#error "unknown arch"
#endif
if (BUILTIN_EXPECT(header.ident.data != ELF_DATA_2LSB, 0))
goto invalid;
if (header.entry <= KERNEL_SPACE)
if (header.entry < KERNEL_SPACE)
goto invalid;
// interpret program header table
for (i=0; i<header.ph_entry_count; i++) {
file->offset = header.ph_offset+i*header.ph_entry_size;
if (read_fs(file, (uint8_t*)&prog_header, sizeof(elf_program_header_t)) == 0) {
if (read_fs(file, (uint8_t*) &prog_header, sizeof(elf_program_header_t)) == 0) {
kprintf("Could not read programm header!\n");
continue;
}
switch(prog_header.type)
{
switch(prog_header.type) {
case ELF_PT_LOAD: // load program segment
if (!prog_header.virt_addr)
continue;
npages = (prog_header.mem_size >> PAGE_SHIFT);
if (prog_header.mem_size & (PAGE_SIZE-1))
npages++;
npages = PAGE_FLOOR(prog_header.mem_size) >> PAGE_BITS;
addr = get_pages(npages);
flags = MAP_USER_SPACE;
if (prog_header.flags & PF_X)
flags |= MAP_CODE;
if (!(prog_header.flags & PF_W))
flags |= MAP_READ_ONLY;
// map page frames in the address space of the current task
if (!map_region(prog_header.virt_addr, addr, npages, flags)) {
@ -662,35 +681,30 @@ static int load_task(load_args_t* largs)
}
// clear pages
memset((void*) prog_header.virt_addr, 0x00, npages*PAGE_SIZE);
memset((void*) prog_header.virt_addr, 0x00, npages * PAGE_SIZE);
// set starting point of the heap
if (curr_task->start_heap < prog_header.virt_addr+prog_header.mem_size)
curr_task->start_heap = curr_task->end_heap = prog_header.virt_addr+prog_header.mem_size;
// update heap location
if (heap < prog_header.virt_addr + prog_header.mem_size)
heap = prog_header.virt_addr+prog_header.mem_size;
// load program
file->offset = prog_header.offset;
read_fs(file, (uint8_t*)prog_header.virt_addr, prog_header.file_size);
read_fs(file, (uint8_t*) prog_header.virt_addr, prog_header.file_size);
flags = VMA_CACHEABLE;
flags = VMA_CACHEABLE | VMA_USER;
if (prog_header.flags & PF_R)
flags |= VMA_READ;
if (prog_header.flags & PF_W)
flags |= VMA_WRITE;
if (prog_header.flags & PF_X)
flags |= VMA_EXECUTE;
vma_add(curr_task, prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags);
if (!(prog_header.flags & PF_W))
change_page_permissions(prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE-1, flags);
vma_add(prog_header.virt_addr, prog_header.virt_addr+npages*PAGE_SIZE, flags);
break;
case ELF_PT_GNU_STACK: // Indicates stack executability
// create user-level stack
npages = DEFAULT_STACK_SIZE >> PAGE_SHIFT;
if (DEFAULT_STACK_SIZE & (PAGE_SIZE-1))
npages++;
npages = PAGE_FLOOR(DEFAULT_STACK_SIZE) >> PAGE_BITS;
addr = get_pages(npages);
stack = header.entry*2; // virtual address of the stack
@ -701,14 +715,15 @@ static int load_task(load_args_t* largs)
memset((void*) stack, 0x00, npages*PAGE_SIZE);
// create vma regions for the user-level stack
flags = VMA_CACHEABLE;
flags = VMA_CACHEABLE | VMA_USER;
if (prog_header.flags & PF_R)
flags |= VMA_READ;
if (prog_header.flags & PF_W)
flags |= VMA_WRITE;
if (prog_header.flags & PF_X)
flags |= VMA_EXECUTE;
vma_add(curr_task, stack, stack+npages*PAGE_SIZE-1, flags);
vma_add(stack, stack+npages*PAGE_SIZE, flags);
break;
}
}
@ -726,8 +741,23 @@ static int load_task(load_args_t* largs)
}
#endif
// setup heap
if (!curr_task->heap)
curr_task->heap = (vma_t*) kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!curr_task->heap || !heap, 0)) {
kprintf("load_task: heap is missing!\n");
return -ENOMEM;
}
curr_task->heap->flags = VMA_HEAP|VMA_USER;
curr_task->heap->start = heap;
curr_task->heap->end = heap;
// TODO: insert into list
if (BUILTIN_EXPECT(!stack, 0)) {
kprintf("Stack is missing!\n");
kprintf("load_task: stack is missing!\n");
return -ENOMEM;
}
@ -740,9 +770,9 @@ static int load_task(load_args_t* largs)
// push argv on the stack
offset -= largs->argc * sizeof(char*);
for(i=0; i<largs->argc; i++) {
for (i=0; i<largs->argc; i++) {
((char**) (stack+offset))[i] = (char*) (stack+idx);
while(((char*) stack)[idx] != '\0')
idx++;
idx++;
@ -750,7 +780,7 @@ static int load_task(load_args_t* largs)
// push env on the stack
offset -= (largs->envc+1) * sizeof(char*);
for(i=0; i<largs->envc; i++) {
for (i=0; i<largs->envc; i++) {
((char**) (stack+offset))[i] = (char*) (stack+idx);
while(((char*) stack)[idx] != '\0')
@ -771,10 +801,10 @@ static int load_task(load_args_t* largs)
*((char***) (stack+offset)) = (char**) (stack + offset + 2*sizeof(char**) + (largs->envc+1) * sizeof(char*));
// push argc on the stack
offset -= sizeof(int);
offset -= sizeof(size_t);
*((int*) (stack+offset)) = largs->argc;
kfree(largs, sizeof(load_args_t));
kfree(largs);
// clear fpu state
curr_task->flags &= ~(TASK_FPU_USED|TASK_FPU_INIT);
@ -785,12 +815,12 @@ static int load_task(load_args_t* largs)
invalid:
kprintf("Invalid executable!\n");
kprintf("magic number 0x%x\n", (uint32_t) header.ident.magic);
kprintf("header type 0x%x\n", (uint32_t) header.type);
kprintf("machine type 0x%x\n", (uint32_t) header.machine);
kprintf("elf ident class 0x%x\n", (uint32_t) header.ident._class);
kprintf("elf identdata !0x%x\n", header.ident.data);
kprintf("program entry point 0x%x\n", (size_t) header.entry);
kprintf("Magic number: 0x%x\n", (uint32_t) header.ident.magic);
kprintf("Header type: 0x%x\n", (uint32_t) header.type);
kprintf("Machine type: 0x%x\n", (uint32_t) header.machine);
kprintf("ELF ident class: 0x%x\n", (uint32_t) header.ident._class);
kprintf("ELF ident data: 0x%x\n", header.ident.data);
kprintf("Program entry point: 0x%x\n", (size_t) header.entry);
return -EINVAL;
}
@ -806,7 +836,7 @@ static int user_entry(void* arg)
ret = load_task((load_args_t*) arg);
kfree(arg, sizeof(load_args_t));
kfree(arg);
return ret;
}
@ -824,7 +854,6 @@ static int user_entry(void* arg)
*/
int create_user_task_on_core(tid_t* id, const char* fname, char** argv, uint32_t core_id)
{
#ifdef CONFIG_X86_32
vfs_node_t* node;
int argc = 0;
size_t i, buffer_size = 0;
@ -860,18 +889,14 @@ int create_user_task_on_core(tid_t* id, const char* fname, char** argv, uint32_t
while ((*dest++ = *src++) != 0);
}
/* create new task */
// create new task
return create_task(id, user_entry, load_args, NORMAL_PRIO, core_id);
#else
return -EINVAL;
#endif
}
/** @brief Used by the execve-Systemcall */
int sys_execve(const char* fname, char** argv, char** env)
{
vfs_node_t* node;
vma_t* tmp;
size_t i, buffer_size = 0;
load_args_t* load_args = NULL;
char *dest, *src;
@ -879,6 +904,8 @@ int sys_execve(const char* fname, char** argv, char** env)
int envc = 0;
task_t* curr_task = per_core(current_task);
kprintf("sys_execve: fname = %s, argv = %p, env = %p\n", fname, argv, env); // TODO: remove
node = findnode_fs((char*) fname);
if (!node || !(node->type == FS_FILE))
return -EINVAL;
@ -920,14 +947,15 @@ int sys_execve(const char* fname, char** argv, char** env)
while ((*dest++ = *src++) != 0);
}
spinlock_lock(&curr_task->vma_lock);
// remove old program
while((tmp = curr_task->vma_list) != NULL) {
kfree((void*) tmp->start, tmp->end - tmp->start + 1);
curr_task->vma_list = tmp->next;
kfree((void*) tmp, sizeof(vma_t));
}
vma_t *vma;
for (vma=curr_task->vma_list; vma; vma = vma->next)
pfree((void*) vma->start, vma->end - vma->start);
// TODO: Heap?
spinlock_unlock(&curr_task->vma_lock);
@ -940,7 +968,7 @@ int sys_execve(const char* fname, char** argv, char** env)
ret = load_task(load_args);
kfree(load_args, sizeof(load_args_t));
kfree(load_args);
return ret;
}

View file

@ -26,21 +26,19 @@
#include <asm/atomic.h>
#include <asm/processor.h>
#include <asm/io.h>
#ifdef CONFIG_UART
#include <asm/uart.h>
#endif
#ifdef CONFIG_VGA
#include <asm/vga.h>
#endif
#define NO_EARLY_PRINT 0
#define VGA_EARLY_PRINT 1
#define UART_EARLY_PRINT 2
#ifdef CONFIG_VGA
static uint32_t early_print = VGA_EARLY_PRINT;
#elif defined(CONFIG_UART)
static uint32_t early_print = UART_EARLY_PRINT;
#else
static uint32_t early_print = NO_EARLY_PRINT;
#endif
static spinlock_irqsave_t olock = SPINLOCK_IRQSAVE_INIT;
static atomic_int32_t kmsg_counter = ATOMIC_INIT(0);
static unsigned char kmessages[KMSG_SIZE] __attribute__ ((section(".kmsg"))) = {[0 ... KMSG_SIZE-1] = 0x00};
@ -136,7 +134,7 @@ int kmsg_init(vfs_node_t * node, const char *name)
}
} while (blist);
kfree(new_node, sizeof(vfs_node_t));
kfree(new_node);
return -ENOMEM;
}
@ -145,6 +143,10 @@ int koutput_init(void)
{
#ifdef CONFIG_VGA
vga_init();
early_print |= VGA_EARLY_PRINT;
#endif
#ifdef CONFIG_UART
early_print |= UART_EARLY_PRINT;
#endif
return 0;
@ -161,12 +163,16 @@ int kputchar(int c)
kmessages[pos % KMSG_SIZE] = (unsigned char) c;
#ifdef CONFIG_VGA
if (early_print == VGA_EARLY_PRINT)
if (early_print & VGA_EARLY_PRINT)
vga_putchar(c);
#endif
#ifdef CONFIG_UART
if (early_print == UART_EARLY_PRINT)
if (early_print & UART_EARLY_PRINT) {
if (c == '\n')
uart_putchar('\r');
uart_putchar(c);
}
#endif
if (early_print != NO_EARLY_PRINT)
@ -186,11 +192,14 @@ int kputs(const char *str)
pos = atomic_int32_inc(&kmsg_counter);
kmessages[pos % KMSG_SIZE] = str[i];
#ifdef CONFIG_VGA
if (early_print == VGA_EARLY_PRINT)
if (early_print & VGA_EARLY_PRINT)
vga_putchar(str[i]);
#endif
#ifdef CONFIG_UART
if (early_print == UART_EARLY_PRINT)
if (early_print & UART_EARLY_PRINT) {
	if (str[i] == '\n')
		uart_putchar('\r');
	uart_putchar(str[i]);
}
#endif
}

View file

@ -1,4 +1,4 @@
C_source := memory.c vma.c
C_source := memory.c vma.c malloc.c
MODULE := mm
include $(TOPDIR)/Makefile.inc

mm/malloc.c Normal file
View file

@ -0,0 +1,213 @@
/*
* Copyright 2014 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
#include <metalsvm/malloc.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/stdio.h>
#include <metalsvm/memory.h>
/// A linked list for each binary size exponent
static buddy_t* buddy_lists[BUDDY_LISTS] = { NULL };
/// Lock for the buddy lists
static spinlock_t buddy_lock = SPINLOCK_INIT;
/** @brief Check if larger free buddies are available */
static inline int buddy_large_avail(uint8_t exp)
{
while (exp<BUDDY_MAX && !buddy_lists[exp-BUDDY_MIN])
exp++;
return exp != BUDDY_MAX;
}
/** @brief Calculate the required buddy size */
static inline int buddy_exp(size_t sz)
{
int exp;
for (exp=0; sz>(1<<exp); exp++);
if (exp > BUDDY_MAX)
exp = 0;
if (exp < BUDDY_MIN)
exp = BUDDY_MIN;
return exp;
}
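For example, the loop above rounds a request up to the next power of two:

// buddy_exp(100) -> exp = 7, since 1 << 7 = 128 is the first power >= 100
// buddy_exp(8)   -> exp = 3  (== BUDDY_MIN)
// buddy_exp(5)   -> exp = 3, requests below 8 byte are rounded up to BUDDY_MIN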
/** @brief Get a free buddy by potentially splitting a larger one */
static buddy_t* buddy_get(int exp)
{
spinlock_lock(&buddy_lock);
buddy_t** list = &buddy_lists[exp-BUDDY_MIN];
buddy_t* buddy = *list;
buddy_t* split;
if (buddy)
// there is already a free buddy =>
// we remove it from the list
*list = buddy->next;
else if (exp >= BUDDY_ALLOC && !buddy_large_avail(exp))
// there's no free buddy larger than exp =>
// we can allocate new memory
buddy = (buddy_t*) palloc(1<<exp, 0);
else {
// we recursively request a larger buddy...
buddy = buddy_get(exp+1);
if (BUILTIN_EXPECT(!buddy, 0))
goto out;
// ... and split it by putting the second half back on the list
split = (buddy_t*) ((size_t) buddy + (1<<exp));
split->next = *list;
*list = split;
}
out:
spinlock_unlock(&buddy_lock);
return buddy;
}
/** @brief Put a buddy back to its free list
*
* TODO: merge adjacent buddies (memory compaction)
*/
static void buddy_put(buddy_t* buddy)
{
spinlock_lock(&buddy_lock);
buddy_t** list = &buddy_lists[buddy->prefix.exponent-BUDDY_MIN];
buddy->next = *list;
*list = buddy;
spinlock_unlock(&buddy_lock);
}
void buddy_dump()
{
size_t free = 0;
int i;
for (i=0; i<BUDDY_LISTS; i++) {
buddy_t* buddy;
int exp = i+BUDDY_MIN;
if (buddy_lists[i])
kprintf("buddy_list[%u] (exp=%u, size=%lu bytes):\n", i, exp, 1<<exp);
for (buddy=buddy_lists[i]; buddy; buddy=buddy->next) {
kprintf(" %p -> %p \n", buddy, buddy->next);
free += 1<<exp;
}
}
kprintf("free buddies: %lu bytes\n", free);
}
void* palloc(size_t sz, uint32_t flags)
{
size_t phyaddr, viraddr;
uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
kprintf("palloc(%lu) (%lu pages)\n", sz, npages); // TODO: remove
// get free virtual address space
viraddr = vma_alloc(npages*PAGE_SIZE, VMA_HEAP);
if (BUILTIN_EXPECT(!viraddr, 0))
return NULL;
// get continous physical pages
phyaddr = get_pages(npages);
if (BUILTIN_EXPECT(!phyaddr, 0)) {
vma_free(viraddr, viraddr+npages*PAGE_SIZE);
return NULL;
}
// map physical pages to VMA
viraddr = map_region(viraddr, phyaddr, npages, flags);
if (BUILTIN_EXPECT(!viraddr, 0)) {
vma_free(viraddr, viraddr+npages*PAGE_SIZE);
put_pages(phyaddr, npages);
return NULL;
}
return (void*) viraddr;
}
void pfree(void* addr, size_t sz)
{
if (BUILTIN_EXPECT(!addr || !sz, 0))
return;
size_t i;
size_t phyaddr;
size_t viraddr = (size_t) addr & PAGE_MASK;
uint32_t npages = PAGE_FLOOR(sz) >> PAGE_BITS;
// memory is probably not continuously mapped! (userspace heap)
for (i=0; i<npages; i++) {
phyaddr = virt_to_phys(viraddr+i*PAGE_SIZE);
put_page(phyaddr);
}
unmap_region(viraddr, npages);
vma_free(viraddr, viraddr+npages*PAGE_SIZE);
}
void* kmalloc(size_t sz)
{
if (BUILTIN_EXPECT(!sz, 0))
return NULL;
// add space for the prefix
sz += sizeof(buddy_t);
int exp = buddy_exp(sz);
if (BUILTIN_EXPECT(!exp, 0))
return NULL;
buddy_t* buddy = buddy_get(exp);
if (BUILTIN_EXPECT(!buddy, 0))
return NULL;
// setup buddy prefix
buddy->prefix.magic = BUDDY_MAGIC;
buddy->prefix.exponent = exp;
kprintf("kmalloc(%lu) = %p\n", sz, buddy+1); // TODO: remove
// pointer arithmetic: we hide the prefix
return buddy+1;
}
void kfree(void *addr)
{
if (BUILTIN_EXPECT(!addr, 0))
return;
kprintf("kfree(%lu)\n", addr); // TODO: remove
buddy_t* buddy = (buddy_t*) addr - 1; // get prefix
// check magic
if (BUILTIN_EXPECT(buddy->prefix.magic != BUDDY_MAGIC, 0))
return;
buddy_put(buddy);
}
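Taken together, a hypothetical in-kernel use of the allocator (under the hood the free lists are refilled by palloc() in chunks of at least 1 << BUDDY_ALLOC byte):

int* array = (int*) kmalloc(64 * sizeof(int));	// 256 byte + prefix -> exp 9
if (array) {
	array[0] = 42;
	kfree(array);	// size is recovered from the hidden prefix
}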

View file

@ -20,7 +20,7 @@
#include <metalsvm/stdio.h>
#include <metalsvm/string.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/mmu.h>
#include <metalsvm/memory.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/time.h>
#include <metalsvm/processor.h>
@ -37,17 +37,15 @@
#endif
/*
* 0 => free
* 1 => occupied
*
* Set whole address space as occupied
* Set whole address space as occupied:
* 0 => free, 1 => occupied
*/
static uint8_t bitmap[BITMAP_SIZE]; // = {[0 ... BITMAP_SIZE-1] = 0xFF};
static spinlock_t bitmap_lock = SPINLOCK_INIT;
static size_t alloc_start;
atomic_int32_t total_pages = ATOMIC_INIT(0);
atomic_int32_t total_allocated_pages = ATOMIC_INIT(0);
atomic_int32_t total_available_pages = ATOMIC_INIT(0);
static uint8_t bitmap[BITMAP_SIZE] = {[0 ... BITMAP_SIZE-1] = 0xFF};
static spinlock_t bitmap_lock = SPINLOCK_INIT;
atomic_int32_t total_pages = ATOMIC_INIT(0);
atomic_int32_t total_allocated_pages = ATOMIC_INIT(0);
atomic_int32_t total_available_pages = ATOMIC_INIT(0);
/*
* Note that linker symbols are not variables, they have no memory allocated for
@ -64,20 +62,12 @@ inline static int page_marked(size_t i)
return (bitmap[index] & (1 << mod));
}
inline static int page_unmarked(size_t i)
{
return !page_marked(i);
}
inline static void page_set_mark(size_t i)
{
size_t index = i >> 3;
size_t mod = i & 0x7;
//if (page_marked(i))
// kprintf("page %u is alread marked\n", i);
bitmap[index] = bitmap[index] | (1 << mod);
}
inline static void page_clear_mark(size_t i)
@ -85,107 +75,214 @@ inline static void page_clear_mark(size_t i)
size_t index = i / 8;
size_t mod = i % 8;
if (page_unmarked(i))
kprintf("page %u is already unmarked\n", i);
bitmap[index] = bitmap[index] & ~(1 << mod);
}
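The index arithmetic above maps page i to byte i/8, bit i%8 of the bitmap; for instance:

// page 13: index = 13 >> 3 = 1, mod = 13 & 0x7 = 5
// -> page 13 is tracked by bit 5 of bitmap[1]
// page_set_mark(13)   does bitmap[1] |=  (1 << 5)
// page_clear_mark(13) does bitmap[1] &= ~(1 << 5)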
size_t get_pages(size_t npages)
{
size_t cnt, off;
if (BUILTIN_EXPECT(!npages, 0))
return 0;
if (BUILTIN_EXPECT(npages > atomic_int32_read(&total_available_pages), 0))
return 0;
spinlock_lock(&bitmap_lock);
off = 1;
while (off <= BITMAP_SIZE*8 - npages) {
for (cnt=0; cnt<npages; cnt++) {
if (page_marked(off+cnt))
goto next;
}
for (cnt=0; cnt<npages; cnt++) {
page_set_mark(off+cnt);
}
spinlock_unlock(&bitmap_lock);
atomic_int32_add(&total_allocated_pages, npages);
atomic_int32_sub(&total_available_pages, npages);
return off << PAGE_BITS;
next: off += cnt+1;
}
spinlock_unlock(&bitmap_lock);
return 0;
}
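A short trace of the first-fit scan above, for a hypothetical bitmap state:

// bitmap: pages 1..3 marked, pages 4..9 free; request npages = 4
// off = 1: page 1 marked -> next, off += cnt+1 = 2
// off = 2, 3: same, off advances to 4
// off = 4: pages 4..7 are free -> mark them, return 4 << PAGE_BITS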
int put_pages(size_t phyaddr, size_t npages)
{
size_t i, ret = 0;
size_t base = phyaddr >> PAGE_BITS;
if (BUILTIN_EXPECT(!phyaddr, 0))
return -EINVAL;
if (BUILTIN_EXPECT(!npages, 0))
return -EINVAL;
spinlock_lock(&bitmap_lock);
for (i=0; i<npages; i++) {
if (page_marked(base+i)) {
page_clear_mark(base+i);
ret++;
}
}
spinlock_unlock(&bitmap_lock);
atomic_int32_sub(&total_allocated_pages, ret);
atomic_int32_add(&total_available_pages, ret);
kprintf("put_pages: phyaddr=%#lx, npages = %d, ret = %d\n",
phyaddr, npages, ret); // TODO: remove
return ret;
}
int copy_page(size_t pdest, size_t psrc)
{
static size_t viraddr;
if (!viraddr) { // statically allocate virtual memory area
viraddr = vma_alloc(2 * PAGE_SIZE, VMA_HEAP);
if (BUILTIN_EXPECT(!viraddr, 0))
return -ENOMEM;
}
// map pages
size_t vsrc = map_region(viraddr, psrc, 1, MAP_KERNEL_SPACE);
size_t vdest = map_region(viraddr + PAGE_SIZE, pdest, 1, MAP_KERNEL_SPACE);
if (BUILTIN_EXPECT(!vsrc || !vdest, 0)) {
unmap_region(viraddr, 2);
return -ENOMEM;
}
kprintf("copy_page: copy page frame from: %#lx (%#lx) to %#lx (%#lx)\n", vsrc, psrc, vdest, pdest); // TODO remove
// copy the whole page
memcpy((void*) vdest, (void*) vsrc, PAGE_SIZE);
// householding
unmap_region(viraddr, 2);
return 0;
}
int mmu_init(void)
{
size_t kernel_size;
unsigned int i;
size_t addr;
int ret = 0;
// at first, set default value of the bitmap
memset(bitmap, 0xFF, sizeof(uint8_t)*BITMAP_SIZE);
#ifdef CONFIG_MULTIBOOT
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) {
size_t end_addr;
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) ((size_t) mb_info->mmap_addr);
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
if (mb_info) {
if (mb_info->flags & MULTIBOOT_INFO_MEM_MAP) {
multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) ((size_t) mb_info->mmap_addr);
multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length);
while (mmap < mmap_end) {
if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) {
/* set the available memory as "unused" */
addr = mmap->addr;
end_addr = addr + mmap->len;
while (addr < end_addr) {
page_clear_mark(addr >> PAGE_SHIFT);
addr += PAGE_SIZE;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
// mark available memory as free
while (mmap < mmap_end) {
if (mmap->type == MULTIBOOT_MEMORY_AVAILABLE) {
for (addr=mmap->addr; addr < mmap->addr + mmap->len; addr += PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
}
mmap++;
}
mmap++;
}
} else {
kputs("Unable to initialize the memory management subsystem\n");
while(1) {
HALT;
else if (mb_info->flags & MULTIBOOT_INFO_MEM) {
size_t page;
size_t pages_lower = mb_info->mem_lower >> 2; /* KiB to page number */
size_t pages_upper = mb_info->mem_upper >> 2;
for (page=0; page<pages_lower; page++)
page_clear_mark(page);
for (page=0; page<pages_upper; page++)
page_clear_mark(page + 256); /* 1 MiB == 256 pages offset */
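/* Worked example (assumption: 4 KiB pages): mem_upper counts KiB above
 * 1 MiB, so mem_upper >> 2 converts KiB to 4-KiB pages, and the upper
 * area starts 1024 KiB / 4 KiB = 256 pages into the bitmap. */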
atomic_int32_add(&total_pages, pages_lower + pages_upper);
atomic_int32_add(&total_available_pages, pages_lower + pages_upper);
}
else {
kputs("Unable to initialize the memory management subsystem\n");
while (1) HALT;
}
// mark mb_info as used
page_set_mark((size_t) mb_info >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
// mark modules list as used
if (mb_info->flags & MULTIBOOT_INFO_MODS) {
for(addr=mb_info->mods_addr; addr<mb_info->mods_addr+mb_info->mods_count*sizeof(multiboot_module_t); addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
}
}
#elif defined(CONFIG_ROCKCREEK)
/* of course, the first slots belong to the private memory */
// of course, the first slots belong to the private memory
for(addr=0x00; addr<1*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_SHIFT);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
// Note: The last slot belongs always to the private memory.
for(addr=0xFF000000; addr<0xFFFFFFFF; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_SHIFT);
page_clear_mark(addr >> PAGE_BITS);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
atomic_int32_inc(&total_available_pages);
}
/*
* Mark the bootinfo as used.
*/
page_set_mark((size_t)bootinfo >> PAGE_SHIFT);
// mark the bootinfo as used.
page_set_mark((size_t)bootinfo >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
#else
#error Currently, MetalSVM supports only the Multiboot specification or the RockCreek processor!
#endif
// mark kernel as used
for(addr=(size_t) &kernel_start; addr<(size_t) &kernel_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
#if MAX_CORES > 1
// reserve physical page for SMP boot code
page_set_mark(SMP_SETUP_ADDR >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
#endif
// enable paging and map SMP, VGA, Multiboot modules etc.
ret = paging_init();
if (BUILTIN_EXPECT(ret, 0)) {
kprintf("Failed to initialize paging: %d\n", ret);
return ret;
}
ret = vma_init();
if (BUILTIN_EXPECT(ret, 0)) {
kprintf("Failed to initialize VMA regions: %d\n", ret);
return ret;
}
#ifdef CONFIG_MULTIBOOT
/*
* Modules like the init ram disk are already loaded.
@ -193,23 +290,9 @@ int mmu_init(void)
*/
if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
for(i=0; i<mb_info->mods_count; i++) {
for(addr=mmodule[i].mod_start; addr<mmodule[i].mod_end; addr+=PAGE_SIZE) {
page_set_mark(addr >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
@ -228,7 +311,7 @@ int mmu_init(void)
// define the residual private slots as free
for(addr=1*0x1000000; addr<slots*0x1000000; addr+=PAGE_SIZE) {
page_clear_mark(addr >> PAGE_BITS);
if (addr > addr + PAGE_SIZE)
break;
atomic_int32_inc(&total_pages);
@ -239,9 +322,9 @@ int mmu_init(void)
* The init ram disk is already loaded.
* Therefore, we set these pages as used.
*/
for(addr=bootinfo->addr; addr<bootinfo->addr+bootinfo->size; addr+=PAGE_SIZE) {
// this area is already mapped, so we need to virt_to_phys() these addresses.
page_set_mark(virt_to_phys(addr) >> PAGE_BITS);
atomic_int32_inc(&total_allocated_pages);
atomic_int32_dec(&total_available_pages);
}
@ -250,148 +333,3 @@ int mmu_init(void)
return ret;
}
/*
* Use a first-fit algorithm to find a suitable physical memory region
*/
size_t get_pages(uint32_t npages)
{
uint32_t i, j, l;
uint32_t k = 0;
size_t ret = 0;
if (BUILTIN_EXPECT(!npages, 0))
return ret;
if (BUILTIN_EXPECT(npages > atomic_int32_read(&total_available_pages), 0))
return ret;
spinlock_lock(&bitmap_lock);
i = alloc_start;
next_try:
while((k < BITMAP_SIZE) && page_marked(i)) {
k++;
i = (i+1) & (BITMAP_SIZE-1);
}
if (k >= BITMAP_SIZE)
goto oom;
for(j=1; (j<npages) && (i+j < BITMAP_SIZE) && (k < BITMAP_SIZE); j++, k++) {
if (page_marked(i+j)) {
i = (i+j) & (BITMAP_SIZE-1);
goto next_try;
}
}
if (i+j >= BITMAP_SIZE) {
i = 0;
goto next_try;
}
if (k >= BITMAP_SIZE)
goto oom;
ret = i*PAGE_SIZE;
//kprintf("alloc: ret 0x%x, i = %d, j = %d, npages = %d\n", ret, i, j, npages);
for(l=i; l<i+j; l++)
page_set_mark(l);
alloc_start = i+j;
spinlock_unlock(&bitmap_lock);
atomic_int32_add(&total_allocated_pages, npages);
atomic_int32_sub(&total_available_pages, npages);
return ret;
oom:
spinlock_unlock(&bitmap_lock);
return ret;
}
int put_page(size_t phyaddr)
{
uint32_t index = phyaddr >> PAGE_SHIFT;
if (BUILTIN_EXPECT(!phyaddr, 0))
return -EINVAL;
spinlock_lock(&bitmap_lock);
page_clear_mark(index);
spinlock_unlock(&bitmap_lock);
atomic_int32_sub(&total_allocated_pages, 1);
atomic_int32_add(&total_available_pages, 1);
return 0;
}
void* mem_allocation(size_t sz, uint32_t flags)
{
size_t phyaddr, viraddr;
uint32_t npages = sz >> PAGE_SHIFT;
if (sz & (PAGE_SIZE-1))
npages++;
phyaddr = get_pages(npages);
if (BUILTIN_EXPECT(!phyaddr, 0))
return 0;
viraddr = map_region(0, phyaddr, npages, flags);
return (void*) viraddr;
}
void* kmalloc(size_t sz)
{
return mem_allocation(sz, MAP_KERNEL_SPACE);
}
void kfree(void* addr, size_t sz)
{
uint32_t index, npages, i;
size_t phyaddr;
if (BUILTIN_EXPECT(!addr && !sz, 0))
return;
npages = sz >> PAGE_SHIFT;
if (sz & (PAGE_SIZE-1))
npages++;
spinlock_lock(&bitmap_lock);
for(i=0; i<npages; i++) {
unmap_region((size_t) addr+i*PAGE_SIZE, 1);
phyaddr = virt_to_phys((size_t) addr+i*PAGE_SIZE);
if (!phyaddr)
continue;
index = phyaddr >> PAGE_SHIFT;
page_clear_mark(index);
}
spinlock_unlock(&bitmap_lock);
vm_free((size_t) addr, npages);
atomic_int32_sub(&total_allocated_pages, npages);
atomic_int32_add(&total_available_pages, npages);
}
void* create_stack(void)
{
return kmalloc(KERNEL_STACK_SIZE);
}
int destroy_stack(task_t* task)
{
if (BUILTIN_EXPECT(!task || !task->stack, 0))
return -EINVAL;
kfree(task->stack, KERNEL_STACK_SIZE);
return 0;
}
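A minimal usage sketch of this allocator interface (hypothetical caller, not part of this commit): kmalloc() rounds the request up to whole pages, and kfree() must be passed the same size again, since the bitmap stores no allocation lengths.
// editor's sketch: allocate two pages in kernel space and release them again
static void alloc_demo(void)
{
void* buf = kmalloc(2*PAGE_SIZE);
if (BUILTIN_EXPECT(!buf, 0))
return; // no physical pages left or mapping failed
((char*) buf)[0] = 42; // region is mapped and writable here
kfree(buf, 2*PAGE_SIZE); // size must match the original request
}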

417
mm/vma.c
View file

@ -1,5 +1,5 @@
/*
* Copyright 2011 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -17,85 +17,392 @@
* This file is part of MetalSVM.
*/
#include <metalsvm/vma.h>
#include <metalsvm/stdlib.h>
#include <metalsvm/stdio.h>
#include <metalsvm/tasks_types.h>
#include <metalsvm/spinlock.h>
#include <metalsvm/errno.h>
#ifdef CONFIG_MULTIBOOT
#include <asm/multiboot.h>
#endif
/*
* Note that linker symbols are not variables, they have no memory allocated for
* maintaining a value, rather their address is their value.
*/
extern const void kernel_start;
extern const void kernel_end;
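To make that note concrete (editor's sketch, not part of this commit), the symbols are only ever used through their addresses, exactly as mmu_init() computes the kernel size:
// &kernel_start and &kernel_end yield link-time addresses; reading the
// symbols as variables would dereference the bytes stored there instead
static inline size_t kernel_image_size(void)
{
return (size_t) &kernel_end - (size_t) &kernel_start;
}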
/*
* Kernel space VMA list and lock
*
* For bootstrapping we initialize the VMA list with one empty VMA
* (start == end) and expand this VMA by calls to vma_alloc()
*/
static vma_t vma_boot = { VMA_KERN_MIN, VMA_KERN_MIN, VMA_HEAP };
static vma_t* vma_list = &vma_boot;
static spinlock_t vma_lock = SPINLOCK_INIT;
// TODO: we might move the architecture specific VMA regions to a
// separate function arch_vma_init()
int vma_init()
{
int ret;
// add Kernel
ret = vma_add(PAGE_CEIL((size_t) &kernel_start),
PAGE_FLOOR((size_t) &kernel_end),
VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
if (BUILTIN_EXPECT(ret, 0))
goto out;
#ifdef CONFIG_VGA
// add VGA video memory
ret = vma_add(VIDEO_MEM_ADDR, VIDEO_MEM_ADDR + PAGE_SIZE, VMA_READ|VMA_WRITE);
if (BUILTIN_EXPECT(ret, 0))
goto out;
#endif
#if MAX_CORES > 1
// add SMP boot page
ret = vma_add(SMP_SETUP_ADDR, SMP_SETUP_ADDR + PAGE_SIZE,
VMA_READ|VMA_WRITE|VMA_EXECUTE|VMA_CACHEABLE);
if (BUILTIN_EXPECT(ret, 0))
goto out;
#endif
#ifdef CONFIG_MULTIBOOT
// add Multiboot structures as modules
if (mb_info) {
ret = vma_add(PAGE_CEIL((size_t) mb_info),
PAGE_FLOOR((size_t) mb_info + sizeof(multiboot_info_t)),
VMA_READ|VMA_CACHEABLE);
if (BUILTIN_EXPECT(ret, 0))
goto out;
if (mb_info->flags & MULTIBOOT_INFO_MEM_MAP) {
ret = vma_add(PAGE_CEIL((size_t) mb_info->mmap_addr),
PAGE_FLOOR((size_t) mb_info->mmap_addr + mb_info->mmap_length),
VMA_READ|VMA_CACHEABLE);
}
if (mb_info->flags & MULTIBOOT_INFO_MODS) {
multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr);
ret = vma_add(PAGE_CEIL((size_t) mb_info->mods_addr),
PAGE_FLOOR((size_t) mb_info->mods_addr + mb_info->mods_count*sizeof(multiboot_module_t)),
VMA_READ|VMA_CACHEABLE);
int i;
for(i=0; i<mb_info->mods_count; i++) {
ret = vma_add(PAGE_CEIL(mmodule[i].mod_start),
PAGE_FLOOR(mmodule[i].mod_end),
VMA_READ|VMA_WRITE|VMA_CACHEABLE);
if (BUILTIN_EXPECT(ret, 0))
goto out;
}
}
}
#endif
out:
return ret;
}
size_t vma_alloc(size_t size, uint32_t flags)
{
task_t* task = per_core(current_task);
spinlock_t* lock;
vma_t** list;
kprintf("vma_alloc: size = %#lx, flags = %#x\n", size, flags); // TODO: remove
size_t base, limit; // boundaries for search
size_t start, end; // boundaries of free gaps
if (flags & VMA_USER) {
base = VMA_USER_MIN;
limit = VMA_USER_MAX;
list = &task->vma_list;
lock = &task->vma_lock;
}
else {
base = VMA_KERN_MIN;
limit = VMA_KERN_MAX;
list = &vma_list;
lock = &vma_lock;
}
spinlock_lock(lock);
// first fit search for free memory area
vma_t* pred = NULL; // vma before current gap
vma_t* succ = *list; // vma after current gap
do {
start = (pred) ? pred->end : base;
end = (succ) ? succ->start : limit;
if (start + size < end && start >= base && start + size < limit)
goto found; // we found a gap which is large enough and in the bounds
pred = succ;
succ = (pred) ? pred->next : NULL;
} while (pred || succ);
fail:
spinlock_unlock(lock); // we were unlucky to find a free gap
return 0;
found:
if (pred && pred->flags == flags)
pred->end = start + size; // resize VMA
else {
// insert new VMA
vma_t* new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0))
goto fail;
new->start = start;
new->end = start + size;
new->flags = flags;
new->next = succ;
new->prev = pred;
if (succ)
succ->prev = new;
if (pred)
pred->next = new;
else
*list = new;
}
spinlock_unlock(lock);
return start;
}
int vma_free(size_t start, size_t end)
{
task_t* task = per_core(current_task);
spinlock_t* lock;
vma_t* vma;
vma_t** list;
kprintf("vma_free: start = %#lx, end = %#lx\n", start, end); // TODO: remove
if (BUILTIN_EXPECT(start >= end, 0))
return -EINVAL;
if (end < VMA_KERN_MAX) {
lock = &vma_lock;
list = &vma_list;
}
else if (start >= VMA_KERN_MAX) {
lock = &task->vma_lock;
list = &task->vma_list;
}
if (BUILTIN_EXPECT(!*list, 0))
return -EINVAL;
spinlock_lock(lock);
// search vma
vma = *list;
while (vma) {
if (start >= vma->start && end <= vma->end) break;
vma = vma->next;
}
if (BUILTIN_EXPECT(!vma, 0)) {
spinlock_unlock(lock);
return -EINVAL;
}
// free/resize vma
if (start == vma->start && end == vma->end) {
if (vma == *list)
*list = vma->next; // update list head
if (vma->prev)
vma->prev->next = vma->next;
if (vma->next)
vma->next->prev = vma->prev;
kfree(vma);
}
else if (start == vma->start)
vma->start = end;
else if (end == vma->end)
vma->end = start;
else {
vma_t* new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(lock);
return -ENOMEM;
}
new->end = vma->end;
vma->end = start;
new->start = end;
new->next = vma->next;
vma->next = new;
new->prev = vma;
}
spinlock_unlock(lock);
return 0;
}
int vma_add(size_t start, size_t end, uint32_t flags)
{
task_t* task = per_core(current_task);
spinlock_t* lock;
vma_t** list;
if (BUILTIN_EXPECT(start >= end, 0))
return -EINVAL;
if (flags & VMA_USER) {
list = &task->vma_list;
lock = &task->vma_lock;
// check if address is in userspace
if (BUILTIN_EXPECT(start < VMA_KERN_MAX, 0))
return -EINVAL;
}
else {
list = &vma_list;
lock = &vma_lock;
// check if address is in kernelspace
if (BUILTIN_EXPECT(end >= VMA_KERN_MAX, 0))
return -EINVAL;
}
kprintf("vma_add: start = %#lx, end = %#lx, flags = %#x\n", start, end, flags); // TODO: remove
spinlock_lock(lock);
// search gap
vma_t* pred = NULL;
vma_t* succ = *list;
while (pred || succ) {
if ((!pred || pred->end <= start) &&
(!succ || succ->start >= end))
break;
pred = succ;
succ = (succ) ? succ->next : NULL;
}
if (BUILTIN_EXPECT(*list && !pred && !succ, 0)) {
spinlock_unlock(lock);
return -EINVAL;
}
// insert new VMA
vma_t* new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(lock);
return -ENOMEM;
}
new->start = start;
new->end = end;
new->flags = flags;
new->next = succ;
new->prev = pred;
if (succ)
succ->prev = new;
if (pred)
pred->next = new;
else
*list = new;
spinlock_unlock(lock);
return 0;
}
int copy_vma_list(task_t* src, task_t* dest)
{
spinlock_init(&dest->vma_lock);
spinlock_lock(&src->vma_lock);
spinlock_lock(&dest->vma_lock);
vma_t* last = NULL;
vma_t* old;
for (old=src->vma_list; old; old=old->next) {
vma_t *new = kmalloc(sizeof(vma_t));
if (BUILTIN_EXPECT(!new, 0)) {
spinlock_unlock(&dest->vma_lock);
spinlock_unlock(&src->vma_lock);
return -ENOMEM;
}
new->start = old->start;
new->end = old->end;
new->flags = old->flags;
new->prev = last;
if (last)
last->next = new;
else
dest->vma_list = new;
last = new;
}
spinlock_unlock(&dest->vma_lock);
spinlock_unlock(&src->vma_lock);
return 0;
}
int drop_vma_list(task_t *task)
{
vma_t* vma;
spinlock_lock(&task->vma_lock);
while ((vma = task->vma_list)) {
task->vma_list = vma->next;
kfree(vma);
}
spinlock_unlock(&task->vma_lock);
return 0;
}
void vma_dump()
{
void print_vma(vma_t *vma) {
while (vma) {
kprintf("0x%lx - 0x%lx: size=%x, flags=%c%c%c\n", vma->start, vma->end, vma->end - vma->start,
(vma->flags & VMA_READ) ? 'r' : '-',
(vma->flags & VMA_WRITE) ? 'w' : '-',
(vma->flags & VMA_EXECUTE) ? 'x' : '-');
vma = vma->next;
}
}
task_t* task = per_core(current_task);
kputs("Kernelspace VMAs:\n");
spinlock_lock(&vma_lock);
print_vma(vma_list);
spinlock_unlock(&vma_lock);
kputs("Userspace VMAs:\n");
spinlock_lock(&task->vma_lock);
print_vma(task->vma_list);
spinlock_unlock(&task->vma_lock);
}
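For illustration, a hypothetical in-kernel caller of the reworked interface (the flag combination is assumed; leaving out VMA_USER selects the kernel list and lock):
static void vma_demo(void)
{
// reserve three pages of kernel address space; vma_alloc() returns 0 on failure
size_t start = vma_alloc(3*PAGE_SIZE, VMA_HEAP|VMA_CACHEABLE);
if (BUILTIN_EXPECT(!start, 0))
return;
vma_dump(); // print kernel and user space VMA lists
vma_free(start, start + 3*PAGE_SIZE); // release the region again
}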

View file

@ -1,6 +1,6 @@
TOPDIR = $(shell pwd)
ARCH = x86
BIT = 32
ifeq ($(ARCH),x86)
ifeq ($(BIT),32)
@ -19,10 +19,10 @@ TMP = $(TOPDIR)/tmp
OPT = --disable-shared --disable-multilib --enable-newlib-hw-fp --disable-newlib-multithread --disable-newlib-reent-small
default: $(ARCH)
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) TARGET=$(TARGET) CFLAGS+="-ffreestanding -Wall -I$(NEWLIB)/include -I../../include -I../../arch/$(ARCH)/include" LDFLAGS+="-nostdlib -L$(NEWLIB)/lib" -C net depend
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) TARGET=$(TARGET) CFLAGS+="-ffreestanding -Wall -I$(NEWLIB)/include -I../../include -I../../arch/$(ARCH)/include" LDFLAGS+="-nostdlib -L$(NEWLIB)/lib" -C net
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) TARGET=$(TARGET) CFLAGS+="-ffreestanding -Wall -I$(NEWLIB)/include -I../../include -I../../arch/$(ARCH)/include" LDFLAGS+="-nostdlib -L$(NEWLIB)/lib" -C examples depend
$(MAKE) ARCH=$(ARCH) BIT=$(BIT) TARGET=$(TARGET) CFLAGS+="-ffreestanding -Wall -I$(NEWLIB)/include -I../../include -I../../arch/$(ARCH)/include" LDFLAGS+="-nostdlib -L$(NEWLIB)/lib" -C examples
$(ARCH):
$(RM) $(TMP)

View file

@ -1,68 +1,84 @@
ARCH = x86
BIT = 32
NEWLIB = ../x86/i586-metalsvm-elf32
MAKE = make
STRIP_DEBUG = --strip-debug
KEEP_DEBUG = --only-keep-debug
LDFLAGS =
override LDFLAGS += -T link.ld
ifeq ($(BIT),64)
# Default section offsets in x86-64 ELF files are aligned to the page size.
# For x86-64 the page size is huge (2 MB), with the consequence of large sparse
# ELF files (which lead to a huge initrd). To solve this, we manually set the page size to 4 KB.
override LDFLAGS += -Wl,-n,-z,max-page-size=0x1000
endif
# other implicit rules
%.o : %.c
$(CC_FOR_TARGET) -c $(CFLAGS) -o $@ $<
default: all
all: memtest hello tests jacobi mshell server client rlogind fork
jacobi: jacobi.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< -lm
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
memtest: memtest.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $<
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
fork: fork.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $<
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
tests: tests.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $<
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
hello: hello.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $<
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
mshell: mshell.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $<
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
rlogind: rlogind.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< -lsocket
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
server: server.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< -lsocket
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
client: client.o
$(CC_FOR_TARGET) $(LDFLAGS) -o $@ $< -lsocket
$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $@ $@.sym
$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $@
chmod a-x $@.sym
clean:
$(RM) hello tests server client rlogind memtest fork mshell jacobi *.sym *.o *~
depend:
$(CC_FOR_TARGET) -MM $(CFLAGS) *.c > Makefile.dep

59
newlib/examples/fork.c Normal file
View file

@ -0,0 +1,59 @@
/*
* Copyright 2013 Steffen Vogel, Chair for Operating Systems,
* RWTH Aachen University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <errno.h>
int main(int argc, char** argv)
{
printf("======== USER: test forking...\n");
int id = fork();
int ret = 0;
if (id < 0) {
perror(" PARENT: fork");
exit(-1);
}
else if (id == 0) {
printf(" CHILD: This is the child. My pid is %u\n", getpid());
printf(" CHILD: Running memtest...\n");
const char *argv[] = {"/bin/memtest", "512", "kb", "10", NULL};
execve(argv[0], argv, NULL);
perror(" CHILD: exec() returned: ");
ret = -1;
}
else {
printf(" PARENT: Here is the parent. My pid is %u\n", getpid());
wait(&ret);
printf(" PARENT: My child returned with code %i...\n", ret);
}
return ret;
}

View file

@ -68,7 +68,7 @@ int main(int argc, char** argv)
exit(1);
}
testdirent = readdir(testdir);
printf("1. Dirent: %s", testdirent->d_name);
printf("1. Dirent: %s\n", testdirent->d_name);
closedir(testdir);
return errno;

View file

@ -17,44 +17,90 @@
* This file is part of MetalSVM.
*/
/**
* @author Steffen Vogel <steffen.vogel@rwth-aachen.de>
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <dirent.h>
#include <time.h>
#include <sys/times.h>
void sleep(int sec) {
struct tms tms;
clock_t t, s = times(&tms);
do {
t = times(&tms);
}
while (t - s <= 1000 * sec);
}
int print_usage() {
printf("usage: [size mb/kb/b]");
exit(0);
printf("usage: size mb/kb/b [chunks]\n");
exit(-1);
}
int main(int argc, char** argv)
{
int multp = 0;
int size = 0;
int chunks = 1;
void **test;
if (argc <= 2 || argc > 4)
print_usage();
size = atoi(argv[1]);
if (size <= 0)
print_usage();
if (!strcasecmp(argv[2], "mb"))
multp = (1 << 20);
else if (!strcasecmp(argv[2], "kb"))
multp = (1 << 10);
else if (!strcasecmp(argv[2], "b"))
multp = (1 << 0);
else
print_usage();
size *= multp;
if (argc == 4)
chunks = atoi(argv[3]);
test = malloc(chunks * sizeof(void *));
printf("malloc(%lu)\n", chunks * sizeof(void *));
if (!test) {
printf("malloc(%lu) - FAILED!\n", chunks * sizeof(void *));
exit(-1);
}
// allocate...
int i;
for (i = 0; i < chunks; i++) {
test[i] = malloc(size);
if (test[i])
printf("malloc(%d)\tCHUNK: %d START: %p END: %p\n", size, i, test[i], test[i] + size);
else
printf("malloc(%d)\tFAILED! Abort allocation, start with freeing memory\n", size);
sleep(1);
}
// and release again
for (i = 0; i < chunks; i++) {
if (test[i]) {
free(test[i]);
printf("free(%p)\tCHUNK: %d\n", test[i], i);
}
sleep(1);
}
free(test);
printf("free(%p)\n", test);
return 0;
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -1,248 +0,0 @@
#
# Makefile.in for etc
#
DESTDIR =
prefix = /tmp
exec_prefix = ${prefix}
srcdir = .
bindir = ${exec_prefix}/bin
libdir = ${exec_prefix}/lib
tooldir = $(libdir)
datadir = ${datarootdir}
mandir = ${datarootdir}/man
man1dir = $(mandir)/man1
man2dir = $(mandir)/man2
man3dir = $(mandir)/man3
man4dir = $(mandir)/man4
man5dir = $(mandir)/man5
man6dir = $(mandir)/man6
man7dir = $(mandir)/man7
man8dir = $(mandir)/man8
man9dir = $(mandir)/man9
datarootdir = ${prefix}/share
docdir = ${datarootdir}/doc/${PACKAGE}
infodir = ${datarootdir}/info
pdfdir = ${datarootdir}/doc/${PACKAGE}
htmldir = ${docdir}
SHELL = /bin/sh
INSTALL = /usr/bin/install -c
INSTALL_PROGRAM = /usr/bin/install -c
INSTALL_DATA = /usr/bin/install -c -m 644
MAKEINFO = `if [ -f ../texinfo/makeinfo/makeinfo ]; \
then echo ../texinfo/makeinfo/makeinfo; \
else echo makeinfo; fi`
TEXI2DVI = `if [ -f ../texinfo/util/texi2dvi ]; \
then echo ../texinfo/util/texi2dvi; \
else echo texi2dvi; fi`
TEXI2PDF = `if [ -f ../texinfo/util/texi2dvi ]; \
then echo "../texinfo/util/texi2dvi --pdf"; \
else echo "texi2dvi --pdf"; fi`
TEXI2HTML = `if [ -f ../texinfo/makeinfo/makeinfo ]; \
then echo "../texinfo/makeinfo/makeinfo --html"; \
else echo "makeinfo --html"; fi`
DVIPS = dvips
# Where to find texinfo.tex to format documentation with TeX.
TEXIDIR = $(srcdir)/../texinfo
#### Host, target, and site specific Makefile fragments come in here.
###
INFOFILES = standards.info configure.info
DVIFILES = standards.dvi configure.dvi
PDFFILES = standards.pdf configure.pdf
HTMLFILES = standards.html configure.html
all: info
install install-strip: install-info
uninstall:
info:
for f in $(INFOFILES); do \
if test -f $(srcdir)/`echo $$f | sed -e 's/.info$$/.texi/'`; then \
if $(MAKE) "MAKEINFO=$(MAKEINFO)" $$f; then \
true; \
else \
exit 1; \
fi; \
fi; \
done
install-info: info
$(SHELL) $(srcdir)/../mkinstalldirs $(DESTDIR)$(infodir)
if test ! -f standards.info; then cd $(srcdir); fi; \
if test -f standards.info; then \
for i in standards.info*; do \
$(INSTALL_DATA) $$i $(DESTDIR)$(infodir)/$$i; \
done; \
fi
if test ! -f configure.info; then cd $(srcdir); fi; \
if test -f configure.info; then \
for i in configure.info*; do \
$(INSTALL_DATA) $$i $(DESTDIR)$(infodir)/$$i; \
done; \
fi
html:
for f in $(HTMLFILES); do \
if test -f $(srcdir)/`echo $$f | sed -e 's/.html$$/.texi/'`; then \
if $(MAKE) "TEXI2HTML=$(TEXI2HTML)" $$f; then \
true; \
else \
exit 1; \
fi; \
fi; \
done
install-html: html
$(SHELL) $(srcdir)/../mkinstalldirs $(DESTDIR)$(htmldir)
if test ! -f standards.html; then cd $(srcdir); fi; \
if test -f standards.html; then \
for i in standards.html*; do \
$(INSTALL_DATA) $$i $(DESTDIR)$(htmldir)/$$i; \
done; \
fi
if test ! -f configure.html; then cd $(srcdir); fi; \
if test -f configure.html; then \
for i in configure.html*; do \
$(INSTALL_DATA) $$i $(DESTDIR)$(htmldir)/$$i; \
done; \
fi
dvi:
for f in $(DVIFILES); do \
if test -f $(srcdir)/`echo $$f | sed -e 's/.dvi$$/.texi/'`; then \
if $(MAKE) "TEXI2DVI=$(TEXI2DVI)" $$f; then \
true; \
else \
exit 1; \
fi; \
fi; \
done
pdf:
for f in $(PDFFILES); do \
if test -f $(srcdir)/`echo $$f | sed -e 's/.pdf$$/.texi/'`; then \
if $(MAKE) "TEXI2PDF=$(TEXI2PDF)" $$f; then \
true; \
else \
exit 1; \
fi; \
fi; \
done
install-pdf: pdf
$(SHELL) $(srcdir)/../mkinstalldirs $(DESTDIR)$(pdfdir)/etc
if test ! -f standards.pdf; then cd $(srcdir); fi; \
if test -f standards.pdf; then \
for i in standards.pdf*; do \
$(INSTALL_DATA) $$i $(DESTDIR)$(pdfdir)/etc/$$i; \
done; \
fi
if test ! -f configure.pdf; then cd $(srcdir); fi; \
if test -f configure.pdf; then \
for i in configure.pdf*; do \
$(INSTALL_DATA) $$i $(DESTDIR)$(pdfdir)/etc/$$i; \
done; \
fi
standards.info: $(srcdir)/standards.texi $(srcdir)/make-stds.texi
$(MAKEINFO) --no-split -I$(srcdir) -o standards.info $(srcdir)/standards.texi
standards.html: $(srcdir)/standards.texi $(srcdir)/make-stds.texi
$(TEXI2HTML) --no-split -I$(srcdir) -o standards.html $(srcdir)/standards.texi
standards.dvi: $(srcdir)/standards.texi
TEXINPUTS=$(TEXIDIR):$$TEXINPUTS $(TEXI2DVI) $(srcdir)/standards.texi
standards.ps: standards.dvi
$(DVIPS) standards.dvi -o standards.ps
standards.pdf: $(srcdir)/standards.texi
TEXINPUTS=$(TEXIDIR):$$TEXINPUTS $(TEXI2PDF) $(srcdir)/standards.texi
# makeinfo requires images to be in the current directory.
configure.info: $(srcdir)/configure.texi $(srcdir)/configdev.tin $(srcdir)/configbuild.tin
rm -f configdev.txt configbuild.txt
cp $(srcdir)/configdev.tin configdev.txt
cp $(srcdir)/configbuild.tin configbuild.txt
$(MAKEINFO) -I$(srcdir) -o configure.info $(srcdir)/configure.texi
rm -f configdev.txt configbuild.txt
# texi2dvi wants both the .txt and the .eps files.
configure.dvi: $(srcdir)/configure.texi $(srcdir)/configdev.tin $(srcdir)/configbuild.tin $(srcdir)/configdev.ein $(srcdir)/configbuild.ein
rm -f configdev.txt configbuild.txt
cp $(srcdir)/configdev.tin configdev.txt
cp $(srcdir)/configbuild.tin configbuild.txt
rm -f configdev.eps configbuild.eps
cp $(srcdir)/configdev.ein configdev.eps
cp $(srcdir)/configbuild.ein configbuild.eps
TEXINPUTS=$(TEXIDIR):$$TEXINPUTS $(TEXI2DVI) $(srcdir)/configure.texi
rm -f configdev.txt configbuild.txt
rm -f configdev.eps configbuild.eps
# dvips requires images to be in the current directory
configure.ps: configure.dvi $(srcdir)/configdev.ein $(srcdir)/configbuild.ein
rm -f configdev.eps configbuild.eps
cp $(srcdir)/configdev.ein configdev.eps
cp $(srcdir)/configbuild.ein configbuild.eps
$(DVIPS) configure.dvi -o configure.ps
rm -f configdev.eps configbuild.eps
configure.pdf: $(srcdir)/configure.texi $(srcdir)/configdev.tin $(srcdir)/configbuild.tin $(srcdir)/configdev.ein $(srcdir)/configbuild.ein
rm -f configdev.pdf configbuild.pdf
epstopdf $(srcdir)/configdev.ein -outfile=configdev.pdf
epstopdf $(srcdir)/configbuild.ein -outfile=configbuild.pdf
TEXINPUTS=$(TEXIDIR):$$TEXINPUTS $(TEXI2PDF) $(srcdir)/configure.texi
rm -f configdev.pdf configbuild.pdf
configure.html: $(srcdir)/configure.texi
cp $(srcdir)/configdev.jin configdev.jpg
cp $(srcdir)/configbuild.jin configbuild.jpg
$(TEXI2HTML) --no-split -I$(srcdir) -o configure.html $(srcdir)/configure.texi
clean:
rm -f *.aux *.cp *.cps *.dvi *.fn *.fns *.ky *.kys *.log
rm -f *.pg *.pgs *.toc *.tp *.tps *.vr *.vrs
rm -f configdev.txt configbuild.txt
rm -f configdev.eps configbuild.eps
rm -f configdev.jpg configbuild.jpg
mostlyclean: clean
distclean: clean
rm -f Makefile config.status config.cache
maintainer-clean realclean: distclean
rm -f *.html*
rm -f *.info*
Makefile: $(srcdir)/Makefile.in $(host_makefile_frag) $(target_makefile_frag) \
config.status
$(SHELL) ./config.status
AUTOCONF = autoconf
configure_deps = $(srcdir)/configure.in
$(srcdir)/configure: # $(configure_deps)
cd $(srcdir) && $(AUTOCONF)
config.status: $(srcdir)/configure
$(SHELL) ./config.status --recheck
## these last targets are for standards.texi conformance
dist:
check:
installcheck:
TAGS:

View file

@ -56,9 +56,8 @@ L1:
call rax
L2:
; register a function to be called at normal process termination
mov rdi, __do_global_dtors
call atexit
; call init function
call __do_global_ctors
@ -76,13 +75,17 @@ L4:
; arguments are already on the stack
; call the user's function
pop rdi ; argc
pop rsi ; argv pointer
pop rdx ; env pointer
call main
; call "exit"
mov rdi, rax
call exit
; endless loop

View file

@ -85,7 +85,7 @@ syscall(int nr, unsigned long arg0, unsigned long arg1, unsigned long arg2,
asm volatile (_SYSCALLSTR(INT_SYSCALL)
: "=a" (res)
: "0" (nr), "b" (arg0), "c" (arg1), "d" (arg2), "S" (arg3), "D" (arg4)
: "D" (nr), "S" (arg0), "d" (arg1), "c" (arg2), "b" (arg3), "a" (arg4)
: "memory", "cc");
return res;
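Reading the new constraint list (an inference from this hunk, not stated elsewhere): the syscall number now travels in rdi and the arguments in rsi, rdx, rcx, rbx and rax, with the result returned in rax. A libc-side wrapper then reduces to a plain call, e.g. (hypothetical syscall number constant):
ssize_t write(int fd, const void* buf, size_t len)
{
// __NR_write stands in for the kernel's write syscall number
return syscall(__NR_write, fd, (unsigned long) buf, len, 0, 0);
}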

View file

@ -1 +0,0 @@
# serdep.tmp

View file

@ -2,6 +2,21 @@
symbol-file metalsvm.sym
target remote localhost:1234
# Configure breakpoints and everything as you wish here.
# Debugging 32bit code
#set architecture i386
#break stublet
#continue
# Debugging 64bit code
set architecture i386:x86-64
#break main
# Set memory watchpoint
#rwatch apic_mp
# Debugging userspace
#add-symbol-file newlib/examples/memtest.sym 0x40200000
#break main
#continue # skip kernel main()
continue
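# One assumed workflow with this script (standard QEMU flags; file names are
# this repo's build artifacts): start the kernel frozen with the gdbstub
# listening on port 1234, matching the target remote line above, then launch
# gdb with this file:
#   qemu-system-x86_64 -s -S -kernel metalsvm.elf -initrd tools/initrd.img
#   gdb -x .gdbinit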

15
script.ipxe Normal file
View file

@ -0,0 +1,15 @@
#!ipxe
# iPXE is an open-source network boot firmware.
# It provides a full PXE implementation enhanced with additional features such as
# booting from HTTP, FTP, iSCSI SAN, Fibre Channel SAN, Wireless, WAN or Infiniband.
#
# http://ipxe.org/
#
# We use it to rapidly compile & debug MetalSVM on real hardware.
# This script is fetched and executed by iPXE, which enables easy changes to the boot
# procedure without recompiling iPXE and reflashing your USB thumb drive or network boot ROM.
kernel http://134.130.62.174:8080/metalsvm.elf
module http://134.130.62.174:8080/tools/initrd.img
boot
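# The script can also be chainloaded by hand from an iPXE shell, assuming it
# is served next to the kernel image on the host above:
#   chain http://134.130.62.174:8080/script.ipxe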