diff --git a/.gitignore b/.gitignore index 2ecbcd97..cae77b40 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ tags Makefile include/metalsvm/config.h +include/metalsvm/config.inc tools/make_initrd newlib/examples/hello newlib/examples/jacobi diff --git a/Doxyfile b/Doxyfile index fbb573bc..315bdf5c 100644 --- a/Doxyfile +++ b/Doxyfile @@ -1,4 +1,4 @@ -# Doxyfile 1.7.3 +# Doxyfile 1.8.1.1 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. @@ -22,10 +22,11 @@ DOXYFILE_ENCODING = UTF-8 -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded -# by quotes) that should identify the project. +# The PROJECT_NAME tag is a single word (or sequence of words) that should +# identify the project. Note that if you do not use Doxywizard you need +# to put quotes around the project name if it contains spaces. -PROJECT_NAME = MetalSVM +PROJECT_NAME = "MetalSVM" # The PROJECT_NUMBER tag can be used to enter a project or revision number. # This could be handy for archiving the generated documentation or @@ -33,7 +34,9 @@ PROJECT_NAME = MetalSVM PROJECT_NUMBER = -# Using the PROJECT_BRIEF tag one can provide an optional one line description for a project that appears at the top of each page and should give viewer a quick idea about the purpose of the project. Keep the description short. +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer +# a quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = "A Bare-Metal Hypervisor for Non-Coherent Memory-Coupled Cores" @@ -42,14 +45,14 @@ PROJECT_BRIEF = "A Bare-Metal Hypervisor for Non-Coherent Memory-Couple # exceed 55 pixels and the maximum width should not exceed 200 pixels. # Doxygen will copy the logo to the output directory. 
-PROJECT_LOGO = documentation/img/lfbs_logo.gif +PROJECT_LOGO = documentation/img/lfbs_logo.gif # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. # If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. -OUTPUT_DIRECTORY = documentation +OUTPUT_DIRECTORY = documentation/ # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output @@ -181,7 +184,7 @@ SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. # Doxygen uses this value to replace tabs by spaces in code fragments. -TAB_SIZE = 2 +TAB_SIZE = 8 # This tag can be used to specify a number of aliases that acts # as commands in the documentation. An alias has the form "name=value". @@ -192,6 +195,13 @@ TAB_SIZE = 2 ALIASES = +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding +# "class=itcl::class" will allow you to use the command class in the +# itcl::class meaning. + +TCL_SUBST = + # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C # sources only. Doxygen will then generate output that is more tailored for C. # For instance, some of the names that are used will be different. The list @@ -230,6 +240,15 @@ OPTIMIZE_OUTPUT_VHDL = NO EXTENSION_MAPPING = +# If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all +# comments according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you +# can mix doxygen, HTML, and XML commands with Markdown formatting. +# Disable only in case of backward compatibilities issues. 
+ +MARKDOWN_SUPPORT = YES + # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should # set this tag to YES in order to let doxygen match functions declarations and @@ -257,7 +276,7 @@ SIP_SUPPORT = NO # setting a simple type. If this is not the case, or you want to show the # methods anyway, you should set this option to NO. -IDL_PROPERTY_SUPPORT = NO +IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first @@ -274,6 +293,22 @@ DISTRIBUTE_GROUP_DOC = NO SUBGROUPING = YES +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and +# unions are shown inside the group in which they are included (e.g. using +# @ingroup) instead of on a separate page (for HTML and Man pages) or +# section (for LaTeX and RTF). + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and +# unions with only public data fields will be shown inline in the documentation +# of the scope in which they are defined (i.e. file, namespace, or group +# documentation), provided this scope is documented. If set to NO (the default), +# structs, classes, and unions are shown on a separate page (for HTML and Man +# pages) or section (for LaTeX and RTF). + +INLINE_SIMPLE_STRUCTS = YES + # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum # is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct @@ -296,10 +331,21 @@ TYPEDEF_HIDES_STRUCT = YES # a logarithmic scale so increasing the size by one will roughly double the # memory usage. The cache size is given by this formula: # 2^(16+SYMBOL_CACHE_SIZE). 
The valid range is 0..9, the default is 0, -# corresponding to a cache size of 2^16 = 65536 symbols +# corresponding to a cache size of 2^16 = 65536 symbols. SYMBOL_CACHE_SIZE = 0 +# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be +# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given +# their name and scope. Since this can be an expensive process and often the +# same symbol appears multiple times in the code, doxygen keeps a cache of +# pre-resolved symbols. If the cache is too small doxygen will become slower. +# If the cache is too large, memory is wasted. The cache size is given by this +# formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols. + +LOOKUP_CACHE_SIZE = 0 + #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- @@ -316,6 +362,10 @@ EXTRACT_ALL = NO EXTRACT_PRIVATE = NO +# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal scope will be included in the documentation. + +EXTRACT_PACKAGE = NO + # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. @@ -449,8 +499,11 @@ SORT_GROUP_NAMES = NO SORT_BY_SCOPE_NAME = NO -# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper type resolution of all parameters of a function it will reject a -# match between the prototype and the implementation of a member function even if there is only one candidate or it is obvious which candidate to choose by doing a simple string match.
By disabling STRICT_PROTO_MATCHING doxygen +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to +# do proper type resolution of all parameters of a function it will reject a +# match between the prototype and the implementation of a member function even +# if there is only one candidate or it is obvious which candidate to choose +# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen # will still accept a match between prototype and implementation in such cases. STRICT_PROTO_MATCHING = NO @@ -459,25 +512,25 @@ STRICT_PROTO_MATCHING = NO # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. -GENERATE_TODOLIST = NO +GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. This list is created by putting \test # commands in the documentation. -GENERATE_TESTLIST = NO +GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or # disable (NO) the bug list. This list is created by putting \bug # commands in the documentation. -GENERATE_BUGLIST = NO +GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting # \deprecated commands in the documentation. -GENERATE_DEPRECATEDLIST= NO +GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional # documentation sections, marked by \if sectionname ... \endif. @@ -500,12 +553,6 @@ MAX_INITIALIZER_LINES = 30 SHOW_USED_FILES = YES -# If the sources in your project are distributed over multiple directories -# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy -# in the documentation. The default is NO. - -SHOW_DIRECTORIES = YES - # Set the SHOW_FILES tag to NO to disable the generation of the Files page. # This will remove the Files entry from the Quick Index and from the # Folder Tree View (if specified). 
The default is YES. @@ -517,7 +564,7 @@ SHOW_FILES = YES # This will remove the Namespaces entry from the Quick Index # and from the Folder Tree View (if specified). The default is YES. -SHOW_NAMESPACES = NO +SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from @@ -531,13 +578,23 @@ FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated -# output files in an output format independent way. The create the layout file +# output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. # You can optionally specify a file name after the option, if omitted # DoxygenLayout.xml will be used as the name of the layout file. LAYOUT_FILE = documentation/tmpl/layout.xml +# The CITE_BIB_FILES tag can be used to specify one or more bib files +# containing the references data. This must be a list of .bib files. The +# .bib extension is automatically appended if omitted. Using this command +# requires the bibtex tool to be installed. See also +# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style +# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this +# feature you need bibtex and perl available in the search path. + +CITE_BIB_FILES = + #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- @@ -599,14 +656,14 @@ WARN_LOGFILE = # with spaces. 
INPUT = ./fs \ - ./include \ - ./kernel \ - ./drivers \ - ./arch \ - ./libkern \ - ./mm \ - ./tools \ - ./documentation/text + ./include \ + ./kernel \ + ./drivers \ + ./arch \ + ./libkern \ + ./mm \ + ./tools \ + ./documentation/text # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is @@ -624,11 +681,11 @@ INPUT_ENCODING = UTF-8 # *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py # *.f90 *.f *.for *.vhd *.vhdl -FILE_PATTERNS += *.c -FILE_PATTERNS += *.h -FILE_PATTERNS += *.asm -FILE_PATTERNS += *.S -FILE_PATTERNS += *.dox +FILE_PATTERNS = *.c \ + *.h \ + *.asm \ + *.S \ + *.dox # The RECURSIVE tag can be used to turn specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. @@ -636,13 +693,15 @@ FILE_PATTERNS += *.dox RECURSIVE = YES -# The EXCLUDE tag can be used to specify files and/or directories that should +# The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. +# Note that relative paths are relative to the directory from which doxygen is +# run. EXCLUDE = -# The EXCLUDE_SYMLINKS tag can be used select whether or not files or +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. @@ -744,9 +803,9 @@ INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code -# fragments. Normal C and C++ comments will always remain visible. +# fragments. Normal C, C++ and Fortran comments will always remain visible. 
-STRIP_CODE_COMMENTS = YES +STRIP_CODE_COMMENTS = NO # If the REFERENCED_BY_RELATION tag is set to YES # then for each documented function all documented @@ -780,7 +839,7 @@ USE_HTAGS = NO # will generate a verbatim copy of the header file for each class for # which an include is specified. Set to NO to disable this. -VERBATIM_HEADERS = NO +VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index @@ -828,27 +887,43 @@ HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a personal HTML header for # each generated HTML page. If it is left blank doxygen will generate a -# standard header. +# standard header. Note that when using a custom header you are responsible +# for the proper inclusion of any scripts and style sheets that doxygen +# needs, which is dependent on the configuration options used. +# It is advised to generate a default header using "doxygen -w html +# header.html footer.html stylesheet.css YourConfigFile" and then modify +# that header. Note that the header is subject to change so you typically +# have to redo this when upgrading to a newer version of doxygen or when +# changing the value of configuration settings such as GENERATE_TREEVIEW! -HTML_HEADER = documentation/tmpl/header.html +HTML_HEADER = ./documentation/tmpl/header.html # The HTML_FOOTER tag can be used to specify a personal HTML footer for # each generated HTML page. If it is left blank doxygen will generate a # standard footer. -HTML_FOOTER = documentation/tmpl/footer.html +HTML_FOOTER = ./documentation/tmpl/footer.html # The HTML_STYLESHEET tag can be used to specify a user-defined cascading # style sheet that is used by each HTML page. It can be used to # fine-tune the look of the HTML output. If the tag is left blank doxygen # will generate a default style sheet. 
Note that doxygen will try to copy # the style sheet file to the HTML output directory, so don't put your own -# stylesheet in the HTML output directory as well, or it will be erased! +# style sheet in the HTML output directory as well, or it will be erased! -HTML_STYLESHEET = documentation/tmpl/stylesheet.css +HTML_STYLESHEET = ./documentation/tmpl/stylesheet.css + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that +# the files will be copied as-is; there are no commands or markers available. + +HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. -# Doxygen will adjust the colors in the stylesheet and background images +# Doxygen will adjust the colors in the style sheet and background images # according to this color. Hue is specified as an angle on a colorwheel, # see http://en.wikipedia.org/wiki/Hue for more information. # For instance the value 0 represents red, 60 is yellow, 120 is green, @@ -878,20 +953,23 @@ HTML_COLORSTYLE_GAMMA = 80 HTML_TIMESTAMP = YES -# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, -# files or namespaces will be aligned in HTML using tables. If set to -# NO a bullet list will be used. - -HTML_ALIGN_MEMBERS = YES - # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the -# page has loaded. For this to work a browser that supports -# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox -# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). +# page has loaded. 
HTML_DYNAMIC_SECTIONS = NO +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of +# entries shown in the various tree structured indices initially; the user +# can expand and collapse entries dynamically later on. Doxygen will expand +# the tree to such a level that at most the specified number of entries are +# visible (unless a fully collapsed tree already exceeds this amount). +# So setting the number of entries 1 will produce a full collapsed tree by +# default. 0 is a special value representing an infinite number of entries +# and will result in a full expanded tree by default. + +HTML_INDEX_NUM_ENTRIES = 100 + # If the GENERATE_DOCSET tag is set to YES, additional index files # will be generated that can be used as input for Apple's Xcode 3 # integrated development environment, introduced with OSX 10.5 (Leopard). @@ -910,14 +988,14 @@ GENERATE_DOCSET = NO # documentation sets from a single provider (such as a company or product suite) # can be grouped. -DOCSET_FEEDNAME = "Doxygen generated docs" +DOCSET_FEEDNAME = "MetalSVM Documentation" # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that # should uniquely identify the documentation set bundle. This should be a # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen # will append .docset to the name. -DOCSET_BUNDLE_ID = de.rwth-aachen.lfbs +DOCSET_BUNDLE_ID = org.metalsvm # When GENERATE_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style @@ -1043,18 +1121,14 @@ GENERATE_ECLIPSEHELP = NO ECLIPSE_DOC_ID = org.doxygen.Project -# The DISABLE_INDEX tag can be used to turn on/off the condensed index at -# top of each HTML page. The value NO (the default) enables the index and -# the value YES disables it. +# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) +# at top of each HTML page. 
The value NO (the default) enables the index and +# the value YES disables it. Since the tabs have the same information as the +# navigation tree you can set this option to NO if you already set +# GENERATE_TREEVIEW to YES. DISABLE_INDEX = NO -# This tag can be used to set the number of enum values (range [0,1..20]) -# that doxygen will group on one line in the generated HTML documentation. -# Note that a value of 0 will completely suppress the enum values from appearing in the overview section. - -ENUM_VALUES_PER_LINE = 4 - # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. # If the tag value is set to YES, a side panel will be generated @@ -1062,13 +1136,17 @@ ENUM_VALUES_PER_LINE = 4 # is generated for HTML Help). For this to work a browser that supports # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). # Windows users are probably better off using the HTML help feature. +# Since the tree basically has the same information as the tab index you +# could consider to set DISABLE_INDEX to NO when enabling this option. -GENERATE_TREEVIEW = NO +GENERATE_TREEVIEW = YES -# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, -# and Class Hierarchy pages using a tree view instead of an ordered list. +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values +# (range [0,1..20]) that doxygen will group on one line in the generated HTML +# documentation. Note that a value of 0 will completely suppress the enum +# values from appearing in the overview section. 
-USE_INLINE_TREES = NO +ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree @@ -1101,7 +1179,7 @@ FORMULA_TRANSPARENT = YES # (see http://www.mathjax.org) which uses client side Javascript for the # rendering instead of using prerendered bitmaps. Use this if you do not # have LaTeX installed or if you want to formulas look prettier in the HTML -# output. When enabled you also need to install MathJax separately and +# output. When enabled you may also need to install MathJax separately and # configure the path to it using the MATHJAX_RELPATH option. USE_MATHJAX = NO @@ -1110,11 +1188,18 @@ USE_MATHJAX = NO # HTML output directory using the MATHJAX_RELPATH option. The destination # directory should contain the MathJax.js script. For instance, if the mathjax # directory is located at the same level as the HTML output directory, then -# MATHJAX_RELPATH should be ../mathjax. The default value points to the mathjax.org site, so you can quickly see the result without installing -# MathJax, but it is strongly recommended to install a local copy of MathJax -# before deployment. +# MATHJAX_RELPATH should be ../mathjax. The default value points to +# the MathJax Content Delivery Network so you can quickly see the result without +# installing MathJax. +# However, it is strongly recommended to install a local +# copy of MathJax from http://www.mathjax.org before deployment. -MATHJAX_RELPATH = http://www.mathjax.org/mathjax +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension +# names that should be enabled during MathJax rendering. + +MATHJAX_EXTENSIONS = # When the SEARCHENGINE tag is enabled doxygen will generate a search box # for the HTML output.
The underlying search engine uses javascript @@ -1189,6 +1274,13 @@ EXTRA_PACKAGES = LATEX_HEADER = +# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for +# the generated latex document. The footer should contain everything after +# the last chapter. If it is left blank doxygen will generate a +# standard footer. Notice: only use this tag if you know what you are doing! + +LATEX_FOOTER = + # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated # is prepared for conversion to pdf (using ps2pdf). The pdf file will # contain links (just like the HTML output) instead of page references @@ -1222,6 +1314,12 @@ LATEX_HIDE_INDICES = NO LATEX_SOURCE_CODE = NO +# The LATEX_BIB_STYLE tag can be used to specify the style to use for the +# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See +# http://en.wikipedia.org/wiki/BibTeX for more info. + +LATEX_BIB_STYLE = plain + #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- @@ -1253,7 +1351,7 @@ COMPACT_RTF = NO RTF_HYPERLINKS = NO -# Load stylesheet definitions from file. Syntax is similar to doxygen's +# Load style sheet definitions from file. Syntax is similar to doxygen's # config file, i.e. a series of assignments. You only have to provide # replacements, missing definitions are set to their default value. @@ -1395,10 +1493,10 @@ MACRO_EXPANSION = YES # then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. -EXPAND_ONLY_PREDEF = YES +EXPAND_ONLY_PREDEF = NO # If the SEARCH_INCLUDES tag is set to YES (the default) the includes files -# in the INCLUDE_PATH (see below) will be search if a #include is found. +# pointed to by INCLUDE_PATH will be searched when a #include is found. 
SEARCH_INCLUDES = YES @@ -1423,31 +1521,14 @@ INCLUDE_FILE_PATTERNS = # undefined via #undef or recursively expanded use the := operator # instead of the = operator. -# Doxygen messes up the attribute lines as c-structure names -PREDEFINED = __attribute__ (x)= \ - __attribute__(x)= \ - __attribute__ ((x))= \ - __attribute__((x))= \ - HAVE_ARCH_STRLEN \ - HAVE_ARCH_STRNCPY \ - HAVE_ARCH_STRCPY \ - HAVE_ARCH_MEMCPY \ - HAVE_ARCH_MEMSET \ - CONFIG_VGA \ - CONFIG_PCI \ - CONFIG_LWIP \ - CONFIG_VGA \ - CONFIG_KEYBOARD \ - CONFIG_MULTIBOOT \ - CONFIG_ROCKCREEK \ - SCC \ - MS_BAREMETAL \ - GORY +PREDEFINED = __attribute__(x)= \ + __attribute__ (x)= # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. # The macro definition that is found in the sources will be used. -# Use the PREDEFINED tag if you want to use a different macro definition that overrules the definition found in the source code. +# Use the PREDEFINED tag if you want to use a different macro definition that +# overrules the definition found in the source code. EXPAND_AS_DEFINED = @@ -1462,22 +1543,18 @@ SKIP_FUNCTION_MACROS = YES # Configuration::additions related to external references #--------------------------------------------------------------------------- -# The TAGFILES option can be used to specify one or more tagfiles. -# Optionally an initial location of the external documentation -# can be added for each tagfile. The format of a tag file without -# this location is as follows: +# The TAGFILES option can be used to specify one or more tagfiles. For each +# tag file the location of the external documentation should be added. The +# format of a tag file without this location is as follows: # # TAGFILES = file1 file2 ... # Adding location for the tag files is done as follows: # # TAGFILES = file1=loc1 "file2 = loc2" ... -# where "loc1" and "loc2" can be relative or absolute paths or -# URLs. 
If a location is present for each tag, the installdox tool -# does not have to be run to correct the links. -# Note that each tag file must have a unique name -# (where the name does NOT include the path) -# If a tag file is not located in the directory in which doxygen -# is run, you must also specify the path to the tagfile here. +# where "loc1" and "loc2" can be relative or absolute paths +# or URLs. Note that each tag file must have a unique name (where the name does +# NOT include the path). If a tag file is not located in the directory in which +# doxygen is run, you must also specify the path to the tagfile here. TAGFILES = @@ -1513,7 +1590,7 @@ PERL_PATH = /usr/bin/perl # this option also works with HAVE_DOT disabled, but it is recommended to # install and use dot, since it yields more powerful graphs. -CLASS_DIAGRAMS = NO +CLASS_DIAGRAMS = YES # You can define message sequence charts within doxygen comments using the \msc # command. Doxygen will then run the mscgen tool (see @@ -1535,7 +1612,7 @@ HIDE_UNDOC_RELATIONS = YES # toolkit from AT&T and Lucent Bell Labs. The other options in this section # have no effect if this option is set to NO (the default) -HAVE_DOT = YES +HAVE_DOT = NO # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is # allowed to run in parallel. When set to 0 (the default) doxygen will @@ -1545,13 +1622,12 @@ HAVE_DOT = YES DOT_NUM_THREADS = 0 -# By default doxygen will write a font called Helvetica to the output -# directory and reference it in all dot files that doxygen generates. -# When you want a differently looking font you can specify the font name -# using DOT_FONTNAME. You need to make sure dot is able to find the font, -# which can be done by putting it in a standard location or by setting the -# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory -# containing the font. +# By default doxygen will use the Helvetica font for all dot files that +# doxygen generates. 
When you want a differently looking font you can specify +# the font name using DOT_FONTNAME. You need to make sure dot is able to find +# the font, which can be done by putting it in a standard location or by setting +# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the +# directory containing the font. DOT_FONTNAME = Helvetica @@ -1560,19 +1636,18 @@ DOT_FONTNAME = Helvetica DOT_FONTSIZE = 10 -# By default doxygen will tell dot to use the output directory to look for the -# FreeSans.ttf font (which doxygen will put there itself). If you specify a -# different font using DOT_FONTNAME you can set the path where dot -# can find it using this tag. +# By default doxygen will tell dot to use the Helvetica font. +# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to +# set the path where dot can find it. DOT_FONTPATH = # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect inheritance relations. Setting this tag to YES will force the -# the CLASS_DIAGRAMS tag to NO. +# CLASS_DIAGRAMS tag to NO. -CLASS_GRAPH = NO +CLASS_GRAPH = YES # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and @@ -1592,6 +1667,15 @@ GROUP_GRAPHS = YES UML_LOOK = NO +# If the UML_LOOK tag is enabled, the fields and methods are shown inside +# the class node. If there are many fields or methods and many nodes the +# graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS +# threshold limits the number of items for each type to make the size more +# manageable. Set this to 0 for no limit. Note that the threshold may be +# exceeded by 50% before the limit is enforced. + +UML_LIMIT_NUM_FIELDS = 10 + # If set to YES, the inheritance and collaboration graphs will show the # relations between templates and their instances.
@@ -1609,7 +1693,7 @@ INCLUDE_GRAPH = YES # documented header file showing the documented files that directly or # indirectly include this file. -INCLUDED_BY_GRAPH = NO +INCLUDED_BY_GRAPH = YES # If the CALL_GRAPH and HAVE_DOT options are set to YES then # doxygen will generate a call dependency graph for every global function @@ -1630,9 +1714,9 @@ CALLER_GRAPH = NO # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will generate a graphical hierarchy of all classes instead of a textual one. -GRAPHICAL_HIERARCHY = NO +GRAPHICAL_HIERARCHY = YES -# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES # then doxygen will show the dependencies a directory has on other directories # in a graphical way. The dependency relations are determined by the #include # relations between the files in the directories. @@ -1640,11 +1724,22 @@ GRAPHICAL_HIERARCHY = NO DIRECTORY_GRAPH = YES # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images -# generated by dot. Possible values are png, svg, gif or svg. -# If left blank png will be used. +# generated by dot. Possible values are svg, png, jpg, or gif. +# If left blank png will be used. If you choose svg you need to set +# HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible in IE 9+ (other browsers do not have this requirement). DOT_IMAGE_FORMAT = png +# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to +# enable generation of interactive SVG images that allow zooming and panning. +# Note that this requires a modern browser other than Internet Explorer. +# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you +# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible. Older versions of IE do not have SVG support. 
+ +INTERACTIVE_SVG = NO + # The tag DOT_PATH can be used to specify the path where the dot tool can be # found. If left blank, it is assumed the dot tool can be found in the path. diff --git a/Makefile.example b/Makefile.example index e5c6d694..855559cc 100644 --- a/Makefile.example +++ b/Makefile.example @@ -1,48 +1,64 @@ -TOPDIR = $(shell pwd) +TOPDIR = $(shell pwd) ARCH = x86 +# For 64bit support, you have define BIT as 64 +BIT=32 NAME = metalsvm LWIPDIRS = lwip/src/arch lwip/src/api lwip/src/core lwip/src/core/ipv4 lwip/src/netif DRIVERDIRS = drivers/net drivers/char KERNDIRS = libkern kernel mm fs apps arch/$(ARCH)/kernel arch/$(ARCH)/mm arch/$(ARCH)/scc $(LWIPDIRS) $(DRIVERDIRS) SUBDIRS = $(KERNDIRS) -STACKPROT=-fno-stack-protector +STACKPROT = -fno-stack-protector # Set your own cross compiler tool chain prefix here -CROSSCOMPREFIX= +CROSSCOMPREFIX = # Uncomment both lines if compiling for the SCC! -#CROSSCOMPREFIX=i386-unknown-linux-gnu- -#STACKPROT= +#CROSSCOMPREFIX = i386-unknown-linux-gnu- +#STACKPROT = -CC_FOR_TARGET=$(CROSSCOMPREFIX)gcc -CXX_FOR_TARGET=$(CROSSCOMPREFIX)g++ -GCC_FOR_TARGET=$(CROSSCOMPREFIX)gcc -AR_FOR_TARGET=$(CROSSCOMPREFIX)ar -AS_FOR_TARGET=$(CROSSCOMPREFIX)as -LD_FOR_TARGET=$(CROSSCOMPREFIX)ld -NM_FOR_TARGET=$(CROSSCOMPREFIX)nm -OBJDUMP_FOR_TARGET=$(CROSSCOMPREFIX)objdump -OBJCOPY_FOR_TARGET=$(CROSSCOMPREFIX)objcopy -RANLIB_FOR_TARGET=$(CROSSCOMPREFIX)ranlib -STRIP_FOR_TARGET=$(CROSSCOMPREFIX)strip -READELF_FOR_TARGET=$(CROSSCOMPREFIX)readelf - -NASM = nasm -EMU=qemu -GDB=gdb +CC_FOR_TARGET = $(CROSSCOMPREFIX)gcc +CXX_FOR_TARGET = $(CROSSCOMPREFIX)g++ +GCC_FOR_TARGET = $(CROSSCOMPREFIX)gcc +CPP_FOR_TARGET = $(CROSSCOMPREFIX)cpp +AR_FOR_TARGET = $(CROSSCOMPREFIX)ar +AS_FOR_TARGET = $(CROSSCOMPREFIX)as +LD_FOR_TARGET = $(CROSSCOMPREFIX)ld +NM_FOR_TARGET = $(CROSSCOMPREFIX)nm +OBJDUMP_FOR_TARGET = $(CROSSCOMPREFIX)objdump +OBJCOPY_FOR_TARGET = $(CROSSCOMPREFIX)objcopy +RANLIB_FOR_TARGET = $(CROSSCOMPREFIX)ranlib +STRIP_FOR_TARGET = 
$(CROSSCOMPREFIX)strip +READELF_FOR_TARGET = $(CROSSCOMPREFIX)readelf MAKE = make -NASMFLAGS = -felf32 -g -INCLUDE = -I$(TOPDIR)/include -I$(TOPDIR)/arch/$(ARCH)/include -I$(TOPDIR)/lwip/src/include -I$(TOPDIR)/lwip/src/include/ipv4 -I$(TOPDIR)/drivers -# Compiler options for final code -CFLAGS = -g -m32 -march=i586 -Wall -O2 -fno-builtin -fstrength-reduce -fomit-frame-pointer -finline-functions -nostdinc $(INCLUDE) $(STACKPROT) -# Compiler options for debuuging -#CFLAGS = -g -O -m32 -march=i586 -Wall -fno-builtin -DWITH_FRAME_POINTER -nostdinc $(INCLUDE) $(STACKPROT) -ARFLAGS = rsv RM = rm -rf -LDFLAGS = -T link.ld -z max-page-size=4096 --defsym __BUILD_DATE=$(shell date +'%Y%m%d') --defsym __BUILD_TIME=$(shell date +'%H%M%S') +NASM = nasm +# For 64bit code, you have to use qemu-system-x86_64 +QEMU = qemu-system-i386 +GDB = gdb + +# For 64bit support, you have to define -felf64 instead of -felf32 +NASMFLAGS = -felf32 -g -i$(TOPDIR)/include/metalsvm/ +INCLUDE = -I$(TOPDIR)/include -I$(TOPDIR)/arch/$(ARCH)/include -I$(TOPDIR)/lwip/src/include -I$(TOPDIR)/lwip/src/include/ipv4 -I$(TOPDIR)/drivers +# For 64bit support, you have to define "-m64 -mno-red-zone" instead of "-m32 -march=i586" +# Compiler options for final code +CFLAGS = -g -m32 -march=i586 -Wall -O2 -fstrength-reduce -fomit-frame-pointer -finline-functions -ffreestanding $(INCLUDE) $(STACKPROT) +# Compiler options for debuging +#CFLAGS = -g -O -m32 -march=i586 -Wall -fomit-frame-pointer -ffreestanding $(INCLUDE) $(STACKPROT) +ARFLAGS = rsv +LDFLAGS = -T link$(BIT).ld -z max-page-size=4096 --defsym __BUILD_DATE=$(shell date +'%Y%m%d') --defsym __BUILD_TIME=$(shell date +'%H%M%S') STRIP_DEBUG = --strip-debug KEEP_DEBUG = --only-keep-debug +OUTPUT_FORMAT = -O elf32-i386 +# For 64bit support, you have to define -m64 instead of "-m32 -march=i586" +CFLAGS_FOR_NEWLIB = -m32 -march=i586 -O2 $(STACKPROT) +# For 64bit support, you have to define -m64 instead of "-m32 -march=i586" +LDFLAGS_FOR_NEWLIB = -m32 
-march=i586 +# For 64bit support, you have to define -m64 instead of "-m32" +CFLAGS_FOR_TOOLS = -m32 -O2 -Wall +LDFLAGS_FOR_TOOLS = +# For 64bit support, you have to define -felf64 instead of -felf32 +NASMFLAGS_FOR_NEWLIB = -felf32 # Prettify output V = 0 @@ -56,7 +72,7 @@ default: all all: newlib tools $(NAME).elf newlib: - $(MAKE) ARCH=$(ARCH) LDFLAGS="-m32" CFLAGS="-m32 -O2 -march=i586 $(STACKPROT)" NASMFLAGS="$(NASMFLAGS)" CC_FOR_TARGET=$(CC_FOR_TARGET) \ + $(MAKE) ARCH=$(ARCH) BIT=$(BIT) LDFLAGS="$(LDFLAGS_FOR_NEWLIB)" CFLAGS="$(CFLAGS_FOR_NEWLIB)" NASMFLAGS="$(NASMFLAGS_FOR_NEWLIB)" CC_FOR_TARGET=$(CC_FOR_TARGET) \ CXX_FOR_TARGET=$(CXX_FOR_TARGET) \ GCC_FOR_TARGET=$(GCC_FOR_TARGET) \ AR_FOR_TARGET=$(AR_FOR_TARGET) \ @@ -68,21 +84,22 @@ newlib: RANLIB_FOR_TARGET=$(RANLIB_FOR_TARGET) \ STRIP_FOR_TARGET=$(STRIP_FOR_TARGET) \ READELF_FOR_TARGET=$(READELF_FOR_TARGET) -C newlib + tools: - $(MAKE) -C tools + $(MAKE) CFLAGS="$(CFLAGS_FOR_TOOLS)" LDFLAGS="$(LDFLAGS_FOR_TOOLS)" -C tools $(NAME).elf: $Q$(LD_FOR_TARGET) $(LDFLAGS) -o $(NAME).elf $^ @echo [OBJCOPY] $(NAME).sym $Q$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $(NAME).elf $(NAME).sym @echo [OBJCOPY] $(NAME).elf - $Q$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $(NAME).elf + $Q$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $(OUTPUT_FORMAT) $(NAME).elf qemu: newlib tools $(NAME).elf - qemu -monitor stdio -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img + $(QEMU) -monitor stdio -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img qemudbg: newlib tools $(NAME).elf - qemu -S -s -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img + $(QEMU) -s -S -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img gdb: $(NAME).elf make qemudbg > /dev/null & @@ -104,10 +121,15 @@ 
veryclean: clean @echo [CC] $@ $Q$(CC_FOR_TARGET) -c -D__KERNEL__ $(CFLAGS) -o $@ $< @echo [DEP] $*.dep - $Q$(CC_FOR_TARGET) -MF $*.dep -MT $*.o -MM $(CFLAGS) $< + $Q$(CPP_FOR_TARGET) -MF $*.dep -MT $*.o -MM -D__KERNEL__ $(CFLAGS) $< +include/metalsvm/config.inc: include/metalsvm/config.h + @echo "; This file is generated automatically from the config.h file." > include/metalsvm/config.inc + @echo "; Before editing this, you should consider editing config.h." >> include/metalsvm/config.inc + @awk '/^#define MAX_CORES/{ print "%define MAX_CORES", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc + @awk '/^#define KERNEL_STACK_SIZE/{ print "%define KERNEL_STACK_SIZE", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc -%.o : %.asm +%.o : %.asm include/metalsvm/config.inc @echo [ASM] $@ $Q$(NASM) $(NASMFLAGS) -o $@ $< diff --git a/Makefile.scc b/Makefile.scc new file mode 100644 index 00000000..7e466274 --- /dev/null +++ b/Makefile.scc @@ -0,0 +1,138 @@ +TOPDIR = $(shell pwd) +ARCH = x86 +# For 64bit support, you have define BIT as 64 +BIT=32 +NAME = metalsvm +LWIPDIRS = lwip/src/arch lwip/src/api lwip/src/core lwip/src/core/ipv4 lwip/src/netif +DRIVERDIRS = drivers/net drivers/char +KERNDIRS = libkern kernel mm fs apps arch/$(ARCH)/kernel arch/$(ARCH)/mm arch/$(ARCH)/scc $(LWIPDIRS) $(DRIVERDIRS) +SUBDIRS = $(KERNDIRS) +STACKPROT = -fno-stack-protector + +# Set your own cross compiler tool chain prefix here +CROSSCOMPREFIX = + +# Uncomment both lines if compiling for the SCC! 
+CROSSCOMPREFIX = i386-unknown-linux-gnu- +STACKPROT = + +CC_FOR_TARGET = $(CROSSCOMPREFIX)gcc +CXX_FOR_TARGET = $(CROSSCOMPREFIX)g++ +GCC_FOR_TARGET = $(CROSSCOMPREFIX)gcc +CPP_FOR_TARGET = $(CROSSCOMPREFIX)cpp +AR_FOR_TARGET = $(CROSSCOMPREFIX)ar +AS_FOR_TARGET = $(CROSSCOMPREFIX)as +LD_FOR_TARGET = $(CROSSCOMPREFIX)ld +NM_FOR_TARGET = $(CROSSCOMPREFIX)nm +OBJDUMP_FOR_TARGET = $(CROSSCOMPREFIX)objdump +OBJCOPY_FOR_TARGET = $(CROSSCOMPREFIX)objcopy +RANLIB_FOR_TARGET = $(CROSSCOMPREFIX)ranlib +STRIP_FOR_TARGET = $(CROSSCOMPREFIX)strip +READELF_FOR_TARGET = $(CROSSCOMPREFIX)readelf + +MAKE = make +RM = rm -rf +NASM = nasm +# For 64bit code, you have to use qemu-system-x86_64 +QEMU = qemu-system-i386 +GDB = gdb + +# For 64bit support, you have to define -felf64 instead of -felf32 +NASMFLAGS = -felf32 -g -i$(TOPDIR)/include/metalsvm/ +INCLUDE = -I$(TOPDIR)/include -I$(TOPDIR)/arch/$(ARCH)/include -I$(TOPDIR)/lwip/src/include -I$(TOPDIR)/lwip/src/include/ipv4 -I$(TOPDIR)/drivers +# For 64bit support, you have to define "-m64 -mno-red-zone" instead of "-m32 -march=i586" +# Compiler options for final code +CFLAGS = -g -m32 -march=i586 -Wall -O2 -fstrength-reduce -fomit-frame-pointer -finline-functions -ffreestanding $(INCLUDE) $(STACKPROT) +# Compiler options for debuging +#CFLAGS = -g -O -m32 -march=i586 -Wall -fomit-frame-pointer -ffreestanding $(INCLUDE) $(STACKPROT) +ARFLAGS = rsv +LDFLAGS = -T link$(BIT).ld -z max-page-size=4096 --defsym __BUILD_DATE=$(shell date +'%Y%m%d') --defsym __BUILD_TIME=$(shell date +'%H%M%S') +STRIP_DEBUG = --strip-debug +KEEP_DEBUG = --only-keep-debug +OUTPUT_FORMAT = -O elf32-i386 +# For 64bit support, you have to define -m64 instead of "-m32 -march=i586" +CFLAGS_FOR_NEWLIB = -m32 -march=i586 -O2 $(STACKPROT) +# For 64bit support, you have to define -m64 instead of "-m32 -march=i586" +LDFLAGS_FOR_NEWLIB = -m32 -march=i586 +# For 64bit support, you have to define -m64 instead of "-m32" +CFLAGS_FOR_TOOLS = -m32 -O2 -Wall 
+LDFLAGS_FOR_TOOLS = +# For 64bit support, you have to define -felf64 instead of -felf32 +NASMFLAGS_FOR_NEWLIB = -felf32 + +# Prettify output +V = 0 +ifeq ($V,0) + Q = @ + P = > /dev/null +endif + +default: all + +all: newlib tools $(NAME).elf + +newlib: + $(MAKE) ARCH=$(ARCH) BIT=$(BIT) LDFLAGS="$(LDFLAGS_FOR_NEWLIB)" CFLAGS="$(CFLAGS_FOR_NEWLIB)" NASMFLAGS="$(NASMFLAGS_FOR_NEWLIB)" CC_FOR_TARGET=$(CC_FOR_TARGET) \ + CXX_FOR_TARGET=$(CXX_FOR_TARGET) \ + GCC_FOR_TARGET=$(GCC_FOR_TARGET) \ + AR_FOR_TARGET=$(AR_FOR_TARGET) \ + AS_FOR_TARGET=$(AS_FOR_TARGET) \ + LD_FOR_TARGET=$(LD_FOR_TARGET) \ + NM_FOR_TARGET=$(NM_FOR_TARGET) \ + OBJDUMP_FOR_TARGET=$(OBJDUMP_FOR_TARGET) \ + OBJCOPY_FOR_TARGET=$(OBJCOPY_FOR_TARGET) \ + RANLIB_FOR_TARGET=$(RANLIB_FOR_TARGET) \ + STRIP_FOR_TARGET=$(STRIP_FOR_TARGET) \ + READELF_FOR_TARGET=$(READELF_FOR_TARGET) -C newlib + +tools: + $(MAKE) CFLAGS="$(CFLAGS_FOR_TOOLS)" LDFLAGS="$(LDFLAGS_FOR_TOOLS)" -C tools + +$(NAME).elf: + $Q$(LD_FOR_TARGET) $(LDFLAGS) -o $(NAME).elf $^ + @echo [OBJCOPY] $(NAME).sym + $Q$(OBJCOPY_FOR_TARGET) $(KEEP_DEBUG) $(NAME).elf $(NAME).sym + @echo [OBJCOPY] $(NAME).elf + $Q$(OBJCOPY_FOR_TARGET) $(STRIP_DEBUG) $(OUTPUT_FORMAT) $(NAME).elf + +qemu: newlib tools $(NAME).elf + $(QEMU) -monitor stdio -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img + +qemudbg: newlib tools $(NAME).elf + $(QEMU) -s -S -smp 2 -net nic,model=rtl8139 -net user,hostfwd=tcp::12345-:4711 -net dump -kernel metalsvm.elf -initrd tools/initrd.img + +gdb: $(NAME).elf + make qemudbg > /dev/null & + $(GDB) -x script.gdb + +clean: + $Q$(RM) $(NAME).elf $(NAME).sym *~ + $Q$(MAKE) -C tools clean + @echo Cleaned. 
+ +veryclean: clean + $Q$(MAKE) -C newlib veryclean + @echo Very cleaned + +#depend: +# for i in $(SUBDIRS); do $(MAKE) -k -C $$i depend; done + +%.o : %.c + @echo [CC] $@ + $Q$(CC_FOR_TARGET) -c -D__KERNEL__ $(CFLAGS) -o $@ $< + @echo [DEP] $*.dep + $Q$(CPP_FOR_TARGET) -MF $*.dep -MT $*.o -MM -D__KERNEL__ $(CFLAGS) $< + +include/metalsvm/config.inc: include/metalsvm/config.h + @echo "; This file is generated automatically from the config.h file." > include/metalsvm/config.inc + @echo "; Before editing this, you should consider editing config.h." >> include/metalsvm/config.inc + @awk '/^#define MAX_CORES/{ print "%define MAX_CORES", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc + @awk '/^#define KERNEL_STACK_SIZE/{ print "%define KERNEL_STACK_SIZE", $$3 }' include/metalsvm/config.h >> include/metalsvm/config.inc + +%.o : %.asm include/metalsvm/config.inc + @echo [ASM] $@ + $Q$(NASM) $(NASMFLAGS) -o $@ $< + +.PHONY: default all clean emu gdb newlib tools + +include $(addsuffix /Makefile,$(SUBDIRS)) diff --git a/apps/Makefile b/apps/Makefile index e45d1a38..9dbcce1f 100644 --- a/apps/Makefile +++ b/apps/Makefile @@ -1,4 +1,4 @@ -C_source := tests.c echo.c netio.c laplace.c gfx_client.c gfx_generic.c +C_source := tests.c echo.c netio.c jacobi.c laplace.c gfx_client.c gfx_generic.c MODULE := apps include $(TOPDIR)/Makefile.inc diff --git a/apps/echo.c b/apps/echo.c index db5abdcb..0aa62ad4 100644 --- a/apps/echo.c +++ b/apps/echo.c @@ -32,7 +32,9 @@ #include -#ifdef CONFIG_LWIP +#include "tests.h" + +#if defined(CONFIG_LWIP) && defined(START_ECHO) #include @@ -101,6 +103,299 @@ echo_init(void) } /*-----------------------------------------------------------------------------------*/ +#else + +#include +#include +#include + +/* + * TCP echo server example using raw API. + * + * Echos all bytes sent by connecting client, + * and passively closes when client is done. 
+ * + */ + + +static struct tcp_pcb *echo_pcb; + +enum echo_states +{ + ES_NONE = 0, + ES_ACCEPTED, + ES_RECEIVED, + ES_CLOSING +}; + +struct echo_state +{ + u8_t state; + u8_t retries; + struct tcp_pcb *pcb; + /* pbuf (chain) to recycle */ + struct pbuf *p; +}; + +static err_t echo_accept(void *arg, struct tcp_pcb *newpcb, err_t err); +static err_t echo_recv(void *arg, struct tcp_pcb *tpcb, struct pbuf *p, err_t err); +static void echo_error(void *arg, err_t err); +static err_t echo_poll(void *arg, struct tcp_pcb *tpcb); +static err_t echo_sent(void *arg, struct tcp_pcb *tpcb, u16_t len); +static void echo_send(struct tcp_pcb *tpcb, struct echo_state *es); +static void echo_close(struct tcp_pcb *tpcb, struct echo_state *es); + +void +echo_init(void) +{ + echo_pcb = tcp_new(); + if (echo_pcb != NULL) + { + err_t err; + + err = tcp_bind(echo_pcb, IP_ADDR_ANY, 7); + if (err == ERR_OK) + { + echo_pcb = tcp_listen(echo_pcb); + tcp_accept(echo_pcb, echo_accept); + } else { + /* abort? output diagnostic? */ + } + } else { + /* abort? output diagnostic? */ + } +} + +static err_t +echo_accept(void *arg, struct tcp_pcb *newpcb, err_t err) +{ + err_t ret_err; + struct echo_state *es; + + LWIP_UNUSED_ARG(arg); + LWIP_UNUSED_ARG(err); + + /* commonly observed practive to call tcp_setprio(), why? 
*/ + tcp_setprio(newpcb, TCP_PRIO_MIN); + + es = (struct echo_state *)mem_malloc(sizeof(struct echo_state)); + if (es != NULL) + { + es->state = ES_ACCEPTED; + es->pcb = newpcb; + es->retries = 0; + es->p = NULL; + /* pass newly allocated es to our callbacks */ + tcp_arg(newpcb, es); + tcp_recv(newpcb, echo_recv); + tcp_err(newpcb, echo_error); + tcp_poll(newpcb, echo_poll, 0); + ret_err = ERR_OK; + } else { + ret_err = ERR_MEM; + } + return ret_err; +} + +static err_t +echo_recv(void *arg, struct tcp_pcb *tpcb, struct pbuf *p, err_t err) +{ + struct echo_state *es; + err_t ret_err; + + LWIP_ASSERT("arg != NULL",arg != NULL); + es = (struct echo_state *)arg; + if (p == NULL) + { + /* remote host closed connection */ + es->state = ES_CLOSING; + if(es->p == NULL) + { + /* we're done sending, close it */ + echo_close(tpcb, es); + } else { + /* we're not done yet */ + tcp_sent(tpcb, echo_sent); + echo_send(tpcb, es); + } + ret_err = ERR_OK; + } else if(err != ERR_OK) { + /* cleanup, for unkown reason */ + if (p != NULL) + { + es->p = NULL; + pbuf_free(p); + } + ret_err = err; + } else if(es->state == ES_ACCEPTED) { + /* first data chunk in p->payload */ + es->state = ES_RECEIVED; + /* store reference to incoming pbuf (chain) */ + es->p = p; + /* install send completion notifier */ + tcp_sent(tpcb, echo_sent); + echo_send(tpcb, es); + ret_err = ERR_OK; + } else if (es->state == ES_RECEIVED) { + /* read some more data */ + if(es->p == NULL) + { + es->p = p; + tcp_sent(tpcb, echo_sent); + echo_send(tpcb, es); + } else { + struct pbuf *ptr; + + /* chain pbufs to the end of what we recv'ed previously */ + ptr = es->p; + pbuf_chain(ptr,p); + } + ret_err = ERR_OK; + } else if(es->state == ES_CLOSING) { + /* odd case, remote side closing twice, trash data */ + tcp_recved(tpcb, p->tot_len); + es->p = NULL; + pbuf_free(p); + ret_err = ERR_OK; + } else { + /* unkown es->state, trash data */ + tcp_recved(tpcb, p->tot_len); + es->p = NULL; + pbuf_free(p); + ret_err = ERR_OK; + } + 
return ret_err; +} + +static void +echo_error(void *arg, err_t err) +{ + struct echo_state *es; + + LWIP_UNUSED_ARG(err); + + es = (struct echo_state *)arg; + if (es != NULL) + { + mem_free(es); + } +} + +static err_t +echo_poll(void *arg, struct tcp_pcb *tpcb) +{ + err_t ret_err; + struct echo_state *es; + + es = (struct echo_state *)arg; + if (es != NULL) + { + if (es->p != NULL) + { + /* there is a remaining pbuf (chain) */ + tcp_sent(tpcb, echo_sent); + echo_send(tpcb, es); + } else { + /* no remaining pbuf (chain) */ + if(es->state == ES_CLOSING) + { + echo_close(tpcb, es); + } + } + ret_err = ERR_OK; + } else { + /* nothing to be done */ + tcp_abort(tpcb); + ret_err = ERR_ABRT; + } + return ret_err; +} + +static err_t +echo_sent(void *arg, struct tcp_pcb *tpcb, u16_t len) +{ + struct echo_state *es; + + LWIP_UNUSED_ARG(len); + + es = (struct echo_state *)arg; + es->retries = 0; + + if(es->p != NULL) + { + /* still got pbufs to send */ + tcp_sent(tpcb, echo_sent); + echo_send(tpcb, es); + } else { + /* no more pbufs to send */ + if(es->state == ES_CLOSING) + { + echo_close(tpcb, es); + } + } + return ERR_OK; +} + +static void +echo_send(struct tcp_pcb *tpcb, struct echo_state *es) +{ + struct pbuf *ptr; + err_t wr_err = ERR_OK; + + while ((wr_err == ERR_OK) && + (es->p != NULL) && + (es->p->len <= tcp_sndbuf(tpcb))) + { + ptr = es->p; + + /* enqueue data for transmission */ + wr_err = tcp_write(tpcb, ptr->payload, ptr->len, 1); + if (wr_err == ERR_OK) + { + u16_t plen; + u8_t freed; + + plen = ptr->len; + /* continue with next pbuf in chain (if any) */ + es->p = ptr->next; + if(es->p != NULL) + { + /* new reference! 
*/ + pbuf_ref(es->p); + } + /* chop first pbuf from chain */ + do + { + /* try hard to free pbuf */ + freed = pbuf_free(ptr); + } while(freed == 0); + /* we can read more data now */ + tcp_recved(tpcb, plen); + } else if(wr_err == ERR_MEM) { + /* we are low on memory, try later / harder, defer to poll */ + es->p = ptr; + } else { + /* other problem ?? */ + } + } +} + +static void +echo_close(struct tcp_pcb *tpcb, struct echo_state *es) +{ + tcp_arg(tpcb, NULL); + tcp_sent(tpcb, NULL); + tcp_recv(tpcb, NULL); + tcp_err(tpcb, NULL); + tcp_poll(tpcb, NULL, 0); + + if (es != NULL) + { + mem_free(es); + } + tcp_close(tpcb); +} + #endif /* LWIP_NETCONN */ -#endif +#endif /* CONFIG_LWIP */ diff --git a/apps/gfx_client.c b/apps/gfx_client.c index 5614849d..c07026a4 100644 --- a/apps/gfx_client.c +++ b/apps/gfx_client.c @@ -19,11 +19,13 @@ #include -#ifdef CONFIG_LWIP +#if defined(CONFIG_LWIP) && LWIP_SOCKET #include "gfx_client.h" #include +#ifdef CONFIG_GFX + static int myrank; static int sockfd; @@ -38,7 +40,9 @@ int gfx_init(char* ip_str, char* port_str, int rank) { char* hostname; int port; struct sockaddr_in serveraddr; +#if USE_GETHOSTBYNAME struct hostent *server; +#endif //*pargc -=2; myrank = rank; @@ -144,3 +148,5 @@ int gfx_finalize(){ } #endif + +#endif diff --git a/apps/gfx_client.h b/apps/gfx_client.h index c36743c7..77da7c6c 100644 --- a/apps/gfx_client.h +++ b/apps/gfx_client.h @@ -23,8 +23,10 @@ #include #include #include +#include "tests.h" -#ifdef CONFIG_LWIP + +#if defined(CONFIG_GFX) && defined(CONFIG_LWIP) && LWIP_SOCKET #define BUFSIZE 1024 diff --git a/apps/gfx_generic.c b/apps/gfx_generic.c index 63b6e570..69ef0ece 100644 --- a/apps/gfx_generic.c +++ b/apps/gfx_generic.c @@ -20,22 +20,7 @@ #include "gfx_client.h" #include "gfx_generic.h" -#ifdef CONFIG_LWIP - -int GFX_init(int* pargc, char*** pargv, int rank) -{ - return gfx_init(pargc, pargv, rank); -} - -int GFX_send(char* buf, int size, int tag) -{ - return gfx_send(buf, size, tag); -} - -int 
GFX_finalize() -{ - return gfx_finalize(); -} +#if defined(CONFIG_GFX) && defined(CONFIG_LWIP) && LWIP_SOCKET int GFX_update() { diff --git a/apps/gfx_generic.h b/apps/gfx_generic.h index a2cb57a0..1b1ded4c 100644 --- a/apps/gfx_generic.h +++ b/apps/gfx_generic.h @@ -22,11 +22,11 @@ #include "gfx_client.h" -#ifdef CONFIG_LWIP +#if defined(CONFIG_LWIP) && defined(CONFIG_GFX) && LWIP_SOCKET -int GFX_init(int* pargc, char*** pargv, int rank); -int GFX_send(char* buf, int size, int tag); -int GFX_finalize(); +#define GFX_init(ip_str, port_str, rank) gfx_init(ip_str, port_str, rank) +#define GFX_send(buf, size, tag) gfx_send(buf, size, tag) +#define GFX_finalize() gfx_finalize() int GFX_update(); diff --git a/apps/jacobi.c b/apps/jacobi.c new file mode 100644 index 00000000..c111c495 --- /dev/null +++ b/apps/jacobi.c @@ -0,0 +1,374 @@ + +/* + * Copyright 2011 Stefan Lankes, Alexander Pilz, Maximilian Marx, Michael Ober, + * Chair for Operating Systems, RWTH Aachen University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tests.h" + +#ifdef START_KERNEL_JACOBI + +#define MATRIX_SIZE 256 +#define MAXVALUE 1337 +#define PAGE_SIZE 4096 +#define CACHE_SIZE (256*1024) +#define SIZE ((MATRIX_SIZE+1)*MATRIX_SIZE*sizeof(double)+2*MATRIX_SIZE*sizeof(double)+10*PAGE_SIZE+CACHE_SIZE) +#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) +#define RAND_MAX 32767 + +//#define SVM_TYPE SVM_STRONG +#define SVM_TYPE SVM_LAZYRELEASE + +#define fabs(x) (x) >= 0 ? (x) : -1.0*(x) + +static unsigned int seed = 0; + +static int srand(unsigned int s) +{ + seed = s; + + return 0; +} + +/* Pseudo-random generator based on Minimal Standard by + Lewis, Goodman, and Miller in 1969. + + I[j+1] = a*I[j] (mod m) + + where a = 16807 + m = 2147483647 + + Using Schrage's algorithm, a*I[j] (mod m) can be rewritten as: + + a*(I[j] mod q) - r*{I[j]/q} if >= 0 + a*(I[j] mod q) - r*{I[j]/q} + m otherwise + + where: {} denotes integer division + q = {m/a} = 127773 + r = m (mod a) = 2836 + + note that the seed value of 0 cannot be used in the calculation as + it results in 0 itself +*/ + +static int rand(void) +{ + long k; + long s = (long)(seed); + if (s == 0) + s = 0x12345987; + k = s / 127773; + s = 16807 * (s - k * 127773) - 2836 * k; + if (s < 0) + s += 2147483647; + seed = (unsigned int)s; + return (int)(s & RAND_MAX); +} + +static inline void cache_invalidate(void) +{ + asm volatile ( ".byte 0x0f; .byte 0x0a;\n" ); // CL1FLUSHMB +} + +static int generate_empty_matrix(double*** A , unsigned int N, int rankID) { + unsigned int iCnt; + int i,j; + unsigned int iter_start, iter_end, pad; + int num = RCCE_NP; + + pad = N/num; + if (pad % 4) { + pad -= pad % 4; + + unsigned int p = (N - num * pad) / 4; + + if (rankID < p) { + iter_start = rankID*(pad+4); + iter_end = (rankID+1)*(pad+4); + } else { + iter_start = p*(pad+4)+(rankID-p)*pad; + iter_end = p*(pad+4)+(rankID+1-p)*pad; + } + } else { + iter_start = rankID*pad; + iter_end = 
(rankID+1)*pad; + } + kprintf("iter_start %d, iter_end %d\n", iter_start, iter_end); + + *A = (double**) kmalloc((N+1)*sizeof(double*)); + + if (*A == NULL) + return -2; /* Error */ + + svm_barrier(SVM_TYPE); + + **A = (double*) svm_malloc((N+1)*N*sizeof(double), SVM_TYPE); + + if (**A == NULL) + return -2; /* Error */ + + svm_barrier(SVM_TYPE); + + for(iCnt=1; iCnt Sum |A[i][j]| with (i != j) + */ + + (*A)[i][i] = sum + 2.0; + (*A)[i][N] += sum + 2.0; + } + + svm_flush(0); + svm_invalidate(); + } + + svm_barrier(SVM_TYPE); + + return 0; +} + +int jacobi(void* argv) +{ + volatile double* temp = NULL; + volatile double* swap; + unsigned int i, j, k, iter_start, iter_end, pad; + unsigned int iterations = 0; + double error, norm, max = 0.0; + double** A=0; + volatile double* X; + volatile double* X_old; + double xi; + uint64_t start, stop; + int rankID, num; + + rankID = RCCE_IAM; + num = RCCE_NP; + + if (generate_empty_matrix(&A,MATRIX_SIZE,rankID) < 0) + { + kprintf("generate_empty_matrix() failed...\n"); + return -1; + + } + + if (rankID == 0) + kprintf("generate_empty_matrix() done...\n"); + + svm_barrier(SVM_TYPE); + + X = (double*) svm_malloc(MATRIX_SIZE*sizeof(double), SVM_TYPE); + X_old = (double*) svm_malloc(MATRIX_SIZE*sizeof(double), SVM_TYPE); + + if (X == NULL || X_old == NULL) + { + kprintf("X or X_old is NULL...\n"); + return -1; + } + + temp = (double*) svm_malloc(PAGE_SIZE, SVM_LAZYRELEASE); + if (temp == NULL) + { + kprintf("temp is NULL...\n"); + return -1; + } + + if (rankID == 0) { + memset((void*)temp, 0x00, PAGE_SIZE); + for(i=0; i 1) && (rankID == 0)) { /* write always a complete cache line */ + memset((void*)temp, 0, CACHE_LINE); + svm_flush(0); + } + + svm_barrier(SVM_TYPE); + + for(i=iter_start; i 1) { + RCCE_acquire_lock(0); + svm_invalidate(); + norm += temp[0]; + temp[0] = norm; + svm_flush(0); + RCCE_release_lock(0); + + svm_barrier(SVM_LAZYRELEASE); + norm = temp[0]; + } +#endif + + /* check the break condition */ + norm /= (double) 
MATRIX_SIZE; + + if (norm < 0.0000001) + ; //break; + } else { + svm_barrier(SVM_TYPE); + } + + //if (k % 100 == 0) + // kprintf("k = %d\n", k); + } + + stop = rdtsc(); + + if (MATRIX_SIZE < 16) { + kprintf("Print the solution...\n"); + /* print solution */ + for(i=0; i 0.01) + kprintf("Result is on position %d wrong (%d/10000 != 1.0, error %d/10000)\n", i, (int) (10000.0*X[i]), (int) (10000.0*error)); + } + kprintf("maximal error is %d/10000\n", (int) (10000.0*max)); + + kprintf("\nmatrix size: %d x %d\n", MATRIX_SIZE, MATRIX_SIZE); + kprintf("number of iterations: %d\n", iterations); + kprintf("Calculation time: %llu ms (%llu ticks)\n", (stop-start)/(1000ULL*get_cpu_frequency()), stop-start); + + return 0; +} + +#endif diff --git a/apps/laplace.c b/apps/laplace.c index 91f9e407..83a5182f 100644 --- a/apps/laplace.c +++ b/apps/laplace.c @@ -22,7 +22,9 @@ #include #include -#ifdef CONFIG_ROCKCREEK +#include "tests.h" + +#ifdef START_KERNEL_LAPLACE #include #include @@ -49,17 +51,14 @@ #define TMAX (100*50) -//#define DATA unsigned int -#define DATA double +//#define DATA volatile unsigned int +#define DATA volatile double + //#define FIX 1024 #define FIX 1 -#define USE_STRONG 1 -#define USE_LAZYRELEASE 0 - -#if USE_STRONG && USE_LAZYRELEASE -#error Please, use only one memory model -#endif +#define SVM_TYPE SVM_STRONG +//#define SVM_TYPE SVM_LAZYRELEASE static inline double pow(double a, int b) { @@ -76,7 +75,6 @@ int laplace(void *arg) { //char* argv[] = {"/bin/laplace", "192.168.4.254", "12301", NULL}; //int argc = 3; - uint32_t flags; #ifdef _USE_GFX uint32_t ret; #endif @@ -89,23 +87,21 @@ int laplace(void *arg) int n; int m; - volatile DATA **NewValues; - volatile DATA **OldValues; + DATA **NewValues; + DATA **OldValues; - volatile DATA **tmp; + DATA **tmp; - volatile char **BufValues; + char **BufValues; uint64_t start, end; - flags = irq_nested_disable(); - my_rank = RCCE_ue(); - num_ranks = RCCE_num_ues(); - irq_nested_enable(flags); + my_rank = RCCE_IAM; + 
num_ranks = RCCE_NP; #ifdef _USE_GFX kprintf("Laplace calls gfx_init\n"); - ret = gfx_init("192.168.4.254" /*&argc */ , "5000" /*&argv */ , my_rank); + ret = GFX_init("192.168.4.254" /*&argc */ , "5000" /*&argv */ , my_rank); kprintf("gfx_init: %d\n", ret); #endif @@ -118,7 +114,7 @@ int laplace(void *arg) if (my_rank == num_ranks - 1) n += N % num_ranks; - kprintf("(%d) %d x %d / offsets: %d, %d / (%d x %d)\n", my_rank, N, M, I, J, n, m); + kprintf("(%d of %d) %d x %d / offsets: %d, %d / (%d x %d)\n", my_rank, num_ranks, N, M, I, J, n, m); #ifdef _USE_GFX if (my_rank == 0) { @@ -140,19 +136,15 @@ int laplace(void *arg) #endif NewValues = (volatile DATA **)kmalloc((N + 2) * sizeof(DATA *)); -#if USE_STRONG - NewValues[0] = (DATA *) svmmalloc((N + 2) * (M + 2) * sizeof(DATA), SVM_STRONG); -#elif USE_LAZYRELEASE - NewValues[0] = (DATA *) svmmalloc((N + 2) * (M + 2) * sizeof(DATA), SVM_LAZYRELEASE); +#ifdef SVM_TYPE + NewValues[0] = (DATA *) svm_malloc((N + 2) * (M + 2) * sizeof(DATA), SVM_TYPE); #else NewValues[0] = (DATA *) kmalloc((N + 2) * (M + 2) * sizeof(DATA)); #endif OldValues = (volatile DATA **)kmalloc((N + 2) * sizeof(DATA *)); -#if USE_STRONG - OldValues[0] = (DATA *) svmmalloc((N + 2) * (M + 2) * sizeof(DATA), SVM_STRONG); -#elif USE_LAZYRELEASE - OldValues[0] = (DATA *) svmmalloc((N + 2) * (M + 2) * sizeof(DATA), SVM_LAZYRELEASE); +#ifdef SVM_TYPE + OldValues[0] = (DATA *) svm_malloc((N + 2) * (M + 2) * sizeof(DATA), SVM_TYPE); #else OldValues[0] = (DATA *) kmalloc((N + 2) * (M + 2) * sizeof(DATA)); #endif @@ -162,14 +154,16 @@ int laplace(void *arg) OldValues[i] = OldValues[i - 1] + (M + 2); } - BufValues = (volatile char **)kmalloc((N) * sizeof(char *)); + BufValues = (char **)kmalloc((N) * sizeof(char *)); BufValues[0] = (char *)kmalloc((N) * (M) * sizeof(char)); for (i = 1; i < N; i++) { BufValues[i] = BufValues[i - 1] + (M); } - RCCE_barrier(&RCCE_COMM_WORLD); +#ifdef SVM_TYPE + svm_barrier(SVM_TYPE); +#endif kprintf("(%d) Memory allocated!\n", 
my_rank); @@ -178,46 +172,67 @@ int laplace(void *arg) int height = N + 2; int width = M + 2; - /*if (my_rank == 0) { - for (i = 0; i < N + 2; i++) { - for (j = 0; j < M + 2; j++) {*/ - { - for (i = I; i < I + n + 2; i++) { - for (j = 0; j < M + 2; j++) { + if (my_rank == 0) { + for (j = 0; j < m + 2; j++) { + double X = (((double)(J+j) / (double)width) * 5.0) - 2.5; + double Y = 0.0; + double Z = 0.0; - double X = (((double)j / (double)width) * 5.0) - 2.5; - double Y = (((double)i / (double)height) * 5.0) - 2.5; - double Z = 0.0; + Z = pow((4 - (X + 1) * (X + 1) - 4 * Y * Y), 2) + pow(1.2 * (1 - X), 3) - 10; + if (Z < 0.0) + Z = 1.0; + else if (Z > 0.0) + Z = 0.0; - Z = pow((4 - (X + 1) * (X + 1) - 4 * Y * Y), 2) + pow(1.2 * (1 - X), 3) - 10; - - if (Z < 0.0) - Z = 1.0; - else if (Z > 0.0) - Z = 0.0; - - OldValues[i][j] = NewValues[i][j] = (DATA) ((Z) * 255.0) * FIX; - - //if(NewValues[i][j] < 0) NewValues[i][j] = 0; - } + OldValues[0][J+j] = NewValues[0][J+j] = (DATA) ((Z) * 255.0) * FIX; } } -#if USE_LAZYRELEASE - svm_flush(); - svm_invalidate(); + for (i = 1; i < n+1; i++) { + for (j = 0; j < m + 2; j++) { + double X = (((double)(J+j) / (double)width) * 5.0) - 2.5; + double Y = (((double)(I+i) / (double)height) * 5.0) - 2.5; + double Z = 0.0; + + Z = pow((4 - (X + 1) * (X + 1) - 4 * Y * Y), 2) + pow(1.2 * (1 - X), 3) - 10; + if (Z < 0.0) + Z = 1.0; + else if (Z > 0.0) + Z = 0.0; + + OldValues[I+i][J+j] = NewValues[I+i][J+j] = (DATA) ((Z) * 255.0) * FIX; + } + } + + if (my_rank == num_ranks - 1) { + for (j = 0; j < m + 2; j++) { + double X = (((double)(J+j) / (double)width) * 5.0) - 2.5; + double Y = (((double)(I+n+1) / (double)height) * 5.0) - 2.5; + double Z = 0.0; + + Z = pow((4 - (X + 1) * (X + 1) - 4 * Y * Y), 2) + pow(1.2 * (1 - X), 3) - 10; + if (Z < 0.0) + Z = 1.0; + else if (Z > 0.0) + Z = 0.0; + + OldValues[I+n+1][J+j] = NewValues[I+n+1][J+j] = (DATA) ((Z) * 255.0) * FIX; + } + } + +#ifdef SVM_TYPE + svm_barrier(SVM_TYPE); #endif - 
RCCE_barrier(&RCCE_COMM_WORLD); kprintf("(%d) Arrays initialized!\n", my_rank); start = rdtsc(); - start = rdtsc(); // START ITERATIONS LOOP for (t = 0; t < TMAX; t++) { //kprintf("(%d): o:%u n:%u \n",my_rank,(unsigned int)(OldValues[I+1][J+1]), (unsigned int)(NewValues[I+1][J+1]) ); + //kprintf("(%d): t: %u\n", my_rank, t); // over all collumns for (i = 1; i < n + 1; i++) { @@ -230,16 +245,14 @@ int laplace(void *arg) OldValues[I + i][J + j + 1]) / 4; } } -#if USE_LAZYRELEASE - svm_flush(); - svm_invalidate(); -#endif tmp = NewValues; NewValues = OldValues; OldValues = tmp; - RCCE_barrier(&RCCE_COMM_WORLD); +#ifdef SVM_TYPE + svm_barrier(SVM_TYPE); +#endif #ifdef _USE_GFX if ((my_rank == 0) && (t % 50 == 0)) { @@ -263,18 +276,22 @@ int laplace(void *arg) GFX_update(); } - RCCE_barrier(&RCCE_COMM_WORLD); +#ifdef SVM_TYPE + svm_barrier(SVM_TYPE); +#endif #endif // END ITERATIONS LOOP } - RCCE_barrier(&RCCE_COMM_WORLD); +#ifdef SVM_TYPE + svm_barrier(SVM_TYPE); +#endif end = rdtsc(); kprintf("Calculation time: %llu ms (%llu ticks)\n", (end-start)/(1000ULL*get_cpu_frequency()), end-start); -#if USE_STRONG || USE_LAZYRELEASE +#ifdef SVM_TYPE svm_statistics(); #endif } diff --git a/apps/netio.c b/apps/netio.c index 9a6c374c..9649e665 100644 --- a/apps/netio.c +++ b/apps/netio.c @@ -24,6 +24,8 @@ #include #include +#include "tests.h" + #ifdef CONFIG_ROCKCREEK #include #include @@ -46,7 +48,7 @@ /* See http://www.nwlab.net/art/netio/netio.html to get the netio tool */ -#ifdef CONFIG_LWIP +#if defined(START_NETIO) && defined(CONFIG_LWIP) #ifdef CONFIG_ROCKCREEK #if USE_SOCKET_BYPASSING // for socket bypassing #include @@ -90,7 +92,7 @@ static struct in_addr addr_server; static int send_data(int socket, void *buffer, size_t size, int flags) { - int rc = send(socket, buffer, size, flags); + ssize_t rc = send(socket, buffer, size, flags); if (rc < 0) { @@ -106,7 +108,7 @@ static int send_data(int socket, void *buffer, size_t size, int flags) static int recv_data(int socket, 
void *buffer, size_t size, int flags) { - size_t rc = recv(socket, buffer, size, flags); + ssize_t rc = recv(socket, buffer, size, flags); if (rc < 0) { kprintf("recv failed: %d\n", errno); diff --git a/apps/tests.c b/apps/tests.c index d6b8a5eb..4164ec5b 100644 --- a/apps/tests.c +++ b/apps/tests.c @@ -26,6 +26,9 @@ #include #include #include +#ifdef CONFIG_LWIP +#include +#endif #ifdef CONFIG_ROCKCREEK #include #include @@ -36,22 +39,28 @@ #include #endif +#include "tests.h" + +int laplace(void* arg); +int jacobi(void* arg); +void echo_init(void); +void netio_init(void); + +#ifdef START_CONSUMER_PRODUCER static sem_t consuming, producing; static mailbox_int32_t mbox; static int val = 0; -int laplace(void* arg); - static int consumer(void* arg) { int i, m = 0; for(i=0; i<5; i++) { sem_wait(&consuming, 0); - kprintf("Consumer got %d\n", val); - val = 0; - sem_post(&producing); - } + kprintf("Consumer got %d\n", val); + val = 0; + sem_post(&producing); + } for(i=0; i<5; i++) { mailbox_int32_fetch(&mbox, &m, 0); @@ -80,7 +89,9 @@ static int producer(void* arg) return 0; } +#endif +#if defined(START_FOO) || defined(START_JOIN_TEST) static int foo(void* arg) { int i; @@ -89,32 +100,48 @@ static int foo(void* arg) return 0; for(i=0; i<5; i++) { - kprintf("%s\n", (char*) arg); + kprintf("Message from core %u: %s\n", CORE_ID, (char*) arg); sleep(1); } return 42; } +#endif -#ifdef CONFIG_ROCKCREEK +#ifdef START_MAIL_PING static int mail_ping(void* arg) { - //icc_mail_ping(); - icc_mail_ping_irq(); + int i; + + //for(i=0; i<5; ++i) + // icc_mail_ping(); + for(i=0; i<5; ++i) + icc_mail_ping_irq(); + //icc_mail_ping_jitter(); //icc_irq_ping(); + //icc_mail_datarates(); //icc_halt(); return 0; } +#endif - +#ifdef START_MAIL_NOISE static int mail_noise(void*arg) { icc_mail_noise(); // generate noise in the mesh return 0; } +#endif + +#ifdef START_SVM_TEST + +/* N has to be multiple of UEs */ #define N 1024 -//#define N 514 -#define LAZY +//#define N 512 +//#define N 128 + 
+//#define SVM_TYPE SVM_STRONG +#define SVM_TYPE SVM_LAZYRELEASE volatile static int* A[N]; volatile static int* B[N]; @@ -130,14 +157,19 @@ static int svm_test(void *arg) { uint64_t start, end; uint32_t i, j, k; + + uint32_t svm_flags; + int my_ue, num_ues; register int tmp; + kputs("Start SVM test...\n"); + RCCE_barrier(&RCCE_COMM_WORLD); my_ue = RCCE_ue(); num_ues = RCCE_num_ues(); -#if 0 +#if 1 if (!my_ue) { // allocate and initialize SVM region A[0] = (int*) kmalloc(3*N*N*sizeof(int)); @@ -182,13 +214,19 @@ static int svm_test(void *arg) #endif // allocate and initialize SVM region -#ifndef LAZY - A[0] = (int*) svmmalloc(3*N*N*sizeof(int), SVM_STRONG); -#else - A[0] = (int*) svmmalloc(3*N*N*sizeof(int), SVM_LAZYRELEASE); -#endif - if (!my_ue) + + svm_flags = SVM_TYPE; + if (svm_flags & SVM_LAZYRELEASE) + kputs("Use Lazy Release consistency!\n"); + else + kputs("Use Strong Release consistency!\n"); + + A[0] = (int*) svm_malloc(3*N*N*sizeof(int), svm_flags); + +#if 1 + if (!my_ue) memset((void*) A[0], 0x00, 3*N*N*sizeof(int)); +#endif // initialize matrices for(i=0; i 1) + return -1; + + if (svm_flags & SVM_LAZYRELEASE) + kputs("Use Lazy Release consistency!\n"); + else + kputs("Use Strong Release consistency!\n"); + svm_barrier(svm_flags); + + start = rdtsc(); + start = rdtsc(); + array = (volatile uint32_t*) svm_malloc(size, svm_flags); + end = rdtsc(); + + if (BUILTIN_EXPECT(!array, 0)) { + kprintf("Out of memory\n"); + return -1; + } + + kprintf("Time to allocate %u Bytes: %llu usec (%llu ticks)\n", size, (end-start)/get_cpu_frequency(), end-start); + + svm_barrier(svm_flags); + if (!RCCE_IAM) { + start = rdtsc(); + for(i=0; i> PAGE_SHIFT, (end-start)/get_cpu_frequency(), end-start); + } + + svm_barrier(svm_flags); + if (RCCE_IAM) { + start = rdtsc(); + for(i=0; i> PAGE_SHIFT, (end-start)/get_cpu_frequency(), end-start); + } + + svm_barrier(svm_flags); + if (!RCCE_IAM) { + start = rdtsc(); + for(i=0; i> PAGE_SHIFT, (end-start)/get_cpu_frequency(), 
end-start); + } + + svm_barrier(svm_flags); + start = rdtsc(); + change_page_permissions((size_t) array, size, VMA_CACHEABLE|VMA_READ); + end = rdtsc(); + kprintf("Time to change access permissions of %u page frames: %llu usec (%llu ticks)\n", size >> PAGE_SHIFT, (end-start)/get_cpu_frequency(), end-start); + + svm_barrier(svm_flags); + svm_free((void*) array, N*N*sizeof(uint32_t)); + + svm_statistics(); + + return 0; +} +#endif + +#ifdef START_JOIN_TEST static int join_test(void* arg) { tid_t id, ret; @@ -275,7 +399,9 @@ static int join_test(void* arg) return 0; } +#endif +#ifdef START_PI #ifndef M_PI #define M_PI 3.14159265358979323846264338327950288 /* pi */ #endif @@ -300,37 +426,153 @@ static int pi(void* arg) return 0; } +#endif + +#ifdef START_MEASURE_CTX_SWITCH +#define REPS 10000 + +volatile uint64_t t1, t2; +volatile int stop = !!0; +volatile int sid = 0; + +static int measure_ctx_switch(void* arg) +{ + int id = !!(int)arg; + int oid = !id; + uint64_t freq = get_cpu_frequency() *1000 *1000; + uint64_t diff, min = (uint64_t)-1, max = 0, avg = 0; + int i; + uint32_t a=0,b,c,d; + + // Size of a timeslice in ticks + uint64_t timeslice = freq / TIMER_FREQ; + + kprintf("ID: %d, ", id); + kprintf("Measuring SW task switching.\n"); + + for (i=0; i < REPS && stop == 0; i++) { + while(id == sid && stop == 0) { + t2 = rdtsc(); + cpuid(0,&a,&b,&c,&d); + } + + cpuid(0,&a,&b,&c,&d); + diff = rdtsc() -t2; + + // The last measurement is garbage + if (stop) break; + // The first ones are garbage, too + if (i < 5) goto next_try; + if (diff >= timeslice) { + i--; + goto next_try; + } + + kprintf("%i: diff= %llu, i= %i\n", id, diff, i); + if (diff > max) max = diff; + if (diff < min) min = diff; + avg += diff; + +next_try: + sid = id; + } + avg /= i-5; + + stop = 1; + + kprintf("maximum gap: %llu ticks\n", max); + kprintf("minimum gap: %llu ticks\n", min); + kprintf("average gap: %llu ticks\n", avg); + kprintf("Timeslice size: %llu ticks\n", timeslice); + + return 0; + } 
+#endif int test_init(void) { -// char* argv[] = {"/bin/mshell", NULL}; - char* argv[] = {"/bin/tests", NULL}; +#ifdef START_HELLO + char* hello_argv[] = {"/bin/hello", NULL}; +#endif +#ifdef START_TESTS + char* tests_argv[] = {"/bin/tests", NULL}; +#endif +#ifdef START_JACOBI + char* jacobi_argv[] = {"/bin/jacobi", NULL}; +#endif +#ifdef START_MMNIF_TEST char* server_argv[] = {"/bin/server", "6789", NULL}; char* client_argv[] = {"/bin/client", "192.168.0.1", "6789", NULL}; +#endif +#ifdef START_ECHO + echo_init(); +#endif +#ifdef START_NETIO + netio_init(); +#endif +#ifdef START_CONSUMER_PRODUCER sem_init(&producing, 1); sem_init(&consuming, 0); mailbox_int32_init(&mbox); + create_kernel_task(NULL, producer, NULL, NORMAL_PRIO); + create_kernel_task(NULL, consumer, NULL, NORMAL_PRIO); +#endif +#ifdef START_MEASURE_CTX_SWITCH + create_kernel_task(NULL, measure_ctx_switch, (int)0, NORMAL_PRIO); + create_kernel_task(NULL, measure_ctx_switch, (int)1, NORMAL_PRIO); +#endif +#ifdef START_FOO create_kernel_task(NULL, foo, "Hello from foo1", NORMAL_PRIO); + //create_kernel_task_on_core(NULL, foo, "Hello from foo2", NORMAL_PRIO, 1); +#endif +#ifdef START_JOIN_TEST create_kernel_task(NULL, join_test, NULL, NORMAL_PRIO); - //create_kernel_task(NULL, producer, , NORMAL_PRIO); - //create_kernel_task(NULL, consumer, NULL, NORMAL_PRIO); - //create_kernel_task(NULL, mail_ping, NULL, NORMAL_PRIO); - //create_kernel_task(NULL, mail_noise, NULL, NORMAL_PRIO); - //create_kernel_task(NULL, svm_test, NULL, NORMAL_PRIO); - //create_kernel_task(NULL, pi, NULL, NORMAL_PRIO); - //create_kernel_task(NULL, laplace, NULL, NORMAL_PRIO); - //create_user_task(NULL, "/bin/hello", argv); - create_user_task(NULL, "/bin/tests", argv); - //create_user_task(NULL, "/bin/jacobi", argv); - //create_user_task(NULL, "/bin/mshell", argv); - //create_user_task(NULL, "/bin/jacobi", argv); - /*create_user_task(NULL, "/bin/server", server_argv); - if (RCCE_ue() != 0) { +#endif +#ifdef START_MAIL_PING + 
create_kernel_task(NULL, mail_ping, NULL, NORMAL_PRIO); +#endif +#ifdef START_MAIL_NOISE + create_kernel_task(NULL, mail_noise, NULL, NORMAL_PRIO); +#endif +#ifdef START_SVM_TEST + create_kernel_task(NULL, svm_test, NULL, NORMAL_PRIO); +#endif +#ifdef START_SVM_BENCH + create_kernel_task(NULL, svm_bench, NULL, NORMAL_PRIO); +#endif +#ifdef START_PI + create_kernel_task(NULL, pi, NULL, NORMAL_PRIO); +#endif +#ifdef START_KERNEL_LAPLACE + create_kernel_task(NULL, laplace, NULL, NORMAL_PRIO); +#endif +#ifdef START_KERNEL_JACOBI + create_kernel_task(NULL, jacobi, NULL, NORMAL_PRIO); +#endif +#ifdef START_HELLO + create_user_task(NULL, "/bin/hello", hello_argv); +#endif +#ifdef START_TESTS + create_user_task(NULL, "/bin/tests", tests_argv); +#endif +#ifdef START_JACOBI + create_user_task(NULL, "/bin/jacobi", jacobi_argv); + //create_user_task_on_core(NULL, "/bin/jacobi", jacobi_argv, 1); +#endif +#ifdef START_MMNIF_TEST +#if defined(CONFIG_LWIP) && LWIP_SOCKET + if (RCCE_IAM == 0) { + kprintf("Start /bin/server...\n"); + create_user_task(NULL, "/bin/server", server_argv); + } else { sleep(5); + kprintf("Start /bin/client...\n"); create_user_task(NULL, "/bin/client", client_argv); - }*/ + } +#endif +#endif return 0; } diff --git a/apps/tests.h b/apps/tests.h new file mode 100644 index 00000000..fb27e1c8 --- /dev/null +++ b/apps/tests.h @@ -0,0 +1,53 @@ +/* + * Copyright 2010 Stefan Lankes, Chair for Operating Systems, + * RWTH Aachen University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is part of MetalSVM. + */ + +#ifndef CONFIG_TEST_H +#define CONFIG_TEST_H + +#include + +// define test applications, which will be started +#ifdef CONFIG_ROCKCREEK +//#define START_SVM_TEST +//#define START_SVM_BENCH +//#define START_MAIL_PING +//#define START_MAIL_NOISE +//#define START_KERNEL_LAPLACE +//#define START_KERNEL_JACOBI +#define START_MMNIF_TEST +#endif +#ifdef CONFIG_LWIP +#define START_ECHO +#ifndef CONFIG_TICKLESS +//#define START_NETIO +#endif +#endif +//#define START_CONSUMER_PRODUCER +#define START_FOO +//#define START_JOIN_TEST +//#define START_PI +//#define START_MEASURE_CTX_SWITCH +//#define START_HELLO +#define START_TESTS +//#define START_JACOBI + +// does our demos require GFX support? +//#define CONFIG_GFX + +#endif diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 017f782d..c4235f8b 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -193,6 +193,8 @@ uint32_t apic_cpu_id(void); int apic_calibration(void); int has_apic(void); int apic_is_enabled(void); +int apic_enable_timer(void); +int apic_disable_timer(void); int ioapic_inton(uint8_t irq, uint8_t apicid); int ioapic_intoff(uint8_t irq, uint8_t apicid); int map_apic(void); diff --git a/arch/x86/include/asm/gdt.h b/arch/x86/include/asm/gdt.h index fb4bd4c1..c86a5d4b 100644 --- a/arch/x86/include/asm/gdt.h +++ b/arch/x86/include/asm/gdt.h @@ -36,35 +36,37 @@ extern "C" { #endif /// This segment is a data segment -#define GDT_FLAG_DATASEG 0x02 +#define GDT_FLAG_DATASEG 0x02 /// This segment is a code segment -#define GDT_FLAG_CODESEG 0x0a -#define GDT_FLAG_TSS 0x09 +#define GDT_FLAG_CODESEG 0x0a +#define GDT_FLAG_TSS 0x09 #define GDT_FLAG_TSS_BUSY 0x02 -#define GDT_FLAG_SEGMENT 0x10 +#define GDT_FLAG_SEGMENT 0x10 /// Privilege level: Ring 0 -#define GDT_FLAG_RING0 0x00 +#define GDT_FLAG_RING0 0x00 /// Privilege 
level: Ring 1 -#define GDT_FLAG_RING1 0x20 +#define GDT_FLAG_RING1 0x20 /// Privilege level: Ring 2 -#define GDT_FLAG_RING2 0x40 +#define GDT_FLAG_RING2 0x40 /// Privilege level: Ring 3 -#define GDT_FLAG_RING3 0x60 +#define GDT_FLAG_RING3 0x60 /// Segment is present -#define GDT_FLAG_PRESENT 0x80 +#define GDT_FLAG_PRESENT 0x80 /** * @brief Granularity of segment limit * - set: segment limit unit is 4 KB (page size) * - not set: unit is bytes */ -#define GDT_FLAG_4K_GRAN 0x80 +#define GDT_FLAG_4K_GRAN 0x80 /** * @brief Default operand size * - set: 32 bit * - not set: 16 bit */ -#define GDT_FLAG_32_BIT 0x40 +#define GDT_FLAG_16_BIT 0x00 +#define GDT_FLAG_32_BIT 0x40 +#define GDT_FLAG_64_BIT 0x20 /** @brief Defines a GDT entry * @@ -74,17 +76,17 @@ extern "C" { */ typedef struct { /// Lower 16 bits of limit range - unsigned short limit_low; + uint16_t limit_low; /// Lower 16 bits of base address - unsigned short base_low; + uint16_t base_low; /// middle 8 bits of base address - unsigned char base_middle; + uint8_t base_middle; /// Access bits - unsigned char access; + uint8_t access; /// Granularity bits - unsigned char granularity; + uint8_t granularity; /// Higher 8 bits of base address - unsigned char base_high; + uint8_t base_high; } __attribute__ ((packed)) gdt_entry_t; /** @brief defines the GDT pointer structure @@ -93,13 +95,18 @@ typedef struct { */ typedef struct { /// Size of the table in bytes (not the number of entries!) - unsigned short limit; + uint16_t limit; /// Address of the table - unsigned int base; + size_t base; } __attribute__ ((packed)) gdt_ptr_t; /// Defines the maximum number of GDT entries -#define GDT_ENTRIES (5+MAX_TASKS) +#ifdef CONFIG_X86_32 +#define GDT_ENTRIES (5+MAX_TASKS) +#else +// a TSS descriptor is twice larger than a code/data descriptor +#define GDT_ENTRIES (5+MAX_TASKS*2) +#endif #if GDT_ENTRIES > 8192 #error Too many GDT entries! 
#endif @@ -117,12 +124,12 @@ void gdt_install(void); /** @brief Configures and returns a GDT descriptor with chosen attributes * - * Just feed this function with address, limit and the flags + * Just feed this function with address, limit and the flags * you have seen in idt.h * * @return a preconfigured gdt descriptor */ -gdt_entry_t configure_gdt_entry(unsigned long base, unsigned long limit, +void configure_gdt_entry(gdt_entry_t *dest_entry, unsigned long base, unsigned long limit, unsigned char access, unsigned char gran); #ifdef __cplusplus diff --git a/arch/x86/include/asm/icc.h b/arch/x86/include/asm/icc.h index 5075e7ed..b9b82c24 100644 --- a/arch/x86/include/asm/icc.h +++ b/arch/x86/include/asm/icc.h @@ -42,7 +42,8 @@ extern bootinfo_t* bootinfo; enum icc_mail_requests { PING_REQ=1, PING_RESP, - SVM_REQUEST, + SVM_REQ, + SVM_RESP, NOISE, }; @@ -52,7 +53,9 @@ void icc_mail_check(void); int icc_mail_ping(void); int icc_send_gic_irq(int core_num); int icc_mail_ping_irq(void); +int icc_mail_ping_jitter(void); int icc_mail_noise(void); +void icc_wait(int tag); #endif diff --git a/arch/x86/include/asm/idt.h b/arch/x86/include/asm/idt.h index a30cddb1..5ab81dda 100644 --- a/arch/x86/include/asm/idt.h +++ b/arch/x86/include/asm/idt.h @@ -71,15 +71,21 @@ extern "C" { */ typedef struct { /// Handler function's lower 16 address bits - unsigned short base_lo; + uint16_t base_lo; /// Handler function's segment selector. - unsigned short sel; + uint16_t sel; /// These bits are reserved by Intel - unsigned char always0; + uint8_t always0; /// These 8 bits contain flags. Exact use depends on the type of interrupt gate. 
- unsigned char flags; + uint8_t flags; /// Higher 16 bits of handler function's base address - unsigned short base_hi; + uint16_t base_hi; +#ifdef CONFIG_X86_64 + /// In 64 bit mode, the "highest" 32 bits of the handler function's base address + uint32_t base_hi64; + /// resvered entries + uint32_t reserved; +#endif } __attribute__ ((packed)) idt_entry_t; /** @brief Defines the idt pointer structure. @@ -89,9 +95,9 @@ typedef struct { */ typedef struct { /// Size of the IDT in bytes (not the number of entries!) - unsigned short limit; + uint16_t limit; /// Base address of the IDT - unsigned int base; + size_t base; } __attribute__ ((packed)) idt_ptr_t; /** @brief Installs IDT @@ -120,8 +126,8 @@ void idt_set_gate(unsigned char num, size_t base, unsigned short sel, * * @return a preconfigured idt descriptor */ -idt_entry_t configure_idt_entry(size_t base, unsigned short sel, - unsigned char flags); +void configure_idt_entry(idt_entry_t *dest_entry, size_t base, + unsigned short sel, unsigned char flags); #ifdef __cplusplus } diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 91e4939f..6595695c 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -65,6 +65,18 @@ int irq_uninstall_handler(unsigned int irq); */ int irq_init(void); +/** @brief Disable the timer interrupt + * + * @return 0 on success + */ +int disable_timer_irq(void); + +/** @brief Enable the timer interrupt + * + * @return 0 on success + */ +int enable_timer_irq(void); + #ifdef __cplusplus } #endif diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 093fb61f..8e911152 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -51,8 +51,8 @@ inline static void irq_disable(void) { * @return The set of flags which have been set until now */ inline static uint32_t irq_nested_disable(void) { - uint32_t flags; - asm volatile("pushf; cli; popl %0": "=r"(flags) : : "memory"); + size_t flags; + asm 
volatile("pushf; cli; pop %0": "=r"(flags) : : "memory"); if (flags & (1 << 9)) return 1; return 0; @@ -83,8 +83,8 @@ inline static void irq_nested_enable(uint32_t flags) { */ inline static uint32_t is_irq_enabled(void) { - uint32_t flags; - asm volatile("pushf; popl %0": "=r"(flags) : : "memory"); + size_t flags; + asm volatile("pushf; pop %0": "=r"(flags) : : "memory"); if (flags & (1 << 9)) return 1; return 0; diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 5d35ac53..d02242fd 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -43,6 +43,7 @@ #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ #define _PAGE_BIT_SVM_STRONG 9 /* mark a virtual address range as used by the SVM system */ #define _PAGE_BIT_SVM_LAZYRELEASE 10 /* mark a virtual address range as used by the SVM system */ +#define _PAGE_BIT_SVM_INIT 11 /* mark if the MBP proxy is used */ /// Page is present #define PG_PRESENT (1 << _PAGE_BIT_PRESENT) @@ -67,9 +68,12 @@ /// Pattern flag #define PG_PAT (1 << _PAGE_BIT_PAT) /// This virtual address range is used by SVM system as marked +#define PG_SVM PG_SVM_STRONG #define PG_SVM_STRONG (1 << _PAGE_BIT_SVM_STRONG) /// This virtual address range is used by SVM system as marked #define PG_SVM_LAZYRELEASE (1 << _PAGE_BIT_SVM_LAZYRELEASE) +/// Currently, no page frame is behind this page (only the MBP proxy) +#define PG_SVM_INIT (1 << _PAGE_BIT_SVM_INIT) /// This is a whole set of flags (PRESENT,RW,ACCESSED,DIRTY) for kernelspace tables #define KERN_TABLE (PG_PRESENT|PG_RW|PG_ACCESSED|PG_DIRTY) @@ -79,27 +83,33 @@ #define KERN_PAGE (PG_PRESENT|PG_RW|PG_GLOBAL) /// This is a whole set of flags (PRESENT,RW,USER) for userspace pages #define USER_PAGE (PG_PRESENT|PG_RW|PG_USER) + +#if __SIZEOF_POINTER__ == 4 +#define PGT_ENTRIES 1024 +#elif __SIZEOF_POINTER__ == 8 +#define PGT_ENTRIES 512 +#endif /** @brief Page table structure * * This structure keeps page table entries.\n - * A page table consists of 1024 
entries. + * On a 32bit system, a page table consists normally of 1024 entries. */ typedef struct page_table { /// Page table entries are unsigned 32bit integers. - uint32_t entries[1024]; + size_t entries[PGT_ENTRIES]; } page_table_t __attribute__ ((aligned (4096))); /** @brief Page directory structure * * This structure keeps page directory entries.\ - * A page directory consists of 1024 entries. + * On a 32bit system, a page directory consists normally of 1024 entries. */ typedef struct page_dir { /// Page dir entries are unsigned 32bit integers. - uint32_t entries[1024]; + size_t entries[PGT_ENTRIES]; } page_dir_t __attribute__ ((aligned (4096))); /** @brief Converts a virtual address to a physical diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 08f925c3..808e3857 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -68,11 +68,16 @@ inline static uint32_t has_fxsr(void) return (cpu_info.feature1 & CPU_FEATURE_FXSR); } -inline static uint32_t has_xmm(void) +inline static uint32_t has_sse(void) { return (cpu_info.feature1 & CPU_FEATURE_SSE); } +inline static uint32_t has_sse2(void) +{ + return (cpu_info.feature1 & CPU_FEATURE_SSE2); +} + inline static uint32_t has_avx(void) { return (cpu_info.feature2 & CPU_FEATURE_AVX); @@ -124,15 +129,11 @@ inline static int get_return_value(void) { } /* Force strict CPU ordering */ -#ifdef CONFIG_ROCKCREEK -inline static void mb(void) { asm volatile ("lock; addl $0,0(%%esp)" ::: "memory", "cc"); } -inline static void rmb(void) { asm volatile ("lock; addl $0,0(%%esp)" ::: "memory", "cc"); } -inline static void wmb(void) { asm volatile ("lock; addl $0,0(%%esp)" ::: "memory", "cc"); } -#else -inline static void mb(void) { asm volatile("mfence" ::: "memory"); } -inline static void rmb(void) { asm volatile("lfence" ::: "memory"); } -inline static void wmb(void) { asm volatile("sfence" ::: "memory"); } -#endif +typedef void (*func_memory_barrier)(void); + 
+extern func_memory_barrier mb; +extern func_memory_barrier rmb; +extern func_memory_barrier wmb; /** @brief Read out CPU ID * @@ -151,7 +152,7 @@ inline static void wmb(void) { asm volatile("sfence" ::: "memory"); } * @param d EDX value will be stores here */ inline static void cpuid(uint32_t code, uint32_t* a, uint32_t* b, uint32_t* c, uint32_t* d) { - asm volatile ("cpuid" : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d) : "0"(code)); + asm volatile ("cpuid" : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d) : "0"(code), "2"(*c)); } /** @brief Read MSR @@ -173,8 +174,8 @@ inline static uint64_t rdmsr(uint32_t msr) { /** @brief Read cr0 register * @return cr0's value */ -static inline uint32_t read_cr0(void) { - uint32_t val; +static inline size_t read_cr0(void) { + size_t val; asm volatile("mov %%cr0, %0" : "=r"(val)); return val; } @@ -182,15 +183,15 @@ static inline uint32_t read_cr0(void) { /** @brief Write a value into cr0 register * @param val The value you want to write into cr0 */ -static inline void write_cr0(uint32_t val) { +static inline void write_cr0(size_t val) { asm volatile("mov %0, %%cr0" : : "r"(val)); } /** @brief Read cr2 register * @return cr2's value */ -static inline uint32_t read_cr2(void) { - uint32_t val; +static inline size_t read_cr2(void) { + size_t val; asm volatile("mov %%cr2, %0" : "=r"(val)); return val; } @@ -198,8 +199,8 @@ static inline uint32_t read_cr2(void) { /** @brief Read cr3 register * @return cr3's value */ -static inline uint32_t read_cr3(void) { - uint32_t val; +static inline size_t read_cr3(void) { + size_t val; asm volatile("mov %%cr3, %0" : "=r"(val)); return val; } @@ -207,15 +208,15 @@ static inline uint32_t read_cr3(void) { /** @brief Write a value into cr3 register * @param val The value you want to write into cr3 */ -static inline void write_cr3(uint32_t val) { +static inline void write_cr3(size_t val) { asm volatile("mov %0, %%cr3" : : "r"(val)); } /** @brief Read cr4 register * @return cr4's value */ -static inline uint32_t 
read_cr4(void) { - uint32_t val; +static inline size_t read_cr4(void) { + size_t val; asm volatile("mov %%cr4, %0" : "=r"(val)); return val; } @@ -223,7 +224,7 @@ static inline uint32_t read_cr4(void) { /** @brief Write a value into cr4 register * @param val The value you want to write into cr4 */ -static inline void write_cr4(uint32_t val) { +static inline void write_cr4(size_t val) { asm volatile("mov %0, %%cr4" : : "r"(val)); } @@ -308,11 +309,6 @@ static inline size_t lsb(size_t i) return ret; } -/** @brief Read extended instruction pointer - * @return The EIP's value - */ -uint32_t read_eip(void); - /// A one-instruction-do-nothing #define NOP1 asm volatile ("nop") /// Do nothing for 2 instructions @@ -321,7 +317,11 @@ uint32_t read_eip(void); #define NOP4 asm volatile ("nop;nop;nop;nop") /// Do nothing for 8 instructions #define NOP8 asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop") -#define HALT asm volatile ("hlt"); +#ifndef CONFIG_TICKLESS +#define HALT asm volatile ("hlt") +#else +#define HALT asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop") +#endif /** @brief Init several subsystems * diff --git a/arch/x86/include/asm/stddef.h b/arch/x86/include/asm/stddef.h index f3ef4468..e83032e1 100644 --- a/arch/x86/include/asm/stddef.h +++ b/arch/x86/include/asm/stddef.h @@ -32,6 +32,8 @@ extern "C" { #endif +#if __SIZEOF_POINTER__ == 4 +#define CONFIG_X86_32 /// A popular type for addresses typedef unsigned long size_t; /// Pointer differences @@ -40,6 +42,19 @@ typedef long ptrdiff_t; typedef long ssize_t; typedef long off_t; #endif +#elif __SIZEOF_POINTER__ == 8 +#define CONFIG_X86_64 +// A popular type for addresses +typedef unsigned long long size_t; +/// Pointer differences +typedef long long ptrdiff_t; +#ifdef __KERNEL__ +typedef long long ssize_t; +typedef long long off_t; +#endif +#else +#error unsupported architecture +#endif /// Unsigned 64 bit integer typedef unsigned long long uint64_t; @@ -70,33 +85,79 @@ typedef unsigned int wint_t; * All the 
interrupt handler routines use this type for their only parameter. */ struct state { +#ifdef CONFIG_X86_32 /// EDI register - unsigned int edi; + uint32_t edi; /// ESI register - unsigned int esi; + uint32_t esi; /// EBP register - unsigned int ebp; + uint32_t ebp; /// ESP register - unsigned int esp; + uint32_t esp; /// EBX register - unsigned int ebx; + uint32_t ebx; /// EDX register - unsigned int edx; + uint32_t edx; /// ECX register - unsigned int ecx; + uint32_t ecx; /// EAX register - unsigned int eax; /* pushed by 'pusha' */ + uint32_t eax; /* pushed by 'pusha' */ /// Interrupt number - unsigned int int_no; + uint32_t int_no; // pushed by the processor automatically - unsigned int error; - unsigned int eip; - unsigned int cs; - unsigned int eflags; - unsigned int useresp; - unsigned int ss; + uint32_t error; + uint32_t eip; + uint32_t cs; + uint32_t eflags; + uint32_t useresp; + uint32_t ss; +#elif defined(CONFIG_X86_64) + /// R15 register + uint64_t r15; + /// R14 register + uint64_t r14; + /// R13 register + uint64_t r13; + /// R12 register + uint64_t r12; + /// R11 register + uint64_t r11; + /// R10 register + uint64_t r10; + /// R9 register + uint64_t r9; + /// R8 register + uint64_t r8; + /// RDI register + uint64_t rdi; + /// RSI register + uint64_t rsi; + /// RBP register + uint64_t rbp; + /// (pseudo) RSP register + uint64_t rsp; + /// RBX register + uint64_t rbx; + /// RDX register + uint64_t rdx; + /// RCX register + uint64_t rcx; + /// RAX register + uint64_t rax; + + /// Interrupt number + uint64_t int_no; + + // pushed by the processor automatically + uint64_t error; + uint64_t rip; + uint64_t cs; + uint64_t rflags; + uint64_t userrsp; + uint64_t ss; +#endif }; uint32_t apic_cpu_id(void); diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h index 66f5e617..ee39ff4a 100644 --- a/arch/x86/include/asm/string.h +++ b/arch/x86/include/asm/string.h @@ -96,11 +96,12 @@ inline static void *memcpy(void *dest, const void *src, 
size_t count) */ inline static void *memcpy(void* dest, const void *src, size_t count) { - int32_t i, j, k; + size_t i, j, k; if (BUILTIN_EXPECT(!dest || !src, 0)) return dest; +#ifdef CONFIG_X86_32 asm volatile ( "cld; rep movsl\n\t" "movl %4, %%ecx\n\t" @@ -108,6 +109,15 @@ inline static void *memcpy(void* dest, const void *src, size_t count) "rep movsb\n\t" : "=&c"(i), "=&D"(j), "=&S"(k) : "0"(count/4), "g"(count), "1"(dest), "2"(src) : "memory","cc"); +#elif defined(CONFIG_X86_64) + asm volatile ( + "cld; rep movsq\n\t" + "movq %4, %%rcx\n\t" + "andq $7, %%rcx\n\t" + "rep movsb\n\t" + : "=&c"(i), "=&D"(j), "=&S"(k) + : "0"(count/8), "g"(count), "1"(dest), "2"(src) : "memory","cc"); +#endif return dest; } @@ -128,7 +138,7 @@ inline static void *memcpy(void* dest, const void *src, size_t count) */ inline static void *memset(void* dest, int val, size_t count) { - int32_t i, j; + size_t i, j; if (BUILTIN_EXPECT(!dest, 0)) return dest; @@ -155,7 +165,7 @@ inline static void *memset(void* dest, int val, size_t count) */ inline static void *memset(void* dest, int val, size_t count) { - int32_t i, j; + size_t i, j; if (BUILTIN_EXPECT(!dest, 0)) return dest; @@ -184,15 +194,22 @@ inline static void *memset(void* dest, int val, size_t count) inline static size_t strlen(const char* str) { size_t len = 0; - uint32_t i, j; + size_t i, j; if (BUILTIN_EXPECT(!str, 0)) return len; +#ifdef CONFIG_X86_32 asm volatile("not %%ecx; cld; repne scasb; not %%ecx; dec %%ecx" : "=&c"(len), "=&D"(i), "=&a"(j) : "2"(0), "1"(str), "0"(len) : "memory","cc"); +#elif defined(CONFIG_X86_64) + asm volatile("not %%rcx; cld; repne scasb; not %%rcx; dec %%rcx" + : "=&c"(len), "=&D"(i), "=&a"(j) + : "2"(0), "1"(str), "0"(len) + : "memory","cc"); +#endif return len; } diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 301fda74..f2e3060d 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -21,6 +21,7 @@ #define __ARCH_SVM_H__ #include +#include #ifdef 
CONFIG_ROCKCREEK #include #endif @@ -31,8 +32,13 @@ extern "C" { #ifdef CONFIG_ROCKCREEK +//#define SVM_WB + #define SVM_STRONG (1 << 0) #define SVM_LAZYRELEASE (1 << 1) +#define SVM_L2 (1 << 4) +#define SVM_STRONG_L2 SVM_STRONG|SVM_L2 +#define SVM_LAZYRELEASE_L2 SVM_LAZYRELEASE|SVM_L2 /** @brief Init routine of the SVM subsystem * @@ -49,13 +55,15 @@ int svm_init(void); * * @return Pointer to the new memory range */ -void* svmmalloc(size_t sizei, uint32_t flags); +void* svm_malloc(size_t size, uint32_t flags); /** @brief Frees memory, which is managed by the SVM subsystem * * Like RCCE function, belongs svmfree to the synchronous function. */ -void svmfree(void* addr, size_t size); +void svm_free(void* addr, size_t size); + +int svm_barrier(uint32_t flags); /** @brief Request for exlusive access * @@ -64,6 +72,15 @@ void svmfree(void* addr, size_t size); */ int svm_access_request(size_t addr); +/** @brief Allocate n shared pages + * + * @param n number of requested pages + * @return physical address of the shared pages + */ +size_t shmalloc(uint32_t n); + +int svm_alloc_page(size_t addr, page_table_t* pgt); + /** @brief emit page to core ue * * @return @@ -71,24 +88,28 @@ int svm_access_request(size_t addr); */ int svm_emit_page(size_t addr, int ue); +#ifdef CONFIG_ROCKCREEK /* @brief invalidate the cache entries for all SVM regions */ +#ifndef SVM_WB static inline void svm_invalidate(void) { asm volatile ( ".byte 0x0f; .byte 0x0a;\n" ); // CL1FLUSHMB } +#else +void svm_invalidate(void); +#endif -/* *brief flushs the cache for all SVM regions +/* @brief flushs the cache for all SVM regions */ -#ifdef CONFIG_ROCKCREEK #ifndef SVM_WB -static inline void svm_flush(void) +static inline void svm_flush(size_t unused) { // need to write to another line to make sure the write combine buffer gets flushed - *(int *)RCCE_fool_write_combine_buffer = 1; + *(volatile int *)RCCE_fool_write_combine_buffer = 1; } #else -void svm_flush(void); +void svm_flush(size_t addr); #endif 
#endif diff --git a/arch/x86/include/asm/tasks.h b/arch/x86/include/asm/tasks.h index 9f45b691..fac78eca 100644 --- a/arch/x86/include/asm/tasks.h +++ b/arch/x86/include/asm/tasks.h @@ -55,6 +55,13 @@ int arch_fork(task_t* task); */ void switch_task(uint32_t id); +/** + * @brief Switch to current task + * + * @param stack Pointer to the old stack pointer + */ +void switch_context(size_t** stack); + /** @brief Setup a default frame for a new task * * @param task Pointer to the task structure @@ -64,16 +71,24 @@ void switch_task(uint32_t id); * - 0 on success * - -EINVAL (-22) on failure */ -int create_default_frame(task_t* task, internal_entry_point_t ep, void* arg); +int create_default_frame(task_t* task, entry_point_t ep, void* arg); /** @brief Register a task's TSS at GDT * - * @param task Pointer to task structure * @return * - 0 on success - * - -EINVAL (-22) on failure */ -int register_task(task_t* task); +static inline int register_task(void) +{ +#ifdef CONFIG_X86_32 + uint16_t sel = (CORE_ID+5) << 3; +#else + uint16_t sel = (CORE_ID*2+5) << 3; +#endif + asm volatile ("ltr %%ax" : : "a"(sel)); + + return 0; +} /** @brief Jump back to user code * @@ -84,19 +99,17 @@ int register_task(task_t* task); */ static inline int jump_to_user_code(uint32_t ep, uint32_t stack) { +#ifdef CONFIG_X86_32 asm volatile ("mov %0, %%ds; mov %0, %%fs; mov %0, %%gs; mov %0, %%es" :: "r"(0x23)); asm volatile ("push $0x23; push %0; push $0x1B; push %1" :: "r"(stack), "r"(ep)); asm volatile ("lret" ::: "cc"); return 0; +#else + return -22; +#endif } -/** @brief determines the stack of a specific task - * - * @return start address of a specific task - */ -size_t get_stack(uint32_t id); - #ifdef __cplusplus } #endif diff --git a/arch/x86/include/asm/tasks_types.h b/arch/x86/include/asm/tasks_types.h index 194a0410..fa65f661 100644 --- a/arch/x86/include/asm/tasks_types.h +++ b/arch/x86/include/asm/tasks_types.h @@ -65,19 +65,11 @@ union fpu_state { i387_fxsave_t fxsave; }; -static 
inline void save_fpu_state(union fpu_state* state) { - if (has_fxsr()) - asm volatile ("fxsave %0; fnclex" : "=m"((*state).fxsave) :: "memory"); - else - asm volatile ("fnsave %0; fwait" : "=m"((*state).fsave) :: "memory"); -} +typedef void (*handle_fpu_state)(union fpu_state* state); -static inline void restore_fpu_state(union fpu_state* state) { - if (has_fxsr()) - asm volatile ("fxrstor %0" :: "m"(state->fxsave)); - else - asm volatile ("frstor %0" :: "m"(state->fsave)); -} +extern handle_fpu_state save_fpu_state; +extern handle_fpu_state restore_fpu_state; +extern handle_fpu_state fpu_init; #ifdef __cplusplus } diff --git a/arch/x86/include/asm/tss.h b/arch/x86/include/asm/tss.h index 9e905449..03540957 100644 --- a/arch/x86/include/asm/tss.h +++ b/arch/x86/include/asm/tss.h @@ -35,6 +35,7 @@ extern "C" { /** @brief The tast state segment structure */ typedef struct { +#ifdef CONFIG_X86_32 uint16_t backlink, __blh; uint32_t esp0; uint16_t ss0, __ss0h; @@ -55,6 +56,23 @@ typedef struct { uint16_t gs, __gsh; uint16_t ldt, __ldth; uint16_t trace, bitmap; +#endif +#ifdef CONFIG_X86_64 + uint16_t res0, res1; // reserved entries + uint64_t rsp0; + uint64_t rsp1; + uint64_t rsp2; + uint32_t res2, res3; // reserved entries + uint64_t ist_rsp1; + uint64_t ist_rsp2; + uint64_t ist_rsp3; + uint64_t ist_rsp4; + uint64_t ist_rsp5; + uint64_t ist_rsp6; + uint64_t ist_rsp7; + uint32_t res4, res5; // reserved entries + uint16_t res6, bitmap; +#endif } __attribute__ ((packed)) tss_t; #ifdef __cplusplus diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index ba80b41c..1145b742 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -1,5 +1,5 @@ C_source := gdt.c kb.c timer.c irq.c isrs.c idt.c vga.c multiboot.c apic.c pci.c processor.c -ASM_source := entry.asm string.asm +ASM_source := entry$(BIT).asm string$(BIT).asm MODULE := arch_x86_kernel include $(TOPDIR)/Makefile.inc diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 
ca860d4f..d2e87e37 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -38,10 +38,20 @@ #include #endif +void start_tickless(void); +void end_tickless(void); + #if defined(CONFIG_ROCKCREEK) && (MAX_CORES > 1) #error RockCreek is not a SMP system #endif +/* + * Note that linker symbols are not variables, they have no memory allocated for + * maintaining a value, rather their address is their value. + */ +extern const void kernel_start; +extern const void kernel_end; + // IO APIC MMIO structure: write reg, then read or write data. typedef struct { uint32_t reg; @@ -51,9 +61,9 @@ typedef struct { static const apic_processor_entry_t* apic_processors[MAX_CORES] = {[0 ... MAX_CORES-1] = NULL}; static uint32_t boot_processor = MAX_CORES; -static apic_mp_t* apic_mp = NULL; +apic_mp_t* apic_mp __attribute__ ((section (".data"))) = NULL; static apic_config_table_t* apic_config = NULL; -static uint32_t lapic = 0; +static size_t lapic = 0; static volatile ioapic_t* ioapic = NULL; static uint32_t icr = 0; static uint32_t ncores = 1; @@ -75,12 +85,15 @@ static inline uint32_t lapic_read(uint32_t addr) static inline void lapic_write(uint32_t addr, uint32_t value) { +#ifdef CONFIG_X86_32 /* * to avoid a pentium bug, we have to read a apic register * before we write a value to this register */ asm volatile ("movl (%%eax), %%edx; movl %%ebx, (%%eax)" :: "a"(lapic+addr), "b"(value) : "%edx"); - //*((volatile uint32_t*) (lapic+addr)) = value; +#else + *((volatile uint32_t*) (lapic+addr)) = value; +#endif } static inline uint32_t ioapic_read(uint32_t reg) @@ -144,6 +157,31 @@ int apic_is_enabled(void) return (lapic && initialized); } +int apic_disable_timer(void) +{ + if (BUILTIN_EXPECT(!apic_is_enabled(), 0)) + return -EINVAL; + + lapic_write(APIC_LVT_T, 0x10000); // disable timer interrupt + start_tickless(); + + return 0; +} + +int apic_enable_timer(void) +{ + if (BUILTIN_EXPECT(apic_is_enabled() && icr, 1)) { + lapic_write(APIC_DCR, 0xB); // set it to 1 clock 
increments + lapic_write(APIC_LVT_T, 0x2007B); // connects the timer to 123 and enables it + lapic_write(APIC_ICR, icr); + end_tickless(); + + return 0; + } + + return -EINVAL; +} + #if MAX_CORES > 1 static inline void set_ipi_dest(uint32_t cpu_id) { uint32_t tmp; @@ -280,9 +318,18 @@ extern void cpu_init(void); */ extern int smp_main(void); +#ifdef CONFIG_X86_64 +/* + * 32bit entry point, which jumps to the 64bit code smp_start + */ +extern void smp_entry(void); +#endif + void smp_start(uint32_t id) { - uint32_t i; +#ifdef CONFIG_X86_32 + size_t i; +#endif atomic_int32_inc(&cpu_online); @@ -301,8 +348,10 @@ void smp_start(uint32_t id) // install IDT idt_install(); + // On 64bit system, paging is already enabled +#ifdef CONFIG_X86_32 /* enable paging */ - write_cr3((uint32_t)get_boot_pgd()); + write_cr3((size_t)get_boot_pgd()); i = read_cr0(); i = i | (1 << 31); write_cr0(i); @@ -310,12 +359,13 @@ void smp_start(uint32_t id) // reset APIC and set id lapic_reset(); // sets also the timer interrupt apic_set_cpu_id(id); +#endif /* * we turned on paging - * => now, we are able to register our task for Task State Switching + * => now, we are able to register our task */ - register_task(per_core(current_task)); + register_task(); // enable additional cpu features cpu_detection(); @@ -329,7 +379,7 @@ void smp_start(uint32_t id) } #endif -#if 1 +#ifdef CONFIG_X86_32 static apic_mp_t* search_apic(size_t base, size_t limit) { size_t ptr; apic_mp_t* tmp; @@ -375,8 +425,13 @@ int smp_init(void) { // replace 0xDEADC0DE with the address of the smp entry code if (*((uint32_t*) (bootaddr+j)) == 0xDEADC0DE) { - *((uint32_t*) (bootaddr+j)) = (size_t) smp_start; - kprintf("Set entry point of the application processors at 0x%x\n", (size_t) smp_start); +#ifdef CONFIG_X86_32 + *((uint32_t*) (bootaddr+j)) = (uint32_t) smp_start; + kprintf("Set entry point of the application processors at 0x%x\n", (uint32_t) smp_start); +#else + *((uint32_t*) (bootaddr+j)) = (uint32_t) smp_entry; + 
kprintf("Set entry point of the application processors at 0x%lx\n", (size_t) smp_entry); +#endif } // replace APIC ID 0xDEADDEAD @@ -446,9 +501,17 @@ int map_apic(void) if (!has_apic()) return -ENXIO; +#ifdef CONFIG_X86_32 lapic = map_region(0 /*lapic*/, lapic, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE); if (BUILTIN_EXPECT(!lapic, 0)) return -ENXIO; +#else + if (lapic != (size_t)&kernel_start - 0x1000) { + lapic = map_region(0 /*lapic*/, lapic, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE); + if (BUILTIN_EXPECT(!lapic, 0)) + return -ENXIO; + } +#endif kprintf("Mapped LAPIC at 0x%x\n", lapic); if (ioapic) { @@ -460,7 +523,7 @@ int map_apic(void) // map all processor entries for(i=0; i start ? end - start : start - end; } while(ticks*TIMER_FREQ < 3*RC_REFCLOCKMHZ*1000000UL); @@ -568,6 +631,7 @@ static int apic_probe(void) uint32_t i, count; int isa_bus = -1; +#ifdef CONFIG_X86_32 #if 1 apic_mp = search_apic(0xF0000, 0x100000); if (apic_mp) @@ -607,6 +671,7 @@ static int apic_probe(void) } } #endif +#endif found_mp: if (!apic_mp) goto no_mp; @@ -620,7 +685,7 @@ found_mp: goto no_mp; } - apic_config = (apic_config_table_t*) apic_mp->mp_config; + apic_config = (apic_config_table_t*) ((size_t) apic_mp->mp_config); if (!apic_config || strncmp((void*) &apic_config->signature, "PCMP", 4) !=0) { kputs("Invalid MP config table\n"); goto no_mp; @@ -668,10 +733,10 @@ found_mp: addr += 20; } else if (*((uint8_t*) addr) == 2) { // IO_APIC apic_io_entry_t* io_entry = (apic_io_entry_t*) addr; - ioapic = (ioapic_t*) io_entry->addr; + ioapic = (ioapic_t*) ((size_t) io_entry->addr); addr += 8; - kprintf("Found IOAPIC at 0x%x (ver. 0x%x)\n", ioapic, - ioapic_read(IOAPIC_REG_VER)); + //kprintf("Found IOAPIC at 0x%x (ver. 
0x%x)\n", ioapic, ioapic_read(IOAPIC_REG_VER)); + kprintf("Found IOAPIC at 0x%x\n", ioapic); } else if (*((uint8_t*) addr) == 3) { // IO_INT apic_ioirq_entry_t* extint = (apic_ioirq_entry_t*) addr; if (extint->src_bus == isa_bus) { @@ -690,15 +755,34 @@ found_mp: ncores = count; check_lapic: +#ifdef CONFIG_X86_32 if (apic_config) { lapic = apic_config->lapic; } else { - uint32_t edx, dummy; + uint32_t edx, dummy=0; cpuid(0x1, &dummy, &dummy, &dummy, &edx); if (edx & (1 << 9)) lapic = 0xFEE00000; } +#else + if (apic_config) { + if (apic_config->lapic == 0xFEE00000) { + // On a x64 system, we already map the lapic below the kernel + lapic = (size_t)&kernel_start - 0x1000; + } else { + lapic = apic_config->lapic; + } + } else { + uint32_t edx, dummy=0; + + cpuid(0x1, &dummy, &dummy, &dummy, &edx); + if (edx & (1 << 9)) { + // On a x64 system, we already map the lapic below the kernel + lapic = (size_t)&kernel_start - 0x1000; + } + } +#endif if (!lapic) goto out; diff --git a/arch/x86/kernel/entry.asm b/arch/x86/kernel/entry32.asm similarity index 92% rename from arch/x86/kernel/entry.asm rename to arch/x86/kernel/entry32.asm index 8d34dff9..7fe3338a 100644 --- a/arch/x86/kernel/entry.asm +++ b/arch/x86/kernel/entry32.asm @@ -21,6 +21,8 @@ ; perhaps setting up the GDT and segments. Please note that interrupts ; are disabled at this point: More on interrupts later! +%include "config.inc" + [BITS 32] ; We use a special name to map this section at the begin of our kernel ; => Multiboot needs its magic number at the begin of the kernel @@ -46,29 +48,22 @@ mboot: dd MULTIBOOT_HEADER_MAGIC dd MULTIBOOT_HEADER_FLAGS dd MULTIBOOT_CHECKSUM - - ; AOUT kludge - must be physical addresses. Make a note of these: - ; The linker script fills in the data for these ones! - ; dd mboot - ; dd code - ; dd bss - ; dd end - ; dd start msg db "?ello from MetalSVM kernel!!", 0 -extern default_stack_pointer - SECTION .text ALIGN 4 stublet: ; initialize stack pointer. 
- mov esp, [default_stack_pointer] + mov esp, boot_stack + add esp, KERNEL_STACK_SIZE-16 +; save pointer to the multiboot structure + push ebx ; initialize cpu features call cpu_init ; interpret multiboot information extern multiboot_init - push ebx +; pointer to the multiboot structure is already pushed call multiboot_init add esp, 4 @@ -113,10 +108,12 @@ flush2: ret ; determines the current instruction pointer (after the jmp) -global read_eip -read_eip: - pop eax ; Get the return address - jmp eax ; Return. Can't use RET because return +global read_ip +read_ip: + mov eax, [esp+4] + pop DWORD [eax] ; Get the return address + add esp, 4 ; Dirty Hack! read_ip cleans up the stack + jmp [eax] ; Return. Can't use RET because return ; address popped off the stack. ; In just a few pages in this tutorial, we will add our Interrupt @@ -496,17 +493,6 @@ global apic_lint1 global apic_error global apic_svr -global switch_task -switch_task: - mov eax, [esp+4] - add ax, WORD 5 - mov bx, WORD 8 - mul bx - mov [hack+5], ax -hack: - jmp 0x00 : 0xDEADBEAF - ret - ; 32: IRQ0 irq0: ; irq0 - irq15 are registered as "Interrupt Gate" @@ -764,7 +750,29 @@ apic_svr: jmp common_stub extern irq_handler +extern get_current_stack +extern finish_task_switch +global switch_context +ALIGN 4 +switch_context: + ; create on the stack a pseudo interrupt + ; afterwards, we switch to the task with iret + mov eax, [esp+4] ; on the stack is already the address to store the old esp + pushf ; EFLAGS + push DWORD 0x8 ; CS + push DWORD rollback ; EIP + push DWORD 0x0 ; Interrupt number + push DWORD 0x00edbabe ; Error code + pusha ; Registers...
+ + jmp common_switch + +ALIGN 4 +rollback: + ret + +ALIGN 4 common_stub: pusha @@ -773,8 +781,31 @@ common_stub: call irq_handler add esp, 4 + cmp eax, 0 + je no_context_switch + +common_switch: + mov [eax], esp ; store old esp + call get_current_stack ; get new esp + xchg eax, esp + + ; set task switched flag + mov eax, cr0 + or eax, 8 + mov cr0, eax + + ; call cleanup code + call finish_task_switch + +no_context_switch: popa add esp, 8 iret +SECTION .data +global boot_stack +ALIGN 4096 +boot_stack: +TIMES (MAX_CORES*KERNEL_STACK_SIZE) DB 0xcd + SECTION .note.GNU-stack noalloc noexec nowrite progbits diff --git a/arch/x86/kernel/entry64.asm b/arch/x86/kernel/entry64.asm new file mode 100644 index 00000000..586f5fe3 --- /dev/null +++ b/arch/x86/kernel/entry64.asm @@ -0,0 +1,1173 @@ +; +; Copyright 2010 Stefan Lankes, Chair for Operating Systems, +; RWTH Aachen University +; +; Licensed under the Apache License, Version 2.0 (the "License"); +; you may not use this file except in compliance with the License. +; You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; +; This file is part of MetalSVM. + +; This is the kernel's entry point. We could either call main here, +; or we can use this to setup the stack or other nice stuff, like +; perhaps setting up the GDT and segments. Please note that interrupts +; are disabled at this point: More on interrupts later! 
+ +%include "config.inc" + +[BITS 32] + +extern kernel_start ; defined in linker script +extern kernel_end +extern apic_mp + +; We use a special name to map this section at the begin of our kernel +; => Multiboot needs its magic number at the begin of the kernel +SECTION .mboot +global start +start: + jmp stublet + +; This part MUST be 4byte aligned, so we solve that issue using 'ALIGN 4' +ALIGN 4 +mboot: + ; Multiboot macros to make a few lines more readable later + MULTIBOOT_PAGE_ALIGN equ 1<<0 + MULTIBOOT_MEMORY_INFO equ 1<<1 + ; MULTIBOOT_AOUT_KLUDGE equ 1<<16 + MULTIBOOT_HEADER_MAGIC equ 0x1BADB002 + MULTIBOOT_HEADER_FLAGS equ MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO ; | MULTIBOOT_AOUT_KLUDGE + MULTIBOOT_CHECKSUM equ -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS) + EXTERN code, bss, end + + ; This is the GRUB Multiboot header. A boot signature + dd MULTIBOOT_HEADER_MAGIC + dd MULTIBOOT_HEADER_FLAGS + dd MULTIBOOT_CHECKSUM + +ALIGN 4 +; we need already a valid GDT to switch in the 64bit modus +GDT64: ; Global Descriptor Table (64-bit). + .Null: equ $ - GDT64 ; The null descriptor. + dw 0 ; Limit (low). + dw 0 ; Base (low). + db 0 ; Base (middle) + db 0 ; Access. + db 0 ; Granularity. + db 0 ; Base (high). + .Code: equ $ - GDT64 ; The code descriptor. + dw 0 ; Limit (low). + dw 0 ; Base (low). + db 0 ; Base (middle) + db 10011000b ; Access. + db 00100000b ; Granularity. + db 0 ; Base (high). + .Data: equ $ - GDT64 ; The data descriptor. + dw 0 ; Limit (low). + dw 0 ; Base (low). + db 0 ; Base (middle) + db 10010010b ; Access. + db 00000000b ; Granularity. + db 0 ; Base (high). + .Pointer: ; The GDT-pointer. + dw $ - GDT64 - 1 ; Limit. + dq GDT64 ; Base. 
+ + times 256 DD 0 +startup_stack: + +SECTION .data +; create default page tables for the 64bit kernel +global boot_pgd ; aka PML4 +ALIGN 4096 ; of course, the page tables have to be page aligned +NOPTS equ 512 +boot_pgd times 512 DQ 0 +boot_pdpt times 512 DQ 0 +boot_pd times 512 DQ 0 +boot_pt times (NOPTS*512) DQ 0 + +SECTION .text +ALIGN 8 +%if MAX_CORES > 1 +global smp_entry +smp_entry: + mov eax, cr0 +; enable caching, disable paging and fpu emulation + and eax, 0x1ffffffb +; ...and turn on FPU exceptions + or eax, 0x22 + mov cr0, eax +; clears the current pgd entry + xor eax, eax + mov cr3, eax +; at this stage, we disable the SSE support + mov eax, cr4 + and eax, 0xfffbf9ff + mov cr4, eax +; initialize page table + mov edi, boot_pgd + mov cr3, edi + +; we need to enable PAE mode + mov eax, cr4 + or eax, 1 << 5 + mov cr4, eax + +; switch to the compatibility mode (which is part of long mode) + mov ecx, 0xC0000080 + rdmsr + or eax, 1 << 8 + wrmsr + +; enable paging + mov eax, cr0 + or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31st bit, and the PE-bit, which is the 0th bit. + mov cr0, eax + + mov edi, [esp+4] ; set argument for smp_start + lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table. + jmp GDT64.Code:smp_start64 ; Set the code segment and enter 64-bit long mode.
+ + jmp $ ; endless loop +%endif + +search_apic: + push ebp + mov ebp, esp + push ecx + + xor eax, eax + mov ecx, [ebp+8] +L1: + cmp [ecx], DWORD 0x5f504d5f ; MP_FLT_SIGNATURE + jne L2 + mov al, BYTE [ecx+9] + cmp eax, 4 + ja L2 + mov al, BYTE [ecx+11] + cmp eax, 0 + jne L2 + mov eax, ecx + jmp L3 + +L2: + add ecx, 4 + cmp ecx, [ebp+12] + jb L1 + xor eax, eax + +L3: + pop ecx + pop ebp + ret + +ALIGN 4 +stublet: + mov esp, startup_stack-4 + push ebx ; save pointer to the multiboot structure + mov eax, cr0 +; enable caching, disable paging and fpu emulation + and eax, 0x1ffffffb +; ...and turn on FPU exceptions + or eax, 0x22 + mov cr0, eax +; clears the current pgd entry + xor eax, eax + mov cr3, eax +; at this stage, we disable the SSE support + mov eax, cr4 + and eax, 0xfffbf9ff + mov cr4, eax +; do we have the instruction cpuid? + pushfd + pop eax + mov ecx, eax + xor eax, 1 << 21 + push eax + popfd + pushfd + pop eax + push ecx + popfd + xor eax, ecx + jz Linvalid +; cpuid > 0x80000000? + mov eax, 0x80000000 + cpuid + cmp eax, 0x80000001 + jb Linvalid ; It is less, there is no long mode. +; do we have a long mode? + mov eax, 0x80000001 + cpuid + test edx, 1 << 29 ; Test if the LM-bit, which is bit 29, is set in the D-register. + jz Linvalid ; They aren't, there is no long mode. + +; initialize page table + mov edi, boot_pgd + mov cr3, edi + +; So lets make PML4T[0] point to the PDPT and so on: + mov DWORD [edi], boot_pdpt ; Set the double word at the destination index to pdpt. + or DWORD [edi], 0x00000003 ; Set present and writeable bit + mov edi, boot_pdpt + mov DWORD [edi], boot_pd ; Set the double word at the destination index to pd. + or DWORD [edi], 0x00000003 ; Set present and writeable bit + mov edi, boot_pd + mov ebx, boot_pt + mov ecx, NOPTS +L0: + mov DWORD [edi], ebx ; Set the double word at the destination index to pt. 
+ or DWORD [edi], 0x00000003 ; Set present and writeable bit + add edi, 8 + add ebx, 0x1000 + loop L0 + + ; map the VGA address into the virtual address space + mov edi, 0xB8000 + shr edi, 9 ; (edi >> 12) * 8 + add edi, boot_pt + mov ebx, 0xB8000 + or ebx, 0x00000003 + mov DWORD [edi], ebx + + ; map multiboot structure into the virtual address space + mov edi, [esp] + and edi, 0xFFFFF000 + shr edi, 9 ; (edi >> 12) * 8 + add edi, boot_pt + mov ebx, [esp] + and ebx, 0xFFFFF000 + or ebx, 0x00000003 + mov DWORD [edi], ebx + + ; check if lapic is available + push eax + push ebx + push ecx + push edx + mov eax, 1 + cpuid + and edx, 0x200 + cmp edx, 0 + je no_lapic + ; map lapic at 0xFEE00000 below the kernel + mov edi, kernel_start - 0x1000 + shr edi, 9 ; (edi >> 12) * 8 + add edi, boot_pt + mov ebx, 0xFEE00000 + or ebx, 0x00000013 + mov DWORD [edi], ebx +no_lapic: + pop edx + pop ecx + pop ebx + pop eax + + ; search APIC + push DWORD 0x100000 + push DWORD 0xF0000 + call search_apic + add esp, 8 + + cmp eax, 0 + jne La + + push DWORD 0xA0000 + push DWORD 0x9F000 + call search_apic + add esp, 8 + + cmp eax, 0 + je Lb + +La: + ; map MP Floating Pointer Structure + mov DWORD [apic_mp], eax + mov edi, eax + and edi, 0xFFFFF000 + shr edi, 9 ; (edi >> 12) * 8 + add edi, boot_pt + mov ebx, eax + and ebx, 0xFFFFF000 + or ebx, 0x00000013 + mov DWORD [edi], ebx + + ; map mp_config + mov edi, [eax+4] + and edi, 0xFFFFF000 + shr edi, 9 ; (edi >> 12) * 8 + add edi, boot_pt + mov ebx, [eax+4] + and ebx, 0xFFFFF000 + or ebx, 0x00000013 + mov DWORD [edi], ebx + +Lb: + mov edi, kernel_start + shr edi, 9 ; (kernel_start >> 12) * 8 + add edi, boot_pt + mov ebx, kernel_start + or ebx, 0x00000003 + mov ecx, kernel_end ; determine kernel size in number of pages + sub ecx, kernel_start + shr ecx, 12 + inc ecx + +Lc: + mov DWORD [edi], ebx ; Set the double word at the destination index to the B-register. 
+ add edi, 8 + add ebx, 0x1000 + loop Lc + +; we need to enable PAE mode + mov eax, cr4 + or eax, 1 << 5 + mov cr4, eax + +; switch to the compatibility mode (which is part of long mode) + mov ecx, 0xC0000080 + rdmsr + or eax, 1 << 8 + wrmsr + +; enable paging + mov eax, cr0 + or eax, 1 << 31 | 1 << 0 ; Set the PG-bit, which is the 31st bit, and the PE-bit, which is the 0th bit. + mov cr0, eax + + pop ebx ; restore pointer to multiboot structure + lgdt [GDT64.Pointer] ; Load the 64-bit global descriptor table. + jmp GDT64.Code:start64 ; Set the code segment and enter 64-bit long mode. + +Linvalid: + jmp $ + +[BITS 64] +start64: +; initialize segment registers + mov ax, GDT64.Data + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + mov ss, ax +; set default stack pointer + mov rsp, boot_stack + add rsp, KERNEL_STACK_SIZE-16 +; interpret multiboot information + extern multiboot_init + mov rdi, rbx + call multiboot_init +; jump to the boot processor's C code + extern main + call main + jmp $ + +%if MAX_CORES > 1 +smp_start64: +; initialize segment registers + mov ax, GDT64.Data + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + mov ss, ax + +; jump to the application processor's C code + extern smp_start + call smp_start + jmp $ +%endif + +global cpu_init +cpu_init: +; mov eax, cr0 +; enable caching, disable paging and fpu emulation +; and eax, 0x1ffffffb +; ...and turn on FPU exceptions +; or eax, 0x22 +; mov cr0, eax +; clears the current pgd entry +; xor eax, eax +; mov cr3, eax +; at this stage, we disable the SSE support +; mov eax, cr4 +; and eax, 0xfffbf9ff +; mov cr4, eax + ret ; the setup above is already done in stublet; return instead of falling through into gdt_flush + +; This will set up our new segment registers and is declared in +; C as 'extern void gdt_flush();' +global gdt_flush +extern gp +gdt_flush: + lgdt [gp] + ret + +; determines the current instruction pointer (after the jmp) +global read_eip +read_eip: + pop rax ; Get the return address + jmp rax ; Return. Can't use RET because return + ; address popped off the stack.
+ +; In just a few pages in this tutorial, we will add our Interrupt +; Service Routines (ISRs) right here! +global isr0 +global isr1 +global isr2 +global isr3 +global isr4 +global isr5 +global isr6 +global isr7 +global isr8 +global isr9 +global isr10 +global isr11 +global isr12 +global isr13 +global isr14 +global isr15 +global isr16 +global isr17 +global isr18 +global isr19 +global isr20 +global isr21 +global isr22 +global isr23 +global isr24 +global isr25 +global isr26 +global isr27 +global isr28 +global isr29 +global isr30 +global isr31 +global isrsyscall + +; 0: Divide By Zero Exception +isr0: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 0 + jmp common_stub + +; 1: Debug Exception +isr1: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 1 + jmp common_stub + +; 2: Non Maskable Interrupt Exception +isr2: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 2 + jmp common_stub + +; 3: Int 3 Exception +isr3: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 3 + jmp common_stub + +; 4: INTO Exception +isr4: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 4 + jmp common_stub + +; 5: Out of Bounds Exception +isr5: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. 
+ ; cli + push byte 0 ; pseudo error code + push byte 5 + jmp common_stub + +; 6: Invalid Opcode Exception +isr6: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 6 + jmp common_stub + +; 7: Coprocessor Not Available Exception +isr7: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 7 + jmp common_stub + +; 8: Double Fault Exception (With Error Code!) +isr8: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 8 + jmp common_stub + +; 9: Coprocessor Segment Overrun Exception +isr9: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 9 + jmp common_stub + +; 10: Bad TSS Exception (With Error Code!) +isr10: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 10 + jmp common_stub + +; 11: Segment Not Present Exception (With Error Code!) +isr11: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 11 + jmp common_stub + +; 12: Stack Fault Exception (With Error Code!) +isr12: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 12 + jmp common_stub + +; 13: General Protection Fault Exception (With Error Code!) +isr13: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 13 + jmp common_stub + +; 14: Page Fault Exception (With Error Code!) 
+isr14: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 14 + jmp common_stub + +; 15: Reserved Exception +isr15: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 15 + jmp common_stub + +; 16: Floating Point Exception +isr16: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 16 + jmp common_stub + +; 17: Alignment Check Exception +isr17: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 17 + jmp common_stub + +; 18: Machine Check Exception +isr18: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 18 + jmp common_stub + +; 19: Reserved +isr19: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 19 + jmp common_stub + +; 20: Reserved +isr20: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 20 + jmp common_stub + +; 21: Reserved +isr21: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 21 + jmp common_stub + +; 22: Reserved +isr22: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. 
+ ; cli + push byte 0 ; pseudo error code + push byte 22 + jmp common_stub + +; 23: Reserved +isr23: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 23 + jmp common_stub + +; 24: Reserved +isr24: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 24 + jmp common_stub + +; 25: Reserved +isr25: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 25 + jmp common_stub + +; 26: Reserved +isr26: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 26 + jmp common_stub + +; 27: Reserved +isr27: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 27 + jmp common_stub + +; 28: Reserved +isr28: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 28 + jmp common_stub + +; 29: Reserved +isr29: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 29 + jmp common_stub + +; 30: Reserved +isr30: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 30 + jmp common_stub + +; 31: Reserved +isr31: + ; isr0 - isr31 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. 
+ ; cli + push byte 0 ; pseudo error code + push byte 31 + jmp common_stub + +extern syscall_handler + +; used to realize system calls +isrsyscall: + push r15 + push r14 + push r13 + push r12 + push r11 + push r10 + push r9 + push r8 + push rdi + push rsi + push rbp + push rsp + push rbx + push rdx + push rcx + push rax + + mov rdi, rsp + call syscall_handler + + pop rax + pop rcx + pop rdx + pop rbx + add rsp, 8 + pop rbp + pop rsi + pop rdi + pop r8 + pop r9 + pop r10 + pop r11 + pop r12 + pop r13 + pop r14 + pop r15 ; balances the initial push r15 so iretq sees the interrupt frame + iretq +global irq0 +global irq1 +global irq2 +global irq3 +global irq4 +global irq5 +global irq6 +global irq7 +global irq8 +global irq9 +global irq10 +global irq11 +global irq12 +global irq13 +global irq14 +global irq15 +global irq16 +global irq17 +global irq18 +global irq19 +global irq20 +global irq21 +global irq22 +global irq23 +global apic_timer +global apic_lint0 +global apic_lint1 +global apic_error +global apic_svr + +; 32: IRQ0 +irq0: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 32 + jmp common_stub + +; 33: IRQ1 +irq1: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 33 + jmp common_stub + +; 34: IRQ2 +irq2: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 34 + jmp common_stub + +; 35: IRQ3 +irq3: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 35 + jmp common_stub + +; 36: IRQ4 +irq4: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared.
+ ; cli + push byte 0 ; pseudo error code + push byte 36 + jmp common_stub + +; 37: IRQ5 +irq5: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 37 + jmp common_stub + +; 38: IRQ6 +irq6: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 38 + jmp common_stub + +; 39: IRQ7 +irq7: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 39 + jmp common_stub + +; 40: IRQ8 +irq8: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 40 + jmp common_stub + +; 41: IRQ9 +irq9: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 41 + jmp common_stub + +; 42: IRQ10 +irq10: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 42 + jmp common_stub + +; 43: IRQ11 +irq11: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 43 + jmp common_stub + +; 44: IRQ12 +irq12: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 44 + jmp common_stub + +; 45: IRQ13 +irq13: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. 
+ ; cli + push byte 0 ; pseudo error code + push byte 45 + jmp common_stub + +; 46: IRQ14 +irq14: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 46 + jmp common_stub + +; 47: IRQ15 +irq15: + ; irq0 - irq15 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 47 + jmp common_stub + +; 48: IRQ16 +irq16: + ; irq16 - irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 48 + jmp common_stub + +; 49: IRQ17 +irq17: + ; irq16- irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 49 + jmp common_stub + +; 50: IRQ18 +irq18: + ; irq16 - irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 50 + jmp common_stub + +; 51: IRQ19 +irq19: + ; irq16 - irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 51 + jmp common_stub + +; 52: IRQ20 +irq20: + ; irq16- irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 52 + jmp common_stub + +; 53: IRQ21 +irq21: + ; irq16 - irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; error code + push byte 53 + jmp common_stub + +; 54: IRQ22 +irq22: + ; irq16- irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. 
+ ; cli + push byte 0 ; pseudo error code + push byte 54 + jmp common_stub + +; 55: IRQ23 +irq23: + ; irq16 - irq23 are registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 55 + jmp common_stub + +apic_timer: + ; apic timer is registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 123 + jmp common_stub + +apic_lint0: + ; lint0 is registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 124 + jmp common_stub + +apic_lint1: + ; lint1 is registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 125 + jmp common_stub + +apic_error: + ; LVT error interrupt is registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. + ; cli + push byte 0 ; pseudo error code + push byte 126 + jmp common_stub + +apic_svr: + ; SVR is registered as "Interrupt Gate" + ; Therefore, the interrupt flag (IF) is already cleared. 
+ ; cli + push byte 0 ; pseudo error code + push byte 127 + jmp common_stub + +extern irq_handler +extern get_current_stack +extern finish_task_switch + +global switch_context +ALIGN 8 +switch_context: + ; create on the stack a pseudo interrupt + ; afterwards, we switch to the task with iret + mov rax, rdi ; rdi contains the address to store the old rsp + push QWORD 0x10 ; SS + push rsp ; RSP + add QWORD [rsp], 8*1 + pushf ; RFLAGS + push QWORD 0x08 ; CS + push QWORD rollback ; RIP + push QWORD 0x00 ; Interrupt number + push QWORD 0x00edbabe ; Error code + push rax + push rcx + push rdx + push rbx + push rsp + push rbp + push rsi + push rdi + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + + jmp common_switch + +ALIGN 8 +rollback: + ret + +ALIGN 8 +common_stub: + push rax + push rcx + push rdx + push rbx + push rsp + push rbp + push rsi + push rdi + push r8 + push r9 + push r10 + push r11 + push r12 + push r13 + push r14 + push r15 + + ; use the same handler for interrupts and exceptions + mov rdi, rsp + call irq_handler + + cmp rax, 0 + je no_context_switch + +common_switch: + mov [rax], rsp ; store old rsp + call get_current_stack ; get new rsp + xchg rax, rsp + + ; set task switched flag + mov rax, cr0 + or eax, 8 + mov cr0, rax + + ; call cleanup code + call finish_task_switch + +no_context_switch: + pop r15 + pop r14 + pop r13 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop rdi + pop rsi + pop rbp + add rsp, 8 + pop rbx + pop rdx + pop rcx + pop rax + + add rsp, 16 + iretq + +SECTION .data +global boot_stack +ALIGN 4096 +boot_stack: +TIMES (MAX_CORES*KERNEL_STACK_SIZE) DB 0xcd + +SECTION .note.GNU-stack noalloc noexec nowrite progbits diff --git a/arch/x86/kernel/gdt.c b/arch/x86/kernel/gdt.c index 12631a65..9e1c30df 100644 --- a/arch/x86/kernel/gdt.c +++ b/arch/x86/kernel/gdt.c @@ -17,6 +17,7 @@ * This file is part of MetalSVM. 
*/ +#include #include #include #include @@ -27,9 +28,7 @@ #include gdt_ptr_t gp; -static tss_t task_state_segments[MAX_TASKS] __attribute__ ((aligned (PAGE_SIZE))); -static unsigned char kstacks[MAX_TASKS][KERNEL_STACK_SIZE] __attribute__ ((aligned (PAGE_SIZE))) = {[0 ... MAX_TASKS-1][0 ... KERNEL_STACK_SIZE-1] = 0xCD}; -uint32_t default_stack_pointer = (uint32_t) kstacks[0] + KERNEL_STACK_SIZE - sizeof(size_t); +static tss_t task_state_segments[MAX_CORES] __attribute__ ((aligned (PAGE_SIZE))); // currently, our kernel has full access to the ioports static gdt_entry_t gdt[GDT_ENTRIES] = {[0 ... GDT_ENTRIES-1] = {0, 0, 0, 0, 0, 0}}; @@ -39,128 +38,156 @@ static gdt_entry_t gdt[GDT_ENTRIES] = {[0 ... GDT_ENTRIES-1] = {0, 0, 0, 0, 0, */ extern void gdt_flush(void); -/* - * This is defined in entry.asm. We use this for a - * hardware-based task switch. - */ -extern void tss_switch(uint32_t id); - -size_t get_stack(uint32_t id) +size_t* get_current_stack(void) { - if (BUILTIN_EXPECT(id >= MAX_TASKS, 0)) - return -EINVAL; - return (size_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t); -} + task_t* curr_task = per_core(current_task); -int register_task(task_t* task) { - uint16_t sel; - uint32_t id = task->id; + // determine and set esp0 +#ifdef CONFIG_X86_32 + task_state_segments[CORE_ID].esp0 = (size_t) curr_task->stack + KERNEL_STACK_SIZE - 16; // => stack is 16byte aligned +#else + task_state_segments[CORE_ID].rsp0 = (size_t) curr_task->stack + KERNEL_STACK_SIZE - 16; // => stack is 16byte aligned +#endif - if (BUILTIN_EXPECT(!task, 0)) - return -EINVAL; + // use new page table + write_cr3(virt_to_phys((size_t)curr_task->pgd)); - sel = (task->id+5) << 3; - asm volatile ("mov %0, %%ax; ltr %%ax" : : "ir"(sel) : "%eax"); - - // initialize the static elements of a TSS - task_state_segments[id].cr3 = (uint32_t) (task->pgd); - task_state_segments[id].ss0 = 0x10; - - return 0; + return curr_task->last_stack_pointer; } int arch_fork(task_t* task) { - uint16_t cs = 0x08; - 
uint16_t ds = 0x10; - uint32_t id; + struct state* state; task_t* curr_task = per_core(current_task); + size_t esp, state_size; if (BUILTIN_EXPECT(!task, 0)) return -EINVAL; - id = task->id; + + if (BUILTIN_EXPECT(!task->stack, 0)) + return -EINVAL; + +#ifdef CONFIG_X86_32 + state_size = sizeof(struct state) - 2*sizeof(size_t); +#else + state_size = sizeof(struct state); +#endif // copy kernel stack of the current task - memcpy(kstacks[id], kstacks[curr_task->id], KERNEL_STACK_SIZE); + mb(); + memcpy(task->stack, curr_task->stack, KERNEL_STACK_SIZE); - // reset TSS - memset(task_state_segments+id, 0x00, sizeof(tss_t)); +#ifdef CONFIG_X86_32 + asm volatile ("mov %%esp, %0" : "=m"(esp)); + esp -= (size_t) curr_task->stack; + esp += (size_t) task->stack; - // set default values of all registers - task_state_segments[id].cs = cs; - task_state_segments[id].ss = ds; - task_state_segments[id].ds = ds; - task_state_segments[id].fs = ds; - task_state_segments[id].gs = ds; - task_state_segments[id].es = ds; - task_state_segments[id].cr3 = (uint32_t) (virt_to_phys((size_t)task->pgd)); - task_state_segments[id].ss0 = ds; - task_state_segments[id].esp0 = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t); + state = (struct state*) (esp - state_size); + //memset(state, 0x00, state_size); - // save curret task context - asm volatile("mov %%esp, %0" : "=r"(task_state_segments[id].esp)); - task_state_segments[id].esp -= (uint32_t) kstacks[curr_task->id]; - task_state_segments[id].esp += (uint32_t) kstacks[id]; - - asm volatile ("pusha"); - asm volatile ("pop %0" : "=r"(task_state_segments[id].edi)); - asm volatile ("pop %0" : "=r"(task_state_segments[id].esi)); - asm volatile ("pop %0" : "=r"(task_state_segments[id].ebp)); -#ifdef WITH_FRAME_POINTER - task_state_segments[id].ebp -= (uint32_t) kstacks[curr_task->id]; - task_state_segments[id].ebp += (uint32_t) kstacks[id]; -#endif + asm volatile ("pusha; pop %0" : "=m"(state->edi)); + asm volatile ("pop %0" : 
"=m"(state->esi)); + asm volatile ("pop %0" : "=m"(state->ebp)); asm volatile ("add $4, %%esp" ::: "%esp"); - asm volatile ("pop %0" : "=r"(task_state_segments[id].ebx)); - asm volatile ("pop %0" : "=r"(task_state_segments[id].edx)); - asm volatile ("pop %0" : "=r"(task_state_segments[id].ecx)); - asm volatile ("pop %0" : "=r"(task_state_segments[id].eax)); + asm volatile ("pop %0" : "=m"(state->ebx)); + asm volatile ("pop %0" : "=m"(state->edx)); + asm volatile ("pop %0" : "=m"(state->ecx)); + asm volatile ("pop %0" : "=m"(state->eax)); + state->esp = esp; + task->last_stack_pointer = (size_t*) state; + state->int_no = 0xB16B00B5; + state->error = 0xC03DB4B3; + state->cs = 0x08; // store the current EFLAGS - asm volatile ("pushf; pop %%eax" : "=a"(task_state_segments[id].eflags)); - // This will be the entry point for the new task. - asm volatile ("call read_eip" : "=a"(task_state_segments[id].eip)); + asm volatile ("pushf; pop %0" : "=m"(state->eflags)); + // enable interrupts + state->eflags |= (1 << 9); + // This will be the entry point for the new task. read_ip cleanups the stack + asm volatile ("push %0; call read_ip" :: "r"(&state->eip) : "%eax"); +#else +#warning Currently, not supported! 
+ return -1; +#endif return 0; } -int create_default_frame(task_t* task, internal_entry_point_t ep, void* arg) +int create_default_frame(task_t* task, entry_point_t ep, void* arg) { - uint16_t cs = 0x08; - uint16_t ds = 0x10; - uint32_t id; + size_t *stack; + struct state *stptr; + size_t state_size; if (BUILTIN_EXPECT(!task, 0)) return -EINVAL; - id = task->id; - /* reset buffers */ - memset(task_state_segments+id, 0x00, sizeof(tss_t)); - memset(kstacks[id], 0xCD, KERNEL_STACK_SIZE); + if (BUILTIN_EXPECT(!task->stack, 0)) + return -EINVAL; - /* set default values of all registers */ - task_state_segments[id].cs = cs; - task_state_segments[id].ss = ds; - task_state_segments[id].ds = ds; - task_state_segments[id].fs = ds; - task_state_segments[id].gs = ds; - task_state_segments[id].es = ds; - task_state_segments[id].eflags = 0x1002; // 0x1202; - task_state_segments[id].cr3 = (uint32_t) (virt_to_phys((size_t)task->pgd)); - task_state_segments[id].eip = (uint32_t) ep; - task_state_segments[id].esp = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t); - - /* build default stack frame */ - *((size_t*)task_state_segments[id].esp) = 0xDEADBEAF; /* dead-end */ - task_state_segments[id].ebp = task_state_segments[id].esp; - task_state_segments[id].esp -= sizeof(size_t); - *((size_t*)task_state_segments[id].esp) = (size_t) arg; - task_state_segments[id].esp -= sizeof(size_t); - *((size_t*)task_state_segments[id].esp) = (size_t) leave_kernel_task; + memset(task->stack, 0xCD, KERNEL_STACK_SIZE); - /* setup for the kernel stack frame */ - task_state_segments[id].ss0 = 0x10; - task_state_segments[id].esp0 = (uint32_t) kstacks[id] + KERNEL_STACK_SIZE - sizeof(size_t); + /* The difference between setting up a task for SW-task-switching + * and not for HW-task-switching is setting up a stack and not a TSS. + * This is the stack which will be activated and popped off for iret later. 
+ */ + stack = (size_t*) (task->stack + KERNEL_STACK_SIZE - 16); // => stack is 16byte aligned + + /* The next three things on the stack are a marker for debugging purposes, ... */ + *stack-- = 0xDEADBEEF; +#ifdef CONFIG_X86_32 + /* the first-function-to-be-called's arguments, ... */ + *stack-- = (size_t) arg; +#endif + /* and the "caller" we shall return to. + * This procedure cleans the task after exit. */ + *stack = (size_t) leave_kernel_task; + + /* Next bunch on the stack is the initial register state. + * The stack must look like the stack of a task which was + * scheduled away previously. */ + + /* In 64bit mode, he stack pointer (SS:RSP) is pushed unconditionally on interrupts. + * In legacy modes, this push is conditional and based on a change in current privilege level (CPL).*/ +#ifdef CONFIG_X86_32 + state_size = sizeof(struct state) - 2*sizeof(size_t); +#else + state_size = sizeof(struct state); +#endif + stack = (size_t*) ((size_t) stack - state_size); + + stptr = (struct state *) stack; + memset(stptr, 0x00, state_size); +#ifdef CONFIG_X86_32 + stptr->esp = (size_t)stack + state_size; +#else + stptr->rsp = (size_t)stack + state_size; + /* the first-function-to-be-called's arguments, ... */ + stptr->rdi = (size_t) arg; +#endif + stptr->int_no = 0xB16B00B5; + stptr->error = 0xC03DB4B3; + + /* The instruction pointer shall be set on the first function to be called + * after IRETing */ +#ifdef CONFIG_X86_32 + stptr->eip = (size_t)ep; +#else + stptr->rip = (size_t)ep; +#endif + stptr->cs = 0x08; +#ifdef CONFIG_X86_32 + stptr->eflags = 0x1202; + // the creation of a kernel tasks didn't change the IOPL level + // => useresp & ss is not required +#else + stptr->rflags = 0x1202; + stptr->ss = 0x10; + stptr->userrsp = stptr->rsp; +#endif + + /* Set the task's stack pointer entry to the stack we have crafted right now. 
*/ + task->last_stack_pointer = (size_t*)stack; return 0; } @@ -169,27 +196,24 @@ int create_default_frame(task_t* task, internal_entry_point_t ep, void* arg) static void gdt_set_gate(int num, unsigned long base, unsigned long limit, unsigned char access, unsigned char gran) { - gdt[num] = configure_gdt_entry(base, limit, access, gran); + configure_gdt_entry(&gdt[num], base, limit, access, gran); } -gdt_entry_t configure_gdt_entry(unsigned long base, unsigned long limit, +void configure_gdt_entry(gdt_entry_t *dest_entry, unsigned long base, unsigned long limit, unsigned char access, unsigned char gran) { - gdt_entry_t desc; /* Setup the descriptor base address */ - desc.base_low = (base & 0xFFFF); - desc.base_middle = (base >> 16) & 0xFF; - desc.base_high = (base >> 24) & 0xFF; + dest_entry->base_low = (base & 0xFFFF); + dest_entry->base_middle = (base >> 16) & 0xFF; + dest_entry->base_high = (base >> 24) & 0xFF; /* Setup the descriptor limits */ - desc.limit_low = (limit & 0xFFFF); - desc.granularity = ((limit >> 16) & 0x0F); + dest_entry->limit_low = (limit & 0xFFFF); + dest_entry->granularity = ((limit >> 16) & 0x0F); /* Finally, set up the granularity and access flags */ - desc.granularity |= (gran & 0xF0); - desc.access = access; - - return desc; + dest_entry->granularity |= (gran & 0xF0); + dest_entry->access = access; } /* @@ -202,12 +226,23 @@ gdt_entry_t configure_gdt_entry(unsigned long base, unsigned long limit, void gdt_install(void) { unsigned int i; + unsigned long mode, limit; - memset(task_state_segments, 0x00, MAX_TASKS*sizeof(tss_t)); + memset(task_state_segments, 0x00, MAX_CORES*sizeof(tss_t)); + +#ifdef CONFIG_X86_32 + mode = GDT_FLAG_32_BIT; + limit = 0xFFFFFFFF; +#elif defined(CONFIG_X86_64) + mode = GDT_FLAG_64_BIT; + limit = 0; +#else +#error invalid mode +#endif /* Setup the GDT pointer and limit */ gp.limit = (sizeof(gdt_entry_t) * GDT_ENTRIES) - 1; - gp.base = (unsigned int) &gdt; + gp.base = (size_t) &gdt; /* Our NULL descriptor */ 
gdt_set_gate(0, 0, 0, 0, 0); @@ -217,40 +252,49 @@ void gdt_install(void) * is 0, the limit is 4 GByte, it uses 4KByte granularity, * uses 32-bit opcodes, and is a Code Segment descriptor. */ - gdt_set_gate(1, 0, 0xFFFFFFFF, + gdt_set_gate(1, 0, limit, GDT_FLAG_RING0 | GDT_FLAG_SEGMENT | GDT_FLAG_CODESEG | GDT_FLAG_PRESENT, - GDT_FLAG_4K_GRAN | GDT_FLAG_32_BIT); + GDT_FLAG_4K_GRAN | mode); /* * The third entry is our Data Segment. It's EXACTLY the * same as our code segment, but the descriptor type in * this entry's access byte says it's a Data Segment */ - gdt_set_gate(2, 0, 0xFFFFFFFF, + gdt_set_gate(2, 0, limit, GDT_FLAG_RING0 | GDT_FLAG_SEGMENT | GDT_FLAG_DATASEG | GDT_FLAG_PRESENT, - GDT_FLAG_4K_GRAN | GDT_FLAG_32_BIT); + GDT_FLAG_4K_GRAN | mode); /* * Create code segement for userspace applications (ring 3) */ - gdt_set_gate(3, 0, 0xFFFFFFFF, + gdt_set_gate(3, 0, limit, GDT_FLAG_RING3 | GDT_FLAG_SEGMENT | GDT_FLAG_CODESEG | GDT_FLAG_PRESENT, - GDT_FLAG_4K_GRAN | GDT_FLAG_32_BIT); + GDT_FLAG_4K_GRAN | mode); /* * Create data segement for userspace applications (ring 3) */ - gdt_set_gate(4, 0, 0xFFFFFFFF, + gdt_set_gate(4, 0, limit, GDT_FLAG_RING3 | GDT_FLAG_SEGMENT | GDT_FLAG_DATASEG | GDT_FLAG_PRESENT, - GDT_FLAG_4K_GRAN | GDT_FLAG_32_BIT); + GDT_FLAG_4K_GRAN | mode); /* * Create TSS for each task at ring0 (we use these segments for task switching) */ - for(i=0; i> 16) & 0xFFFF; + dest_entry->base_lo = (base & 0xFFFF); + dest_entry->base_hi = (base >> 16) & 0xFFFF; /* The segment or 'selector' that this IDT entry will use * is set here, along with any access flags */ - desc.sel = sel; - desc.always0 = 0; - desc.flags = flags; - - return desc; + dest_entry->sel = sel; + dest_entry->always0 = 0; + dest_entry->flags = flags; } /* @@ -67,7 +67,7 @@ idt_entry_t configure_idt_entry(size_t base, unsigned short sel, void idt_set_gate(unsigned char num, size_t base, unsigned short sel, unsigned char flags) { - idt[num] = configure_idt_entry(base, sel, flags); + 
configure_idt_entry(&idt[num], base, sel, flags); } extern void isrsyscall(void); @@ -82,7 +82,7 @@ void idt_install(void) /* Sets the special IDT pointer up, just like in 'gdt.c' */ idtp.limit = (sizeof(idt_entry_t) * 256) - 1; - idtp.base = (unsigned int)&idt; + idtp.base = (size_t)&idt; /* Add any new ISRs to the IDT here using idt_set_gate */ idt_set_gate(INT_SYSCALL, (size_t)isrsyscall, KERNEL_CODE_SELECTOR, diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index e2cde9d5..b1db9b46 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -128,6 +129,22 @@ static int irq_remap(void) return 0; } +int disable_timer_irq(void) +{ + if (BUILTIN_EXPECT(apic_is_enabled(), 1)) + return apic_disable_timer(); + + return -EINVAL; +} + +int enable_timer_irq(void) +{ + if (BUILTIN_EXPECT(apic_is_enabled(), 1)) + return apic_enable_timer(); + + return -EINVAL; +} + /** @brief Remap IRQs and install ISRs in IDT * * We first remap the interrupt controllers, and then we install @@ -224,12 +241,12 @@ int irq_init(void) * controller (an IRQ from 8 to 15) gets an interrupt, you need to * acknowledge the interrupt at BOTH controllers, otherwise, you * only send an EOI command to the first controller. If you don't send - * an EOI, it won't raise any more IRQs.\n - * \n + * an EOI, it won't raise any more IRQs. + * * Note: If we enabled the APIC, we also disabled the PIC. Afterwards, * we get no interrupts between 0 and 15. */ -void irq_handler(struct state *s) +size_t** irq_handler(struct state *s) { /* This is a blank function pointer */ void (*handler) (struct state * s); @@ -276,7 +293,9 @@ void irq_handler(struct state *s) leave_handler: // timer interrupt? 
if ((s->int_no == 32) || (s->int_no == 123)) - scheduler(); // switch to a new task + return scheduler(); // switch to a new task else if ((s->int_no >= 32) && (get_highest_priority() > per_core(current_task)->prio)) - scheduler(); + return scheduler(); + + return NULL; } diff --git a/arch/x86/kernel/isrs.c b/arch/x86/kernel/isrs.c index 73f1e203..7aa20962 100644 --- a/arch/x86/kernel/isrs.c +++ b/arch/x86/kernel/isrs.c @@ -27,13 +27,13 @@ */ #include -#include #include #include #include #include #include #include +#include /* * These are function prototypes for all of the exception @@ -165,27 +165,6 @@ void isrs_install(void) irq_install_handler(7, fpu_handler); } -static void fpu_init(union fpu_state* fpu) -{ - if (has_fxsr()) { - i387_fxsave_t* fx = &fpu->fxsave; - - memset(fx, 0x00, sizeof(i387_fxsave_t)); - fx->cwd = 0x37f; - if (has_xmm()) - fx->mxcsr = 0x1f80; - } else { - i387_fsave_t *fp = &fpu->fsave; - - memset(fp, 0x00, sizeof(i387_fsave_t)); - fp->cwd = 0xffff037fu; - fp->swd = 0xffff0000u; - fp->twd = 0xffffffffu; - fp->fos = 0xffff0000u; - } - -} - static void fpu_handler(struct state *s) { task_t* task = per_core(current_task); @@ -230,9 +209,14 @@ static void fault_handler(struct state *s) { if (s->int_no < 32) { kputs(exception_messages[s->int_no]); +#ifdef CONFIG_X86_32 kprintf(" Exception (%d) at 0x%x:0x%x on core %u, error code 0x%x, eflags 0x%x\n", s->int_no, s->cs, s->eip, CORE_ID, s->error, s->eflags); - +#elif defined(CONFIG_X86_64) + kprintf(" Exception (%d) at 0x%llx:0x%llx on core %u, error code 0x%llx, rflags 0x%llx\n", + s->int_no, s->cs, s->rip, CORE_ID, s->error, s->rflags); +#endif + /* Now, we signalize that we have handled the interrupt */ if (apic_is_enabled()) apic_eoi(); diff --git a/arch/x86/kernel/processor.c b/arch/x86/kernel/processor.c index f923fabb..479e589b 100644 --- a/arch/x86/kernel/processor.c +++ b/arch/x86/kernel/processor.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -26,30 +27,103 
@@ #include #endif +static void default_mb(void) +{ + asm volatile ("lock; addl $0,0(%%esp)" ::: "memory", "cc"); +} + +static void default_save_fpu_state(union fpu_state* state) +{ + asm volatile ("fnsave %0; fwait" : "=m"((*state).fsave) :: "memory"); +} + +static void default_restore_fpu_state(union fpu_state* state) +{ + asm volatile ("frstor %0" :: "m"(state->fsave)); +} + +static void default_fpu_init(union fpu_state* fpu) +{ + i387_fsave_t *fp = &fpu->fsave; + + memset(fp, 0x00, sizeof(i387_fsave_t)); + fp->cwd = 0xffff037fu; + fp->swd = 0xffff0000u; + fp->twd = 0xffffffffu; + fp->fos = 0xffff0000u; +} + + +func_memory_barrier mb = default_mb; +func_memory_barrier rmb = default_mb; +func_memory_barrier wmb = default_mb; +handle_fpu_state save_fpu_state = default_save_fpu_state; +handle_fpu_state restore_fpu_state = default_restore_fpu_state; +handle_fpu_state fpu_init = default_fpu_init; + +static void mfence(void) { asm volatile("mfence" ::: "memory"); } +static void lfence(void) { asm volatile("lfence" ::: "memory"); } +static void sfence(void) { asm volatile("sfence" ::: "memory"); } + +static void save_fpu_state_fxsr(union fpu_state* state) +{ + asm volatile ("fxsave %0; fnclex" : "=m"((*state).fxsave) :: "memory"); +} + +static void restore_fpu_state_fxsr(union fpu_state* state) +{ + asm volatile ("fxrstor %0" :: "m"(state->fxsave)); +} + +static void fpu_init_fxsr(union fpu_state* fpu) +{ + i387_fxsave_t* fx = &fpu->fxsave; + + memset(fx, 0x00, sizeof(i387_fxsave_t)); + fx->cwd = 0x37f; + if (BUILTIN_EXPECT(has_sse(), 1)) + fx->mxcsr = 0x1f80; +} + cpu_info_t cpu_info = { 0, 0 }; static uint32_t cpu_freq = 0; int cpu_detection(void) { - uint32_t a, b, cr4; + uint32_t a, b; + size_t cr4; cpuid(1, &a, &b, &cpu_info.feature2, &cpu_info.feature1); cr4 = read_cr4(); if (has_fxsr()) cr4 |= 0x200; // set the OSFXSR bit - if (has_xmm()) + if (has_sse()) cr4 |= 0x400; // set the OSXMMEXCPT bit write_cr4(cr4); + if (has_sse()) + wmb = sfence; + + if (has_sse2()) 
{ + rmb = lfence; + mb = mfence; + } + if (has_avx()) kprintf("The CPU owns the Advanced Vector Extensions (AVX). However, MetalSVM doesn't support AVX!\n"); if (has_fpu()) { - kputs("Found and initialize FPU!\n"); + kputs("Found and initialized FPU!\n"); asm volatile ("fninit"); } + if (has_fxsr()) { + save_fpu_state = save_fpu_state_fxsr; + restore_fpu_state = restore_fpu_state_fxsr; + fpu_init = fpu_init_fxsr; + } + return 0; } @@ -75,12 +149,12 @@ uint32_t detect_cpu_frequency(void) while((ticks = get_clock_tick()) - old == 0) HALT; - mb(); + rmb(); start = rdtsc(); /* wait a second to determine the frequency */ while(get_clock_tick() - ticks < TIMER_FREQ) HALT; - mb(); + rmb(); end = rdtsc(); diff = end > start ? end - start : start - end; diff --git a/arch/x86/kernel/string.asm b/arch/x86/kernel/string32.asm similarity index 100% rename from arch/x86/kernel/string.asm rename to arch/x86/kernel/string32.asm diff --git a/arch/x86/kernel/string64.asm b/arch/x86/kernel/string64.asm new file mode 100644 index 00000000..5dc5d855 --- /dev/null +++ b/arch/x86/kernel/string64.asm @@ -0,0 +1,79 @@ +; +; Written by the Chair for Operating Systems, RWTH Aachen University +; +; NO Copyright (C) 2010-2012, Stefan Lankes +; consider these trivial functions to be public domain. +; +; These functions are distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; + +[BITS 64] +SECTION .text +global strcpy +strcpy: + push rdi + +L1: + lodsb + stosb + test al, al + jne L1 + + pop rax + ret + +global strncpy +strncpy: + push rdi + mov rcx, rdx + +L2: + dec rcx + js L3 + lodsb + stosb + test al, al + jne L2 ; loop inside strncpy (L1 belongs to strcpy and ignores the rcx limit) + rep + stosb ; al == 0 here: zero-fill the remaining rcx bytes + +L3: + pop rax + ret + +%if 0 +; The following function is derived from JamesM's kernel development tutorials +; (http://www.jamesmolloy.co.uk/tutorial_html/) +global copy_page_physical +copy_page_physical: + push esi ; According to __cdecl, we must preserve the contents of ESI + push edi ; and EDI. 
+ pushf ; push EFLAGS, so we can pop it and reenable interrupts + ; later, if they were enabled anyway. + cli ; Disable interrupts, so we aren't interrupted. + ; Load these in BEFORE we disable paging! + + mov edi, [esp+12+4] ; Destination address + mov esi, [esp+12+8] ; Source address + + mov edx, cr0 ; Get the control register... + and edx, 0x7fffffff ; and... + mov cr0, edx ; Disable paging. + + cld + mov ecx, 0x400 ; 1024*4bytes = 4096 bytes = page size + rep movsd ; copy page + + mov edx, cr0 ; Get the control register again + or edx, 0x80000000 ; and... + mov cr0, edx ; Enable paging. + + popf ; Pop EFLAGS back. + pop edi ; Get the original value of EDI + pop esi ; and ESI back. + ret + +%endif + +SECTION .note.GNU-stack noalloc noexec nowrite progbits diff --git a/arch/x86/kernel/timer.c b/arch/x86/kernel/timer.c index d6b71d2c..da8595c4 100644 --- a/arch/x86/kernel/timer.c +++ b/arch/x86/kernel/timer.c @@ -39,12 +39,47 @@ static volatile uint64_t timer_ticks = 0; #if MAX_CORES > 1 extern atomic_int32_t cpu_online; #endif +static int8_t use_tickless = 0; +static uint64_t last_rdtsc = 0; uint64_t get_clock_tick(void) { return timer_ticks; } +void start_tickless(void) +{ + use_tickless = 1; + rmb(); + last_rdtsc = rdtsc(); +} + +void end_tickless(void) +{ + use_tickless = 0; + last_rdtsc = 0; +} + +void check_ticks(void) +{ + if (!use_tickless) + return; + +#if MAX_CORES > 1 + if (smp_id() == 0) +#endif + { + uint64_t curr_rdtsc = rdtsc(); + + rmb(); + if (curr_rdtsc - last_rdtsc > 1000000ULL*(uint64_t)get_cpu_frequency() / (uint64_t)TIMER_FREQ) { + timer_ticks++; + last_rdtsc = curr_rdtsc; + rmb(); + } + } +} + int sys_times(struct tms* buffer, clock_t* clock) { if (BUILTIN_EXPECT(!buffer, 0)) @@ -85,12 +120,14 @@ static void timer_handler(struct state *s) // dump_load(); } +#ifndef CONFIG_TICKLESS update_load(); #if MAX_CORES > 1 if (atomic_int32_read(&cpu_online) > 1) load_balancing(); #endif +#endif } int timer_wait(unsigned int ticks) diff --git 
a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 09392539..c5b4f064 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,4 +1,4 @@ -C_source := page.c svm.c +C_source := page$(BIT).c svm.c MODULE := arch_x86_mm include $(TOPDIR)/Makefile.inc diff --git a/arch/x86/mm/page.c b/arch/x86/mm/page32.c similarity index 84% rename from arch/x86/mm/page.c rename to arch/x86/mm/page32.c index b721ecf7..a63a53fc 100644 --- a/arch/x86/mm/page.c +++ b/arch/x86/mm/page32.c @@ -57,7 +57,9 @@ extern const void kernel_start; extern const void kernel_end; // boot task's page directory and page directory lock -static page_dir_t boot_pgd = {{[0 ... 1023] = 0}}; +static page_dir_t boot_pgd = {{[0 ... PGT_ENTRIES-1] = 0}}; +static page_table_t pgt_container = {{[0 ... PGT_ENTRIES-1] = 0}}; +static page_table_t boot_pgt[KERNEL_SPACE/(1024*PAGE_SIZE)]; static spinlock_t kslock = SPINLOCK_INIT; static int paging_enabled = 0; @@ -88,8 +90,8 @@ inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_tabl if (counter) (*counter)++; - for(i=0; i<1024; i++) { - if (pgt->entries[i] & 0xFFFFF000) { + for(i=0; ientries[i] & PAGE_MASK) { if (!(pgt->entries[i] & PG_USER)) { // Kernel page => copy only page entries new_pgt->entries[i] = pgt->entries[i]; @@ -102,7 +104,7 @@ inline static size_t copy_page_table(task_t* task, uint32_t pgd_index, page_tabl if (counter) (*counter)++; - copy_page_physical((void*)phyaddr, (void*) (pgt->entries[i] & 0xFFFFF000)); + copy_page_physical((void*)phyaddr, (void*) (pgt->entries[i] & PAGE_MASK)); new_pgt->entries[i] = phyaddr | (pgt->entries[i] & 0xFFF); @@ -131,7 +133,7 @@ int create_pgd(task_t* task, int copy) // we already know the virtual address of the "page table container" // (see file header) - pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & 0xFFFFF000); + pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK); // create new page directory for the new task pgd = kmalloc(sizeof(page_dir_t)); 
@@ -149,7 +151,7 @@ int create_pgd(task_t* task, int copy) spinlock_lock(&kslock); - for(i=0; i<1024; i++) { + for(i=0; ientries[i] = boot_pgd.entries[i]; // only kernel entries will be copied if (pgd->entries[i] && !(pgd->entries[i] & PG_USER)) @@ -159,13 +161,13 @@ int create_pgd(task_t* task, int copy) spinlock_unlock(&kslock); // map page table container at the end of the kernel space - viraddr = (KERNEL_SPACE - PAGE_SIZE) & 0xFFFFF000; + viraddr = (KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK; index1 = viraddr >> 22; index2 = (viraddr >> 12) & 0x3FF; // now, we create a self reference - pgd->entries[index1] = ((size_t) virt_to_phys((size_t) pgt) & 0xFFFFF000)|KERN_TABLE; - pgt->entries[index2] = ((size_t) virt_to_phys((size_t) pgt) & 0xFFFFF000)|KERN_PAGE; + pgd->entries[index1] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_TABLE; + pgt->entries[index2] = ((size_t) virt_to_phys((size_t) pgt) & PAGE_MASK)|KERN_PAGE; task->pgd = pgd; @@ -178,10 +180,10 @@ int create_pgd(task_t* task, int copy) if (!(curr_task->pgd->entries[i] & PG_USER)) continue; - phyaddr = copy_page_table(task, i, (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + i*PAGE_SIZE) & 0xFFFFF000), &counter); + phyaddr = copy_page_table(task, i, (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + i*PAGE_SIZE) & PAGE_MASK), &counter); if (phyaddr) { - pgd->entries[i] = (phyaddr & 0xFFFFF000) | (curr_task->pgd->entries[i] & 0xFFF); - pgt->entries[i] = (phyaddr & 0xFFFFF000) | KERN_PAGE; + pgd->entries[i] = (phyaddr & PAGE_MASK) | (curr_task->pgd->entries[i] & 0xFFF); + pgt->entries[i] = (phyaddr & PAGE_MASK) | KERN_PAGE; } } @@ -206,9 +208,9 @@ int drop_pgd(void) spinlock_lock(&task->pgd_lock); - for(i=0; i<1024; i++) { + for(i=0; ientries[i] & PG_USER) { - put_page(pgd->entries[i] & 0xFFFFF000); + put_page(pgd->entries[i] & PAGE_MASK); pgd->entries[i] = 0; } } @@ -241,14 +243,14 @@ size_t virt_to_phys(size_t viraddr) index1 = viraddr >> 22; index2 = (viraddr >> 12) & 0x3FF; - if 
(!(task->pgd->entries[index1] & 0xFFFFF000)) + if (!(task->pgd->entries[index1] & PAGE_MASK)) goto out; - pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & 0xFFFFF000); + pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK); if (!pgt || !(pgt->entries[index2])) goto out; - ret = pgt->entries[index2] & 0xFFFFF000; // determine page frame + ret = pgt->entries[index2] & PAGE_MASK; // determine page frame ret = ret | (viraddr & 0xFFF); // add page offset out: //kprintf("vir %p to phy %p\n", viraddr, ret); @@ -313,9 +315,9 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag if (paging_enabled) // we already know the virtual address of the "page table container" // (see file header) - pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & 0xFFFFF000); + pgt_container = (page_table_t*) ((KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK); else - pgt_container = (page_table_t*) (task->pgd->entries[(KERNEL_SPACE - PAGE_SIZE) >> 22] & 0xFFFFF000); + pgt_container = (page_table_t*) (task->pgd->entries[(KERNEL_SPACE - PAGE_SIZE) >> 22] & PAGE_MASK); if (BUILTIN_EXPECT(!pgt_container, 0)) { spinlock_unlock(pgd_lock); @@ -328,26 +330,26 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag // clear the page table if (paging_enabled) - memset((void*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & 0xFFFFF000), 0x00, PAGE_SIZE); + memset((void*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK), 0x00, PAGE_SIZE); else memset(pgt, 0x00, PAGE_SIZE); - } else pgt = (page_table_t*) (task->pgd->entries[index] & 0xFFFFF000); + } else pgt = (page_table_t*) (task->pgd->entries[index] & PAGE_MASK); /* convert physical address to virtual */ if (paging_enabled) - pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & 0xFFFFF000); + pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK); index = (viraddr >> 
12) & 0x3FF; if (pgt->entries[index] && !(flags & MAP_REMAP)) { spinlock_unlock(pgd_lock); - kprintf("0x%x is already maped\n", viraddr); + kprintf("0x%x is already mapped\n", viraddr); return 0; } if (flags & MAP_USER_SPACE) - pgt->entries[index] = USER_PAGE|(phyaddr & 0xFFFFF000); + pgt->entries[index] = USER_PAGE|(phyaddr & PAGE_MASK); else - pgt->entries[index] = KERN_PAGE|(phyaddr & 0xFFFFF000); + pgt->entries[index] = KERN_PAGE|(phyaddr & PAGE_MASK); if (flags & MAP_NO_CACHE) pgt->entries[index] |= PG_PCD; @@ -364,6 +366,9 @@ size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flag if (flags & MAP_SVM_LAZYRELEASE) pgt->entries[index] |= PG_SVM_LAZYRELEASE|PG_PWT; + if (flags & MAP_SVM_INIT) + pgt->entries[index] |= PG_SVM_INIT; + if (flags & MAP_NO_ACCESS) pgt->entries[index] &= ~PG_PRESENT; @@ -405,15 +410,17 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags) index2 = (viraddr >> 12) & 0x3FF; while ((viraddr < end) && (index2 < 1024)) { - pgt = (page_table_t*) (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & 0xFFFFF000); + pgt = (page_table_t*) (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK); if (pgt && pgt->entries[index2]) { - phyaddr = pgt->entries[index2] & 0xFFFFF000; + phyaddr = pgt->entries[index2] & PAGE_MASK; newflags = pgt->entries[index2] & 0xFFF; // get old flags - if ((newflags & PG_SVM_STRONG) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS))) - newflags |= PG_PRESENT; - else if ((newflags & PG_SVM_STRONG) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS)) - newflags &= ~PG_PRESENT; + if (!(newflags & PG_SVM_INIT)) { + if ((newflags & PG_SVM_STRONG) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS))) + newflags |= PG_PRESENT; + else if ((newflags & PG_SVM_STRONG) && (newflags & PG_PRESENT) && (flags & VMA_NOACCESS)) + newflags &= ~PG_PRESENT; + } // update flags if 
(!(flags & VMA_WRITE)) { @@ -430,7 +437,7 @@ int change_page_permissions(size_t start, size_t end, uint32_t flags) #endif } - pgt->entries[index2] = (newflags & 0xFFF) | (phyaddr & 0xFFFFF000); + pgt->entries[index2] = (newflags & 0xFFF) | (phyaddr & PAGE_MASK); tlb_flush_one_page(viraddr); } @@ -464,12 +471,12 @@ size_t vm_alloc(uint32_t npages, uint32_t flags) if (flags & MAP_KERNEL_SPACE) { pgd_lock = &kslock; - start = (((size_t) &kernel_end) + PAGE_SIZE) & 0xFFFFF000; - end = (KERNEL_SPACE - 2*PAGE_SIZE) & 0xFFFFF000; // we need 1 PAGE for our PGTs + start = (((size_t) &kernel_end) + PAGE_SIZE) & PAGE_MASK; + end = (KERNEL_SPACE - 2*PAGE_SIZE) & PAGE_MASK; // we need 1 PAGE for our PGTs } else { pgd_lock = &task->pgd_lock; - start = KERNEL_SPACE & 0xFFFFF000; - end = 0xFFFFF000; + start = KERNEL_SPACE & PAGE_MASK; + end = PAGE_MASK; } if (BUILTIN_EXPECT(!npages, 0)) @@ -483,7 +490,7 @@ size_t vm_alloc(uint32_t npages, uint32_t flags) index1 = i >> 22; index2 = (i >> 12) & 0x3FF; - pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & 0xFFFFF000); + pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK); if (!pgt || !(pgt->entries[index2])) { i+=PAGE_SIZE; j++; @@ -526,7 +533,7 @@ int unmap_region(size_t viraddr, uint32_t npages) index1 = viraddr >> 22; index2 = (viraddr >> 12) & 0x3FF; - pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & 0xFFFFF000); + pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK); if (!pgt) continue; pgt->entries[index2] &= ~PG_PRESENT; @@ -565,7 +572,7 @@ int vm_free(size_t viraddr, uint32_t npages) index1 = viraddr >> 22; index2 = (viraddr >> 12) & 0x3FF; - pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & 0xFFFFF000); + pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK); if (!pgt) continue; pgt->entries[index2] = 0; @@ -598,7 +605,7 @@ int 
print_paging_tree(size_t viraddr) kprintf("\tPage directory entry %u: ", index1); if (pgd) { kprintf("0x%0x\n", pgd->entries[index1]); - pgt = (page_table_t*) (pgd->entries[index1] & 0xFFFFF000); + pgt = (page_table_t*) (pgd->entries[index1] & PAGE_MASK); } else kputs("invalid page directory\n"); @@ -629,7 +636,7 @@ static void pagefault_handler(struct state *s) #endif if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) { - viraddr = viraddr & 0xFFFFF000; + viraddr = viraddr & PAGE_MASK; phyaddr = get_page(); if (BUILTIN_EXPECT(!phyaddr, 0)) @@ -648,13 +655,19 @@ static void pagefault_handler(struct state *s) // does our SVM system need to handle this page fault? index1 = viraddr >> 22; index2 = (viraddr >> 12) & 0x3FF; - if (!pgd || !(pgd->entries[index1] & 0xFFFFF000)) + if (!pgd || !(pgd->entries[index1] & PAGE_MASK)) goto default_handler; - pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & 0xFFFFF000); + pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK); if (!pgt || !(pgt->entries[index2])) goto default_handler; + if (pgt->entries[index2] & PG_SVM_INIT) { + if (BUILTIN_EXPECT(!svm_alloc_page(viraddr, pgt), 1)) + return; + else + goto default_handler; + } if (pgt->entries[index2] & PG_SVM_STRONG) - if (!svm_access_request(viraddr)) + if (BUILTIN_EXPECT(!svm_access_request(viraddr), 1)) return; #endif @@ -678,12 +691,7 @@ int arch_paging_init(void) irq_install_handler(14, pagefault_handler); // Create a page table to reference to the other page tables - pgt = (page_table_t*) get_page(); - if (!pgt) { - kputs("arch_paging_init: Not enough memory!\n"); - return -ENOMEM; - } - memset(pgt, 0, PAGE_SIZE); + pgt = &pgt_container; // map this table at the end of the kernel space viraddr = KERNEL_SPACE - PAGE_SIZE; @@ -691,21 +699,16 @@ int arch_paging_init(void) index2 = (viraddr >> 12) & 0x3FF; // now, we create a self reference - 
per_core(current_task)->pgd->entries[index1] = (((size_t) pgt) & 0xFFFFF000)|KERN_TABLE; + per_core(current_task)->pgd->entries[index1] = (((size_t) pgt) & PAGE_MASK)|KERN_TABLE; pgt->entries[index2] = ((size_t) pgt & 0xFFFFF000)|KERN_PAGE; // create the other PGTs for the kernel space for(i=0; ipgd->entries[i] = (phyaddr & 0xFFFFF000)|KERN_TABLE; - pgt->entries[i] = (phyaddr & 0xFFFFF000)|KERN_PAGE; + memset((void*) phyaddr, 0x00, sizeof(page_table_t)); + per_core(current_task)->pgd->entries[i] = (phyaddr & PAGE_MASK)|KERN_TABLE; + pgt->entries[i] = (phyaddr & PAGE_MASK)|KERN_PAGE; } /* @@ -735,7 +738,7 @@ int arch_paging_init(void) * of course, mb_info has to map into the kernel space */ if (mb_info) - map_region((size_t) mb_info & 0xFFFFF000, (size_t) mb_info & 0xFFFFF000, 1, MAP_KERNEL_SPACE); + map_region((size_t) mb_info & PAGE_MASK, (size_t) mb_info & PAGE_MASK, 1, MAP_KERNEL_SPACE); #if 0 /* @@ -762,19 +765,20 @@ int arch_paging_init(void) * Therefore, we map these moduels into the kernel space. 
*/ if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) { - multiboot_module_t* mmodule = (multiboot_module_t*) mb_info->mods_addr; + multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr); npages = mb_info->mods_count * sizeof(multiboot_module_t) >> PAGE_SHIFT; if (mb_info->mods_count * sizeof(multiboot_module_t) & (PAGE_SIZE-1)) npages++; - map_region((size_t) (mb_info->mods_addr), (size_t) (mb_info->mods_addr), npages, MAP_KERNEL_SPACE); + map_region((size_t) mb_info->mods_addr, (size_t) mb_info->mods_addr, npages, MAP_KERNEL_SPACE); for(i=0; imods_count; i++, mmodule++) { // map physical address to the same virtual address npages = (mmodule->mod_end - mmodule->mod_start) >> PAGE_SHIFT; if (mmodule->mod_end & (PAGE_SIZE-1)) npages++; - map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_KERNEL_SPACE); + kprintf("Map module %s at 0x%x (%u pages)\n", (char*) mmodule->cmdline, mmodule->mod_start, npages); + map_region((size_t) mmodule->mod_start, (size_t) mmodule->mod_start, npages, MAP_KERNEL_SPACE); } } #endif @@ -816,9 +820,9 @@ int arch_paging_init(void) /* * we turned on paging - * => now, we are able to register our task for Task State Switching + * => now, we are able to register our task */ - register_task(per_core(current_task)); + register_task(); // APIC registers into the kernel address space map_apic(); diff --git a/arch/x86/mm/page64.c b/arch/x86/mm/page64.c new file mode 100644 index 00000000..5f973f4c --- /dev/null +++ b/arch/x86/mm/page64.c @@ -0,0 +1,646 @@ +/* + * Copyright 2012 Stefan Lankes, Chair for Operating Systems, + * RWTH Aachen University + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * This file is part of MetalSVM. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_ROCKCREEK +#include +#include +#include +#include +#endif + +/* + * Virtual Memory Layout of the standard configuration + * (1 GB kernel space) + * + * 0x00000000 - 0x000FFFFF: reserved for IO devices (1MB) + * 0x00100000 - 0x0DEADFFF: Kernel (size depends on the configuration) (221MB) + * 0x0DEAE000 - 0x3FFFFFFF: Kernel heap + * + */ + +/* + * Note that linker symbols are not variables, they have no memory allocated for + * maintaining a value, rather their address is their value. 
+ */ +extern const void kernel_start; +extern const void kernel_end; + +// boot task's page directory and page directory lock +extern page_dir_t boot_pgd; +static spinlock_t kslock = SPINLOCK_INIT; +static int paging_enabled = 0; + +page_dir_t* get_boot_pgd(void) +{ + return &boot_pgd; +} + +int create_pgd(task_t* task, int copy) +{ + // Currently, we support only kernel tasks + // => all tasks are able to use the same pgd + + if (BUILTIN_EXPECT(!paging_enabled, 0)) + return -EINVAL; + + task->pgd = get_boot_pgd(); + + return 0; +} + +/* + * drops all page frames and the PGD of a user task + */ +int drop_pgd(void) +{ +#if 0 + page_dir_t* pgd = per_core(current_task)->pgd; + size_t phy_pgd = virt_to_phys((size_t) pgd); + task_t* task = per_core(current_task); + uint32_t i; + + if (BUILTIN_EXPECT(pgd == &boot_pgd, 0)) + return -EINVAL; + + spinlock_lock(&task->pgd_lock); + + for(i=0; i<1024; i++) { + if (pgd->entries[i] & PG_USER) { + put_page(pgd->entries[i] & PAGE_MASK); + pgd->entries[i] = 0; + } + } + + // freeing the page directory + put_page(phy_pgd); + + task->pgd = NULL; + + spinlock_unlock(&task->pgd_lock); +#endif + + return 0; +} + +size_t virt_to_phys(size_t viraddr) +{ + task_t* task = per_core(current_task); + uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF; + uint16_t idx_dirp = (viraddr >> 30) & 0x1FF; + uint16_t idx_dir = (viraddr >> 21) & 0x1FF; + uint16_t idx_table = (viraddr >> 12) & 0x1FF; + page_table_t* pgt; + size_t ret = 0; + + if (!paging_enabled) + return viraddr; + + if (BUILTIN_EXPECT(!task || !task->pgd, 0)) + return 0; + + spinlock_lock(&task->pgd_lock); + + // Currently, we allocate pages only in kernel space. 
+ // => physical address of the page table is identical of the virtual address + pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK); + if (!pgt) + goto out; + + pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK); + if (!pgt) + goto out; + + pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK); + if (!pgt) + goto out; + + ret = (size_t) (pgt->entries[idx_table] & PAGE_MASK); + if (!ret) + goto out; + + ret = ret | (viraddr & 0xFFF); // add page offset +out: + //kprintf("vir %p to phy %p\n", viraddr, ret); + + spinlock_unlock(&task->pgd_lock); + + return ret; +} + +size_t map_region(size_t viraddr, size_t phyaddr, uint32_t npages, uint32_t flags) +{ + task_t* task = per_core(current_task); + spinlock_t* pgd_lock; + page_table_t* pgt; + size_t i, ret; + + if (BUILTIN_EXPECT(!task || !task->pgd, 0)) + return 0; + + if (BUILTIN_EXPECT(!paging_enabled && (viraddr != phyaddr), 0)) + return 0; + + if (flags & MAP_KERNEL_SPACE) + pgd_lock = &kslock; + else + pgd_lock = &task->pgd_lock; + + spinlock_lock(pgd_lock); + + if (!viraddr) { + viraddr = vm_alloc(npages, flags); + if (BUILTIN_EXPECT(!viraddr, 0)) { + spinlock_unlock(pgd_lock); + kputs("map_region: found no valid virtual address\n"); + return 0; + } + } + + ret = viraddr; + for(i=0; i> 39) & 0x1FF; + uint16_t idx_dirp = (viraddr >> 30) & 0x1FF; + uint16_t idx_dir = (viraddr >> 21) & 0x1FF; + uint16_t idx_table = (viraddr >> 12) & 0x1FF; + + pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK); + if (!pgt) { + spinlock_unlock(pgd_lock); + kputs("map_region: out of memory\n"); + return 0; + } + + pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK); + if (!pgt) { + spinlock_unlock(pgd_lock); + kputs("map_region: out of memory\n"); + return 0; + } + + pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK); + if (!pgt) { + spinlock_unlock(pgd_lock); + kputs("map_region: out of memory\n"); + return 0; + } + + /* convert physical address to virtual */ + // Currently, we 
allocate pages only in kernel space. + // => physical address of the page table is identical of the virtual address + //if (paging_enabled) + // pgt = (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index*PAGE_SIZE) & PAGE_MASK); + + if (pgt->entries[idx_table] && !(flags & MAP_REMAP)) { + spinlock_unlock(pgd_lock); + kprintf("0x%x is already mapped\n", viraddr); + return 0; + } + + if (flags & MAP_USER_SPACE) + pgt->entries[idx_table] = USER_PAGE|(phyaddr & PAGE_MASK); + else + pgt->entries[idx_table] = KERN_PAGE|(phyaddr & PAGE_MASK); + + if (flags & MAP_NO_CACHE) + pgt->entries[idx_table] |= PG_PCD; + + if (flags & MAP_NO_ACCESS) + pgt->entries[idx_table] &= ~PG_PRESENT; + + if (flags & MAP_WT) + pgt->entries[idx_table] |= PG_PWT; + + if (flags & MAP_USER_SPACE) + atomic_int32_inc(&task->user_usage); + + tlb_flush_one_page(viraddr); + } + + spinlock_unlock(pgd_lock); + + return ret; +} + +int change_page_permissions(size_t start, size_t end, uint32_t flags) +{ +#if 0 + uint32_t index1, index2, newflags; + size_t viraddr = start & PAGE_MASK; + size_t phyaddr; + page_table_t* pgt; + page_dir_t* pgd; + task_t* task = per_core(current_task); + + if (BUILTIN_EXPECT(!paging_enabled, 0)) + return -EINVAL; + + pgd = per_core(current_task)->pgd; + if (BUILTIN_EXPECT(!pgd, 0)) + return -EINVAL; + + spinlock_lock(&task->pgd_lock); + + while (viraddr < end) + { + index1 = viraddr >> 22; + index2 = (viraddr >> 12) & 0x3FF; + + while ((viraddr < end) && (index2 < 1024)) { + pgt = (page_table_t*) (page_table_t*) ((KERNEL_SPACE - 1024*PAGE_SIZE + index1*PAGE_SIZE) & PAGE_MASK); + if (pgt && pgt->entries[index2]) { + phyaddr = pgt->entries[index2] & PAGE_MASK; + newflags = pgt->entries[index2] & 0xFFF; // get old flags + + if (!(newflags & PG_SVM_INIT)) { + if ((newflags & PG_SVM_STRONG) && !(newflags & PG_PRESENT) && (flags & (VMA_READ|VMA_WRITE) && !(flags & VMA_NOACCESS))) + newflags |= PG_PRESENT; + else if ((newflags & PG_SVM_STRONG) && (newflags & PG_PRESENT) && (flags & 
VMA_NOACCESS)) + newflags &= ~PG_PRESENT; + } + + // update flags + if (!(flags & VMA_WRITE)) { + newflags &= ~PG_RW; +#ifdef CONFIG_ROCKCREEK + if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE)) + newflags &= ~PG_MPE; +#endif + } else { + newflags |= PG_RW; +#ifdef CONFIG_ROCKCREEK + if (newflags & (PG_SVM_STRONG|PG_SVM_LAZYRELEASE)) + newflags |= PG_MPE; +#endif + } + + pgt->entries[index2] = (newflags & 0xFFF) | (phyaddr & PAGE_MASK); + + tlb_flush_one_page(viraddr); + } + + index2++; + viraddr += PAGE_SIZE; + } + } + + spinlock_unlock(&task->pgd_lock); +#endif + + return -EINVAL; +} + +/* + * Use the first fit algorithm to find a valid address range + * + * TODO: O(n) => bad performance, we need a better approach + */ +size_t vm_alloc(uint32_t npages, uint32_t flags) +{ + task_t* task = per_core(current_task); + spinlock_t* pgd_lock; + size_t viraddr, i, j, ret = 0; + size_t start, end; + page_table_t* pgt; + + if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0)) + return 0; + + if (flags & MAP_KERNEL_SPACE) { + pgd_lock = &kslock; + start = (((size_t) &kernel_end) + 10*PAGE_SIZE) & PAGE_MASK; + end = (KERNEL_SPACE - PAGE_SIZE) & PAGE_MASK; + } else { + pgd_lock = &task->pgd_lock; + start = KERNEL_SPACE & PAGE_MASK; + end = PAGE_MASK; + } + + if (BUILTIN_EXPECT(!npages, 0)) + return 0; + + spinlock_lock(pgd_lock); + + viraddr = i = start; + j = 0; + do { + uint16_t idx_pd4 = (viraddr >> 39) & 0x1FF; + uint16_t idx_dirp = (viraddr >> 30) & 0x1FF; + uint16_t idx_dir = (viraddr >> 21) & 0x1FF; + uint16_t idx_table = (viraddr >> 12) & 0x1FF; + + // Currently, we allocate pages only in kernel space. 
+ // => physical address of the page table is identical of the virtual address + pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK); + if (!pgt) { + i += (size_t)PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE; + j += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES; + continue; + } + + pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK); + if (!pgt) { + i += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE; + j += PGT_ENTRIES*PGT_ENTRIES; + continue; + } + + pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK); + if (!pgt) { + i += PGT_ENTRIES*PAGE_SIZE; + j += PGT_ENTRIES; + continue; + } + + if (!(pgt->entries[idx_table])) { + i += PAGE_SIZE; + j++; + } else { + // restart search + j = 0; + viraddr = i + PAGE_SIZE; + i = i + PAGE_SIZE; + } + } while((j < npages) && (i<=end)); + + if ((j >= npages) && (viraddr < end)) + ret = viraddr; + + spinlock_unlock(pgd_lock); + + return ret; +} + +int unmap_region(size_t viraddr, uint32_t npages) +{ + task_t* task = per_core(current_task); + spinlock_t* pgd_lock; + page_table_t* pgt; + size_t i; + uint16_t idx_pd4, idx_dirp; + uint16_t idx_dir, idx_table; + + if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0)) + return -EINVAL; + + if (viraddr <= KERNEL_SPACE) + pgd_lock = &kslock; + else + pgd_lock = &task->pgd_lock; + + spinlock_lock(pgd_lock); + + i = 0; + while(i> 39) & 0x1FF; + idx_dirp = (viraddr >> 30) & 0x1FF; + idx_dir = (viraddr >> 21) & 0x1FF; + idx_table = (viraddr >> 12) & 0x1FF; + + // Currently, we allocate pages only in kernel space. 
+ // => physical address of the page table is identical of the virtual address + pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK); + if (!pgt) { + viraddr += (size_t) PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE; + i += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES; + continue; + } + + pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK); + if (!pgt) { + viraddr += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE; + i += PGT_ENTRIES*PGT_ENTRIES; + continue; + } + + pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK); + if (!pgt) { + viraddr += PGT_ENTRIES*PAGE_SIZE; + i += PGT_ENTRIES; + continue; + } + + if (pgt->entries[idx_table]) + pgt->entries[idx_table] &= ~PG_PRESENT; + + viraddr +=PAGE_SIZE; + i++; + + if (viraddr > KERNEL_SPACE) + atomic_int32_dec(&task->user_usage); + + tlb_flush_one_page(viraddr); + } + + spinlock_unlock(pgd_lock); + + return 0; +} + +int vm_free(size_t viraddr, uint32_t npages) +{ + task_t* task = per_core(current_task); + spinlock_t* pgd_lock; + page_table_t* pgt; + size_t i; + uint16_t idx_pd4, idx_dirp; + uint16_t idx_dir, idx_table; + + if (BUILTIN_EXPECT(!task || !task->pgd || !paging_enabled, 0)) + return -EINVAL; + + if (viraddr <= KERNEL_SPACE) + pgd_lock = &kslock; + else + pgd_lock = &task->pgd_lock; + + spinlock_lock(pgd_lock); + + i = 0; + while(i> 39) & 0x1FF; + idx_dirp = (viraddr >> 30) & 0x1FF; + idx_dir = (viraddr >> 21) & 0x1FF; + idx_table = (viraddr >> 12) & 0x1FF; + + // Currently, we allocate pages only in kernel space. 
+ // => physical address of the page table is identical of the virtual address + pgt = (page_table_t*) (task->pgd->entries[idx_pd4] & PAGE_MASK); + if (!pgt) { + viraddr += (size_t) PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE; + i += PGT_ENTRIES*PGT_ENTRIES*PGT_ENTRIES; + continue; + } + + pgt = (page_table_t*) (pgt->entries[idx_dirp] & PAGE_MASK); + if (!pgt) { + viraddr += PGT_ENTRIES*PGT_ENTRIES*PAGE_SIZE; + i += PGT_ENTRIES*PGT_ENTRIES; + continue; + } + + pgt = (page_table_t*) (pgt->entries[idx_dir] & PAGE_MASK); + if (!pgt) { + viraddr += PGT_ENTRIES*PAGE_SIZE; + i += PGT_ENTRIES; + continue; + } + + if (pgt->entries[idx_table]) + pgt->entries[idx_table] = 0; + + viraddr +=PAGE_SIZE; + i++; + + tlb_flush_one_page(viraddr); + } + + spinlock_unlock(pgd_lock); + + return 0; +} + +static void pagefault_handler(struct state *s) +{ + task_t* task = per_core(current_task); + //page_dir_t* pgd = task->pgd; + //page_table_t* pgt = NULL; + size_t viraddr = read_cr2(); + //size_t phyaddr; + +#if 0 + if ((viraddr >= task->start_heap) && (viraddr <= task->end_heap) && (viraddr > KERNEL_SPACE)) { + viraddr = viraddr & PAGE_MASK; + + phyaddr = get_page(); + if (BUILTIN_EXPECT(!phyaddr, 0)) + goto default_handler; + + if (map_region(viraddr, phyaddr, 1, MAP_USER_SPACE) == viraddr) { + memset((void*) viraddr, 0x00, PAGE_SIZE); + return; + } + + kprintf("Could not map 0x%x at 0x%x\n", phyaddr, viraddr); + put_page(phyaddr); + } +#endif + +default_handler: + kprintf("PAGE FAULT: Task %u got page fault at %p (irq %llu, cs:rip 0x%llx:0x%llx)\n", task->id, viraddr, s->int_no, s->cs, s->rip); + kprintf("Register state: rax = 0x%llx, rbx = 0x%llx, rcx = 0x%llx, rdx = 0x%llx, rdi = 0x%llx, rsi = 0x%llx, rbp = 0x%llx, rsp = 0x%llx\n", + s->rax, s->rbx, s->rcx, s->rdx, s->rdi, s->rsi, s->rbp, s->rsp); + +while(1); + irq_enable(); + abort(); +} + +int arch_paging_init(void) +{ + uint32_t i, npages; + + // uninstall default handler and install our own + irq_uninstall_handler(14); + 
irq_install_handler(14, pagefault_handler); + + // kernel is already mapped into the kernel space (see entry64.asm) + // this includes .data, .bss, .text, video memory and the multiboot structure + +#if MAX_CORES > 1 + // Reserve page for smp boot code + if (!map_region(SMP_SETUP_ADDR, SMP_SETUP_ADDR, 1, MAP_KERNEL_SPACE|MAP_NO_CACHE)) { + kputs("could not reserve page for smp boot code\n"); + return -ENOMEM; + } +#endif + +#ifdef CONFIG_MULTIBOOT +#if 0 + /* + * Map reserved memory regions into the kernel space + */ + if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MEM_MAP)) { + multiboot_memory_map_t* mmap = (multiboot_memory_map_t*) mb_info->mmap_addr; + multiboot_memory_map_t* mmap_end = (void*) ((size_t) mb_info->mmap_addr + mb_info->mmap_length); + + while (mmap < mmap_end) { + if (mmap->type != MULTIBOOT_MEMORY_AVAILABLE) { + npages = mmap->len / PAGE_SIZE; + if ((mmap->addr+mmap->len) % PAGE_SIZE) + npages++; + map_region(mmap->addr, mmap->addr, npages, MAP_KERNEL_SPACE|MAP_NO_CACHE); + } + mmap++; + } + } +#endif + + /* + * Modules like the init ram disk are already loaded. + * Therefore, we map these modules into the kernel space. 
+ */ + if (mb_info && (mb_info->flags & MULTIBOOT_INFO_MODS)) { + multiboot_module_t* mmodule = (multiboot_module_t*) ((size_t) mb_info->mods_addr); + + npages = mb_info->mods_count * sizeof(multiboot_module_t) >> PAGE_SHIFT; + if (mb_info->mods_count * sizeof(multiboot_module_t) & (PAGE_SIZE-1)) + npages++; + map_region((size_t) (mb_info->mods_addr), (size_t) (mb_info->mods_addr), npages, MAP_REMAP|MAP_KERNEL_SPACE); + + for(i=0; imods_count; i++, mmodule++) { + // map physical address to the same virtual address + npages = (mmodule->mod_end - mmodule->mod_start) >> PAGE_SHIFT; + if (mmodule->mod_end & (PAGE_SIZE-1)) + npages++; + kprintf("Map module %s at 0x%x (%u pages)\n", (char*) mmodule->cmdline, mmodule->mod_start, npages); + map_region((size_t) (mmodule->mod_start), (size_t) (mmodule->mod_start), npages, MAP_REMAP|MAP_KERNEL_SPACE); + } + } +#endif + + /* signalize that we are able to use paging */ + paging_enabled = 1; + + /* + * we turned on paging + * => now, we are able to register our task + */ + register_task(); + + // APIC registers into the kernel address space + map_apic(); + + return 0; +} diff --git a/arch/x86/mm/svm.c b/arch/x86/mm/svm.c index 6a0449b2..68b62fbb 100644 --- a/arch/x86/mm/svm.c +++ b/arch/x86/mm/svm.c @@ -26,6 +26,7 @@ #include #include #include +#include #ifdef CONFIG_ROCKCREEK #include #include @@ -34,11 +35,60 @@ #include #include -#define SHARED_PAGES (RCCE_SHM_SIZE_MAX >> PAGE_SHIFT) +#define USE_PERFCOUNTERS 1 +#define USE_RESP_MAIL 1 + +#define SHARED_PAGES (4*(RCCE_SHM_SIZE_MAX >> PAGE_SHIFT)) #define OWNER_SIZE ((SHARED_PAGES * sizeof(uint8_t) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) +#define AIREG1 0 +#define AIREG2 (AIREG1 + 1) +#define LOCK_ID 0 + +#define ABS(a) (((a) < 0) ? 
-(a) : (a)) + t_vcharp RC_SHM_BUFFER_START(); +typedef struct { + int counter; + int initializer; +} atomic_increg_t; + +static volatile atomic_increg_t *incregs = NULL; +static RCCE_FLAG release; + +/* + * Details on L2 cache (needed for flushing) + */ + +#define OWN_MPB 0xd8000000 + +#define L2_LINESIZE 32UL +#define L2_WAYS 4UL +#define L2_CAPACITY (256*1024UL) +#define L2_WBSTRIDE (L2_CAPACITY/L2_WAYS) + +#ifdef SVM_WB +/* Helper function to read data into all 4 ways of L2 cache */ + +__attribute__((always_inline)) static inline void svm_purge_set(const size_t set) +{ + register char tmp; + + /* Translate the set to a kernel space virtual address */ + const volatile char* dummyData = (volatile char*)set; + + /* Now read new data into all four ways, and then reread the first */ + tmp = *dummyData; + tmp = *(dummyData + L2_WBSTRIDE); + tmp = *(dummyData + L2_WBSTRIDE * 2); + tmp = *(dummyData + L2_WBSTRIDE * 3); +} + +static size_t dummy_base = OWN_MPB + L2_CAPACITY; +static size_t dummy_offset = 0; +#endif + /* * This array describes the owner of a specific page. * Only the owner of a page is able to change the possession. @@ -48,61 +98,198 @@ static volatile uint8_t* page_owner = NULL; // helper array to convert a physical to a virtual address static size_t phys2virt[SHARED_PAGES] = {[0 ... SHARED_PAGES-1] = 0}; -static size_t shmbegin = 0; +static const size_t shmbegin = SHM_ADDR; static uint32_t emit[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0}; static uint32_t request[RCCE_MAXNP] = {[0 ... RCCE_MAXNP-1] = 0}; static uint32_t forward[RCCE_MAXNP] = {[0 ... 
RCCE_MAXNP-1] = 0}; +static uint32_t alloc_page = 0; +static uint32_t map_page = 0; +#if USE_PERFCOUNTERS +static uint64_t alloc_ticks = 0; +static uint64_t request_ticks = 0; +static uint64_t emit_ticks = 0; +static uint64_t wait_ticks = 0; +static uint64_t max_wait = 0; +static uint64_t min_wait = (uint64_t) -1; +#endif int svm_init(void) { - size_t phyaddr; - uint32_t flags; - - // iRCCE is not thread save => disable interrupts - flags = irq_nested_disable(); - shmbegin = (size_t)RC_SHM_BUFFER_START(); - phyaddr = (size_t) RCCE_shmalloc(OWNER_SIZE); - irq_nested_enable(flags); - - if (BUILTIN_EXPECT(!phyaddr, 0)) - return -ENOMEM; - if (BUILTIN_EXPECT(phyaddr & 0xFFF, 0)) { - kprintf("RCCE_shmalloc returns not a page aligned physiacl address: 0x%x\n", phyaddr); - return -ENOMEM; - } + uint32_t i, flags; kprintf("Shared memory starts at the physical address 0x%x\n", shmbegin); - - page_owner = (uint8_t*) map_region(0, phyaddr, OWNER_SIZE >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE); - if (BUILTIN_EXPECT(!page_owner, 0)) { - flags = irq_nested_disable(); - RCCE_shfree((t_vcharp) phyaddr); - irq_nested_enable(flags); + + page_owner = (uint8_t*) map_region(0, shmbegin, OWNER_SIZE >> PAGE_SHIFT, MAP_KERNEL_SPACE|MAP_NO_CACHE); + if (BUILTIN_EXPECT(!page_owner, 0)) return -ENOMEM; + + if (!RCCE_IAM) { + memset((void*)page_owner, 0xFF, OWNER_SIZE); + + // owner vector is owned by core 0 + for(i=0; i<(OWNER_SIZE >> PAGE_SHIFT); i++) + page_owner[i] = 0; } - // per default is core 0 owner - if (!RCCE_IAM) - memset((void*)page_owner, 0x00, OWNER_SIZE); + // initialize svm barrier + incregs = (volatile atomic_increg_t*) map_region(0, 0xF900E000, 2, MAP_KERNEL_SPACE|MAP_NO_CACHE); + if (BUILTIN_EXPECT(!incregs, 0)) + return -ENOMEM; + kprintf("Map atomic counters at 0x%x\n", incregs); + + if (!RCCE_IAM) { + incregs[AIREG1].initializer = 0; + incregs[AIREG2].initializer = 0; + } // iRCCE is not thread save => disable interrupts flags = irq_nested_disable(); - 
RCCE_barrier(&RCCE_COMM_WORLD); + RCCE_flag_alloc(&release); irq_nested_enable(flags); - + + RCCE_barrier(&RCCE_COMM_WORLD); + return 0; } +static size_t get_shpages(uint32_t n) +{ + int x = X_PID(RC_MY_COREID); + int y = Y_PID(RC_MY_COREID); + size_t i, j = 0, k = 0, start = SHM_X0_Y0; + int diff, min = x + y; + + diff = ABS(5 - x) + ABS(0 - y); + if (diff < min) { + min = diff; + start = SHM_X5_Y0; + } + + diff = ABS(0 - x) + ABS(2 - y); + if (diff < min) { + min = diff; + start = SHM_X0_Y2; + } + + diff = ABS(5 - x) + ABS(2 - y); + if (diff < min) { + min = diff; + start = SHM_X5_Y2; + } + + for(i=0; (i < SHARED_PAGES) && (k < n); i++) { + k = 0; + j = (((start - shmbegin) >> PAGE_SHIFT) + i) % SHARED_PAGES; + + while((k= RCCE_MAXNP)) { + k++; i++; + } + } + + if (BUILTIN_EXPECT(i >= SHARED_PAGES, 0)) + return 0; + + memset((void*) (page_owner+j), RCCE_IAM, sizeof(uint8_t)*n); + + return shmbegin + (j << PAGE_SHIFT); +} + +size_t shmalloc(uint32_t n) +{ + size_t ret; + + RCCE_acquire_lock(RC_COREID[LOCK_ID]); + ret = get_shpages(n); + RCCE_release_lock(RC_COREID[LOCK_ID]); + + return ret; +} + +/* + * This function is called by the pagefault handler + * => the interrupt flags is already cleared + */ +int svm_alloc_page(size_t addr, page_table_t* pgt) +{ +#if USE_PERFCOUNTERS + uint64_t start = rdtsc(); +#endif + uint32_t index2 = (addr >> 12) & 0x3FF; + size_t phyaddr; + t_vcharp mpb = (t_vcharp) ((size_t)(virt_to_phys(addr) >> PAGE_SHIFT) | ((size_t) RCCE_comm_buffer[RCCE_IAM] - RCCE_LINE_SIZE)); + uint16_t offset = 0xFFFF; + + addr &= PAGE_MASK; // align address to the page boundary + + RCCE_acquire_lock(RC_COREID[LOCK_ID]); + + iRCCE_get((t_vcharp) &offset, mpb, sizeof(uint16_t), RCCE_IAM); + + if (!offset) { + int i; + + phyaddr = get_shpages(1); + offset = (uint16_t) ((phyaddr - shmbegin) >> PAGE_SHIFT); + for(i=0; ientries[index2] &= 0xFFF; + pgt->entries[index2] &= ~PG_SVM_INIT; + pgt->entries[index2] |= phyaddr|PG_PRESENT; + phys2virt[(phyaddr - 
shmbegin) >> PAGE_SHIFT] = addr; + tlb_flush_one_page(addr); + + alloc_page++; + //kprintf("map new page frame 0x%x at 0x%x, flags0x%x, offset 0x%x, mpb 0x%x\n", phyaddr, addr, pgt->entries[index2] & 0xFFF, (int) offset, mpb); + +#if USE_PERFCOUNTERS + alloc_ticks += rdtsc() - start; +#endif + return 0; + } else { + RCCE_release_lock(RC_COREID[LOCK_ID]); + + phyaddr = shmbegin + ((size_t)offset << PAGE_SHIFT); + pgt->entries[index2] &= 0xFFF; + pgt->entries[index2] &= ~PG_SVM_INIT; + if (pgt->entries[index2] & PG_SVM_LAZYRELEASE) + pgt->entries[index2] |= phyaddr|PG_PRESENT; + else + pgt->entries[index2] |= phyaddr; + phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT] = addr; + tlb_flush_one_page(addr); + + map_page++; + //kprintf("map existing page frame 0x%x at 0x%x, offset 0x%x, mpb 0x%x\n", phyaddr, addr, offset, mpb); + +#if USE_PERFCOUNTERS + alloc_ticks += rdtsc() - start; +#endif + if (pgt->entries[index2] & PG_SVM_LAZYRELEASE) + return 0; + if (pgt->entries[index2] & PG_RW) + return svm_access_request(addr); + + return 0; + } +} + /* * This function is called by the pagefault handler * => the interrupt flags is already cleared */ int svm_access_request(size_t addr) { +#if USE_PERFCOUNTERS + uint64_t start = rdtsc(); +#endif size_t phyaddr = virt_to_phys(addr); uint32_t pageid; int remote_rank; uint8_t payload[iRCCE_MAIL_HEADER_PAYLOAD]; + int ret; if (phyaddr < shmbegin) return -EINVAL; @@ -110,40 +297,74 @@ int svm_access_request(size_t addr) return -EINVAL; pageid = (phyaddr-shmbegin) >> PAGE_SHIFT; - if (page_owner[pageid] == RCCE_IAM) + remote_rank = page_owner[pageid]; + if (remote_rank == RCCE_IAM) return 0; - remote_rank = page_owner[pageid]; ((size_t*) payload)[0] = RCCE_IAM; ((size_t*) payload)[1] = phyaddr; + //kprintf("send request (0x%x) to %d\n", addr, remote_rank); /* send ping request */ - iRCCE_mail_send(2*sizeof(size_t), SVM_REQUEST, 0, payload, remote_rank); - + iRCCE_mail_send(2*sizeof(size_t), SVM_REQ, 0, (char*) payload, remote_rank); + 
NOP4; + icc_send_gic_irq(remote_rank); request[remote_rank]++; - icc_send_gic_irq(remote_rank); +#if USE_RESP_MAIL +#if USE_PERFCOUNTERS + uint64_t wait_start = rdtsc(); +#endif + // wait for response + icc_wait(SVM_RESP); +#if USE_PERFCOUNTERS + uint64_t res = rdtsc() - wait_start; + wait_ticks += res; + if (min_wait > res) + min_wait = res; + if (max_wait < res) + max_wait = res; +#endif + +#else + NOP8; while (page_owner[pageid] != RCCE_IAM) { icc_mail_check(); NOP8; } +#endif - return change_page_permissions(addr, addr+PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE); + addr &= PAGE_MASK; // align address to page boundary + ret = change_page_permissions(addr, addr + PAGE_SIZE, VMA_READ|VMA_WRITE|VMA_CACHEABLE); + +#if USE_PERFCOUNTERS + request_ticks += rdtsc() - start; +#endif + + return ret; } -static atomic_int32_t size_counter = ATOMIC_INIT(0); +//static atomic_int32_t size_counter = ATOMIC_INIT(0); -void* svmmalloc(size_t size, uint32_t consistency) +void* svm_malloc(size_t size, uint32_t consistency) { - size_t phyaddr, viraddr, i; + size_t viraddr, phyaddr, i, j; + t_vcharp mpb_addr; uint32_t flags; - uint32_t map_flags = MAP_KERNEL_SPACE|MAP_MPE; + task_t* task = per_core(current_task); + uint32_t map_flags = MAP_KERNEL_SPACE|MAP_SVM_INIT; + uint8_t buffer[RCCE_LINE_SIZE]= {[0 ... 
RCCE_LINE_SIZE-1] = 0}; + + if(!(consistency & SVM_L2)) + map_flags |= MAP_MPE; + else + task->flags |= TASK_L2; if (consistency & SVM_STRONG) map_flags |= MAP_SVM_STRONG; else if (consistency & SVM_LAZYRELEASE) map_flags |= MAP_SVM_LAZYRELEASE; - else return 0; + else return NULL; // currently, we allocate memory in page size granulation size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); @@ -197,7 +418,8 @@ void* svmmalloc(size_t size, uint32_t consistency) kprintf("shmmalloc returns 0x%x\n", viraddr); return (void*) viraddr; -#else +#endif +#if 0 // iRCCE is not thread save => disable interrupts flags = irq_nested_disable(); phyaddr = (size_t) RCCE_shmalloc(size); @@ -209,7 +431,7 @@ void* svmmalloc(size_t size, uint32_t consistency) if (BUILTIN_EXPECT(!phyaddr, 0)) return NULL; if (BUILTIN_EXPECT(phyaddr & 0xFFF, 0)) { - kprintf("RCCE_shmalloc returns not a page aligned physiacl address: 0x%x\n", phyaddr); + kprintf("RCCE_shmalloc returns not a page aligned physical address: 0x%x\n", phyaddr); return NULL; } @@ -221,16 +443,58 @@ void* svmmalloc(size_t size, uint32_t consistency) return (void*) viraddr; #endif + + map_flags |= MAP_NO_ACCESS; +#ifndef SVM_WB + map_flags |= MAP_MPE; +#endif + + viraddr = map_region(0, 0, size >> PAGE_SHIFT, map_flags); + kprintf("svmmalloc: viraddr 0x%x, size 0x%x, flags 0x%x\n", viraddr, size, map_flags); + + map_flags |= MAP_REMAP; + for(i=0, j=0, mpb_addr=0; i Interrupt flag is alread cleared. 
*/ int svm_emit_page(size_t phyaddr, int ue) -{ +{ +#if USE_PERFCOUNTERS + uint64_t start = rdtsc(); +#endif uint32_t pageid; + int remote_rank; //kprintf("Try to emit page 0x%x to %d\n", phyaddr, ue); @@ -264,19 +533,18 @@ int svm_emit_page(size_t phyaddr, int ue) return -EINVAL; pageid = (phyaddr-shmbegin) >> PAGE_SHIFT; - if (page_owner[pageid] != RCCE_IAM) { + remote_rank = page_owner[pageid]; + if (remote_rank != RCCE_IAM) { // Core is nor owner => forward request to new owner - int remote_rank; uint8_t payload[iRCCE_MAIL_HEADER_PAYLOAD]; kprintf("Ups, core %d is not owner of page 0x%x\n", RCCE_IAM, phyaddr); - remote_rank = page_owner[pageid]; ((size_t*) payload)[0] = ue; ((size_t*) payload)[1] = phyaddr; /* send ping request */ - iRCCE_mail_send(2*sizeof(size_t), SVM_REQUEST, 0, payload, remote_rank); + iRCCE_mail_send(2*sizeof(size_t), SVM_REQ, 0, (char*)payload, remote_rank); /* send interrupt */ icc_send_gic_irq(remote_rank); @@ -284,18 +552,28 @@ int svm_emit_page(size_t phyaddr, int ue) } else { size_t viraddr; - svm_flush(); - page_owner[pageid] = ue; + svm_flush(phyaddr); + +#if USE_RESP_MAIL + // send response back to ue + // ue is polling for the response => no irq is needed + iRCCE_mail_send(0, SVM_RESP, 0, NULL, ue); +#endif emit[ue]++; - viraddr = phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT]; + page_owner[pageid] = ue; + viraddr = phys2virt[pageid]; change_page_permissions(viraddr, viraddr+PAGE_SIZE, VMA_NOACCESS|VMA_READ|VMA_CACHEABLE); } +#if USE_PERFCOUNTERS + emit_ticks += rdtsc() - start; +#endif + return 0; } -#ifdef SVM_WB +#if 0 void svm_flush(void) { int z, tmp; @@ -319,6 +597,141 @@ void svm_flush(void) } #endif +/* + * Function to flush one page or entire cache. 
+ */ + +#ifdef SVM_WB + +void svm_invalidate(void) +{ + task_t* task = per_core(current_task); + + if(task->flags & TASK_L2) { + asm volatile ( ".byte 0x0f; .byte 0x0a;\n" ); // CL1FLUSHMB + } else { + /* no action needed svm_flush already invalidates cache */ + return; + } + +} + +void svm_flush(size_t phyaddr) +{ + task_t* task = per_core(current_task); + page_dir_t* pgd = task->pgd; + page_table_t* pgt = NULL; + + size_t step = 0; + size_t stride = L2_LINESIZE; + size_t range = L2_WBSTRIDE; + + size_t viraddr; + + uint32_t index1, index2; + + uint32_t flags; + + /* flush entire Cache if phyaddr == 0 */ + if(!phyaddr) { + if( task->flags & TASK_L2 ){ + goto flush_l2; + } else { + goto flush_l1; + } + /* flush one page */ + } else { + /* align the address to page boundaries */ + phyaddr &= ~(PAGE_SIZE-1); + + /* lookup pgt to check if L2 is enabled */ + viraddr = phys2virt[(phyaddr - shmbegin) >> PAGE_SHIFT]; + index1 = viraddr >> 22; + index2 = (viraddr >> 12) & 0x3FF; + + /* check if pgt is present */ + if (!pgd || !(pgd->entries[index1] & PAGE_MASK)) + goto wrong_addr; + + pgt = (page_table_t*)((KERNEL_SPACE - 1024 * PAGE_SIZE + index1 * PAGE_SIZE) & PAGE_MASK); + + if( pgt->entries[index2] & PG_MPE ) { + goto flush_l1; + } else { + phyaddr = phyaddr % L2_WBSTRIDE; + range = PAGE_SIZE; + goto flush_l2; + } + } + + /* + * FLUSH L1 CACHE: + */ + +flush_l1: + kputs("flush L1\n"); + *(int *)RCCE_fool_write_combine_buffer = 1; + //__asm__ volatile ( "wbinvd;\n\t" ); + flush_cache(); + return; + +flush_l2: + /* + * FLUSH L2 CACHE: + * disable iterrupts due to pseudo LRU behavior of L2 cache + */ + flags = irq_nested_disable(); + /* toggle between dummy areas */ + phyaddr += dummy_base + dummy_offset; + kprintf("flush-l2: phyaddr 0x%x\n", phyaddr); + if(dummy_offset) + dummy_offset = 0; + else + dummy_offset = L2_CAPACITY; + + flush_cache(); + for( step = 0; step < range; step += stride ) + svm_purge_set( phyaddr + step ); + irq_nested_enable(flags); + return; + 
+wrong_addr: + kputs("svm flush error: address not valid!\n"); + return; + +} +#endif + +int svm_barrier(uint32_t flags) +{ + int i; + RCCE_COMM *comm = &RCCE_COMM_WORLD; + static int index = 0; + + if (flags & SVM_LAZYRELEASE) { + svm_flush(0); + svm_invalidate(); + } + +#if 1 + // Lubachevsky barrier with flags + index = !index; + if (incregs[AIREG1].counter > (comm->size - 2)) { + incregs[AIREG1].initializer = 0; + while(incregs[AIREG1].initializer); + for (i = 0; i < comm->size; i++) + RCCE_flag_write(&release, index, comm->member[i]); + } else RCCE_wait_until(release, index); +#else + RCCE_barrier(&RCCE_COMM_WORLD); +#endif + + return 0; +} + +//extern uint64_t check_ticks; +//extern uint64_t recv_ticks; + int svm_statistics(void) { uint32_t i; @@ -333,6 +746,18 @@ int svm_statistics(void) for(i=0; i #define MIN(x,y) ( (x) < (y) ? (x) : (y) ) #define MAX(x,y) ( (x) > (y) ? (x) : (y) ) -#include -#include +#include +#include + +#ifdef CONFIG_ROCKCREEK //-------------------------------------------------------------------------------------- // FUNCTION: RCCE_reduce_general @@ -185,3 +187,4 @@ int RCCE_reduce( RCCE_reduce_general(inbuf, outbuf, num, type, op, root, all, comm))); } +#endif diff --git a/arch/x86/scc/iRCCE_mailbox.c b/arch/x86/scc/iRCCE_mailbox.c index 1ca67c1f..36553505 100644 --- a/arch/x86/scc/iRCCE_mailbox.c +++ b/arch/x86/scc/iRCCE_mailbox.c @@ -104,8 +104,7 @@ static int iRCCE_mail_fetch( if( iRCCE_mail_garbage.first == NULL ) { iRCCE_mail_garbage.last = NULL; } - } - else { + } else { header = (iRCCE_MAIL_HEADER*)kmalloc(sizeof(iRCCE_MAIL_HEADER)); } @@ -120,8 +119,7 @@ static int iRCCE_mail_fetch( iRCCE_last_mail[rank] = 1; iRCCE_mailbox_close_one( rank, 0 ); // we can close respective mailbox iRCCE_mail_release( &header ); - } - else { + } else { // check mail priority int prio = header->prio; @@ -144,77 +142,63 @@ static int iRCCE_mail_fetch( } -//------------------------------------------------------------------------------ -// FUNCTION: 
iRCCE_mailbox_check -//------------------------------------------------------------------------------ -/** - * @brief routine to check for new mail in mailboxes - * - * This function has to be called from time to time. It empties all mailboxes of - * the participating cores if the corresponding sent-flag is set and the mailbox - * is not closed. After calling iRCCE_mail_fetch the sent-flag has to be reset. - * Here we have to use a little trick because we can only write to the MPB in - * cacheline granularity. We set the appropriate flag to zero and afterwords - * touch the MPB on another cacheline. That causes the write combine buffer to - * write out the data. - */ -//------------------------------------------------------------------------------ -static iRCCE_MAIL_HEADER dummy_header = {0, 0, 0, NULL, 0, 0, 0, {[0 ... iRCCE_MAIL_HEADER_PAYLOAD-1] = 0} }; - - -static int iRCCE_mailbox_check(void) { - int i,j; - uint32_t flags; - - /* disable interrupts */ - flags = irq_nested_disable(); - - for( j=1; jsent ) { - iRCCE_mail_fetch(i); - - // reset senders flag - RC_cache_invalidate(); - *(iRCCE_mailbox_recv[i]) = dummy_header; - } - } - } - - /* enable interrupts */ - irq_nested_enable(flags); - return iRCCE_SUCCESS; -} - //------------------------------------------------------------------------------ // FUNCTION: iRCCE_mail_check //------------------------------------------------------------------------------ /** - * @brief routine to check one specific mailbox - * @param sender is the core ID from which the mailbox is checked + * @brief routine to check one specific mailbox or all + * @param sender is the core ID from which the mailbox is checked use + * iRCCE_MAILBOX_ALL as wildcard to check all mailboxes * * This function may be called by the user application to check one specific - * mailbox. It is recommended to use it in combination with an inter core - * interrupt. + * mailbox. It is recommended to use it in combination with an inter-core + * interrupt. 
It empties one or all mailboxes of the participating cores if the + * corresponding sent-flag is set and the mailbox is not closed. After calling + * iRCCE_mail_fetch the sent-flag has to be reset. Here we have to use a little + * trick because we can only write to the MPB in cacheline granularity. We set + * the appropriate flag to zero and afterwords touch the MPB on another + * cacheline. That causes the write combine buffer to write out the data. * */ //------------------------------------------------------------------------------ +const static iRCCE_MAIL_HEADER dummy_header = + {0, 0, 0, NULL, 0, 0, 0, {[0 ... iRCCE_MAIL_HEADER_PAYLOAD-1] = 0} }; + int iRCCE_mail_check(int sender) { uint32_t flags; + int j, i; + int found = 0; // check all mailboxes in case of wildcard if( sender == iRCCE_MAILBOX_ALL ) { - iRCCE_mailbox_check(); + /* disable interrupts */ + flags = irq_nested_disable(); - return iRCCE_SUCCESS; + for( j=1; jsent ) { + if( !found ) found = 1; + + iRCCE_mail_fetch(i); + // reset senders flag + RC_cache_invalidate(); + *(iRCCE_mailbox_recv[i]) = dummy_header; + } + } + } + + /* enable interrupts */ + irq_nested_enable(flags); + + return (found == 1)? 
iRCCE_SUCCESS : iRCCE_MAILBOX_EMPTY; } // verify sender's ID - if( (sender < 0) || (sender > RCCE_NP) || (sender == RCCE_IAM) ) { + if(BUILTIN_EXPECT((sender < 0) || (sender > RCCE_NP) || (sender == RCCE_IAM), 0)) { return iRCCE_ERROR_SOURCE; } @@ -223,26 +207,28 @@ int iRCCE_mail_check(int sender) { return iRCCE_MAILBOX_CLOSED; } - RC_cache_invalidate(); - if( iRCCE_mailbox_recv[sender]->sent ) { - /* disable interrupts */ - flags = irq_nested_disable(); - - iRCCE_mail_fetch(sender); - - // reset senders flag + for(i=0; i<5; i++) { RC_cache_invalidate(); - *(iRCCE_mailbox_recv[sender]) = dummy_header; + if( iRCCE_mailbox_recv[sender]->sent ) { + /* disable interrupts */ + flags = irq_nested_disable(); + + iRCCE_mail_fetch(sender); + + // reset senders flag + RC_cache_invalidate(); + *(iRCCE_mailbox_recv[sender]) = dummy_header; - /* enable interrupts */ - irq_nested_enable(flags); + /* enable interrupts */ + irq_nested_enable(flags); - return iRCCE_SUCCESS; - } - else { - return iRCCE_MAILBOX_EMPTY; + return iRCCE_SUCCESS; + } + + NOP8; } + return iRCCE_MAILBOX_EMPTY; } //------------------------------------------------------------------------------ @@ -256,25 +242,28 @@ int iRCCE_mail_check(int sender) { * * The function checks if the receive queue with highest priority (priority 0) * contains any mail headers. In this case we pop the first element of that list - * in a FIFO maner. Otherwise iRCCE_mailbox_check() has to be called. Afterwards - * the first element of a non-empty receive queue with highest priority is - * returned. + * in a FIFO maner. Afterwards the first element of a non-empty receive queue + * with highest priority is returned. 
*/ //------------------------------------------------------------------------------ int iRCCE_mail_recv( iRCCE_MAIL_HEADER** header // pointer to incoming header ) { // (memory allocated by iRCCE) - int i; + int i, found = 0; uint32_t flags; iRCCE_MAIL_HEADER* help_header; + + // if no mail queued pointer must be ZERO + *header = NULL; + + /* disable interrupts */ + flags = irq_nested_disable(); // check priority queues for( i=0; inext = NULL; - *header = help_header; - - /* enable interrupts */ - irq_nested_enable(flags); - - return iRCCE_SUCCESS; - + found = 1; + break; } + + } - // no mail queued - *header = NULL; - - return iRCCE_MAILBOX_EMPTY; + /* enable interrupts */ + irq_nested_enable(flags); + + return (found == 1)? iRCCE_SUCCESS : iRCCE_MAILBOX_EMPTY; } @@ -385,27 +372,35 @@ int iRCCE_mail_send( uint32_t flags; // verify sender's ID - if( (dest < 0) || (dest > RCCE_NP) || (dest == RCCE_IAM) ) { + if(BUILTIN_EXPECT((dest < 0) || (dest > RCCE_NP) || (dest == RCCE_IAM),0)) { return iRCCE_ERROR_TARGET; } // if dest mailbox is full, check for incoming mail RC_cache_invalidate(); while( iRCCE_mailbox_send[dest]->sent ) { - iRCCE_mailbox_check(); + // iRCCE_mail_check(iRCCE_MAILBOX_ALL); RC_cache_invalidate(); + NOP8; + NOP8; + NOP8; } + + /* disable interrupts */ + flags = irq_nested_disable(); + // check if mailbox is closed RCCE_acquire_lock( dest ); RC_cache_invalidate(); if( iRCCE_mailbox_send[dest]->closed ) { RCCE_release_lock( dest ); + + /* enable interrupts */ + irq_nested_enable(flags); + return iRCCE_MAILBOX_CLOSED; } - /* disable interrupts */ -// flags = irq_nested_disable(); - // prepare header iRCCE_MAIL_HEADER header = { RCCE_IAM, size, tag, NULL, prio, RCCE_FLAG_UNSET, RCCE_FLAG_UNSET, @@ -427,11 +422,11 @@ int iRCCE_mail_send( *(int *)RCCE_fool_write_combine_buffer = 1; RC_cache_invalidate(); - /* enable interrupts */ -// irq_nested_enable(flags); - RCCE_release_lock( dest ); + /* enable interrupts */ + irq_nested_enable(flags); + return 
iRCCE_SUCCESS; } @@ -476,7 +471,7 @@ int iRCCE_last_mail_recv(void) { //------------------------------------------------------------------------------ int iRCCE_mailbox_wait(void) { while( iRCCE_last_mail_recv() == iRCCE_LAST_MAILS_NOT_RECV ) { - iRCCE_mailbox_check(); + iRCCE_mail_check(iRCCE_MAILBOX_ALL); } return iRCCE_SUCCESS; @@ -533,7 +528,7 @@ int iRCCE_mailbox_flush(void) { * last-mail. * * This function closes a mailbox of the given rank. If the check flag is set - * an iRCCE_mailbox_check()-call is performed. The close procedure has to be + * an iRCCE_mail_check()-call is performed. The close procedure has to be * locked to be sure that no UE sends any mail while closing the mailbox. */ //------------------------------------------------------------------------------ diff --git a/arch/x86/scc/icc.c b/arch/x86/scc/icc.c index 97796658..f3ae7f41 100644 --- a/arch/x86/scc/icc.c +++ b/arch/x86/scc/icc.c @@ -26,6 +26,7 @@ #include #include #include +#include #define IRQ_STATUS 0xD000 #define IRQ_MASK 0xD200 @@ -35,7 +36,6 @@ #include - bootinfo_t* bootinfo = (bootinfo_t*) SCC_BOOTINFO; /* PSE bit for Pentium+ equals MPE (message buffer enable) flag in RCK! So, use it to create _PAGE_MPB symbol... 
*/ @@ -97,9 +97,11 @@ static inline void icc_mail_check_tag(iRCCE_MAIL_HEADER* mail) { case PING_REQ: iRCCE_mail_send(0, PING_RESP, 0, NULL, mail->source); break; - case SVM_REQUEST: + case SVM_REQ: svm_emit_page(((size_t*) mail->payload)[1], ((size_t*) mail->payload)[0]); break; + case SVM_RESP: + break; case NOISE: // kprintf( "XXX " ); default: @@ -162,9 +164,6 @@ static void icc_handler(struct state *s) while( iRCCE_mail_recv(&header) == iRCCE_SUCCESS ) { icc_mail_check_tag(header); iRCCE_mail_release(&header); - NOP8; - NOP8; - NOP8; } } @@ -297,9 +296,9 @@ int icc_halt(void) return 0; } -#define ROUNDS 1000 -#define CORE_A 0 // sender -#define CORE_B 1 // receiver +#define ROUNDS 20000 +#define CORE_A RC_RCCEID[0] // sender +#define CORE_B RC_RCCEID[30] // receiver int icc_send_gic_irq(int core_num) { volatile uint32_t* irq_request = (volatile uint32_t*)(FPGA_BASE+IRQ_REQUEST+RC_MY_COREID*8); @@ -347,7 +346,7 @@ int icc_mail_ping(void) /* wait for response */ do { - res = iRCCE_mail_check(CORE_B); + res = iRCCE_mail_check(iRCCE_MAILBOX_ALL); //CORE_B); } while( res != iRCCE_SUCCESS ); /* release mail */ @@ -358,7 +357,7 @@ int icc_mail_ping(void) else { /* wait for request */ do { - res = iRCCE_mail_check(CORE_A); + res = iRCCE_mail_check(iRCCE_MAILBOX_ALL); //CORE_A); } while( res != iRCCE_SUCCESS ); /* check mail */ @@ -434,21 +433,92 @@ int icc_mail_ping_irq(void) kprintf( "timer = %d\n", timer ); kprintf( "mail_pingpong needs in average %d nsec (%d ticks)!\n", timer*1000/(2*ROUNDS*get_cpu_frequency()), timer/(2*ROUNDS) ); - irq_nested_enable(flags); return 0; } -#define _iRQ_NOISE_ 0 +int icc_mail_ping_jitter(void) +{ + kprintf( "Hello from jitter_test ... 
\n" ); + /* return if not core A */ + if( RCCE_IAM != CORE_A ) return 0; + + uint32_t flags; + uint64_t timer = 0; + uint64_t max = 0; + uint64_t min = ULONG_MAX; + uint64_t sum = 0; + + int i; + int res; + iRCCE_MAIL_HEADER* recv_header = NULL; + + kprintf( "my_rank = %d\n", RCCE_IAM ); + kprintf( "rem_rank = %d\n", CORE_B ); + kprintf( "rounds = %d\n", ROUNDS ); + + // disable interrupts + flags = irq_nested_disable(); + + for( i=0; i 0 ) { + max = ( max < timer )? timer : max; + min = ( min > timer )? timer : min; + sum += timer; + } + } + + kprintf( "Average was: %d nsec\n", sum*1000/(2*ROUNDS*533) ); + kprintf( "Maximum was: %d nsec\n", max*1000/(2*533) ); + kprintf( "Minimum was: %d nsec\n", min*1000/(2*533) ); + kprintf( "Jitter was: %d nsec\n", (max-min)*1000/(2*533) ); + + irq_nested_enable(flags); + + return 0; +} + +#undef _IRQ_NOISE_ +#define NOISE_PRIO 1 int icc_mail_noise(void) { int i, j, res; int num_ranks = RCCE_num_ues(); + int count = 0; iRCCE_MAIL_HEADER* recv_mail = NULL; + /* timer vars */ + uint64_t timer; + uint64_t tmr; + uint64_t tmr_send = 0; + uint64_t tmr_recv = 0; + uint64_t tmr_release = 0; + uint64_t tmr_chck = 0; + + kprintf( "my_ue = %d\n", RCCE_IAM ); + // leave function if not participating - if( !((RCCE_IAM == 4) || (RCCE_IAM == 2) || (RCCE_IAM == CORE_B)) ) { + if( (RCCE_IAM == CORE_A) || (RCCE_IAM == CORE_B) ) { kprintf( "mail_noise: leaving" ); return -1; } @@ -456,46 +526,84 @@ int icc_mail_noise(void) { kprintf( "Hello from icc_mail_noise: my_ue = %d\n", RCCE_IAM ); kprintf( "num_ues = %d\n", num_ranks ); - - for( i=0; i<10000; ++i ) { + + timer = rdtsc(); + + for( i=0; i<40000; ++i ) { if( !(i%1000) ) kprintf( "%d ", i ); + tmr = rdtsc(); + iRCCE_mail_check(iRCCE_MAILBOX_ALL); + tmr = rdtsc() - tmr; + tmr_chck += tmr; + /* send a mail to each UE */ for( j=0; jtag == tag) { + iRCCE_mail_release( &header ); + goto out; + } else iRCCE_mail_release( &header ); + } + //recv_ticks += rdtsc() - start; + + goto retry; + +out: + 
//recv_ticks += rdtsc() - start; + /* enable interrupts */ + irq_nested_enable(flags); +} #endif diff --git a/documentation/img/mike_ostrich.jpg b/documentation/img/mike_ostrich.jpg new file mode 100644 index 00000000..9ab2b54f Binary files /dev/null and b/documentation/img/mike_ostrich.jpg differ diff --git a/documentation/text/compilation.dox b/documentation/text/compilation.dox index 20f13d6a..5dba4875 100644 --- a/documentation/text/compilation.dox +++ b/documentation/text/compilation.dox @@ -14,7 +14,7 @@ * * The MetalSVM project is hosted in a Git repository. To check it out, just type: * - * \verbatim$ git clone gitosis@git.lfbs.rwth-aachen.de:metalsvm.git \endverbatim + * \verbatim$ git clone git://git.lfbs.rwth-aachen.de:metalsvm.git \endverbatim * * If you are asked for a password you are not authorized to clone the repository. In this case you will need to get your public SSH key authorized. * @@ -25,7 +25,7 @@ * \verbatim $ cd MetalSVM $ cp Makefile.example Makefile -$ cp include/metalsvm/config.h.example include/metalsvm/config.h \endverbatim +$ (cd include/metalsvm; cp config.h.example config.h) \endverbatim * * The standard configuration works on usual PC hardware configurations as well as in emulators. * @@ -90,20 +90,8 @@ $ make SCC \endverbatim * @section runsccmc Running MetalSVM on multiple SCC cores * * -# Build the kernel like described above (items 1-7) and change to the \c tools directory. - * -# The \c scc_bootinfo.asm file contains boot-information relevant to the SCC-cores. 
It is generated automatically by the \c bootinfo.sh script.\n - * \n - * The following example generates the \c scc_bootinfo.asm file needed for use of the cores 0 and 1: - * \verbatim$ ./bootinfo.sh 0x01000000 initrd.img 2 533 0 1 > scc_bootinfo.asm \endverbatim - * Parameters describe the following: - * -# First parameter describes the address at which the initrd shall be located at later (You will not need to change this: 0x00100000) - * -# Second is path to the initrd image file - * -# The other parameters are analogous to RCCE-App-parameters. This example starts MetalSVM on cores 0 and 1, clocked with 533MHz. - * -# Now the file \c metalsvm.mt has to be edited. It defines the layout of the memory image (Where the kernels will be located in the memory later). For the example from above it looks like the following: - * \verbatim# pid mch-route mch-dest-id mch-offset-base testcase - 0x00 0x00 6 0x00 metalsvm.obj - 0x01 0x00 6 0x01 metalsvm.obj \endverbatim - * This locates two instances of MetalSVM on core 0 and 1, supplied with memory from memory controller 0. See \c sccMerge \c -h for more information. + * -# Now the file \c metalsvm.mt can be edited, depending on how many cores you want MetalSVM to run on. + * Just remove the cores which shall be unaffected. Having a slim \c metalsvm.mt accelerates the build procedure. * -# The final image must be generated then with \code$ make SCC\endcode * -# A directory \c obj was created, containing the final MetalSVM Image. This image can now be loaded with the following command: \code$ sccBoot -g obj\endcode * -# Everything has been placed in the cores' memory. 
To release the reset pins of the corresponding cores, type \code$ sccReset -r 0x00 0x01\endcode diff --git a/documentation/text/tasks.dox b/documentation/text/tasks.dox index 0b9bebaf..835068ea 100644 --- a/documentation/text/tasks.dox +++ b/documentation/text/tasks.dox @@ -19,7 +19,7 @@ create_kernel_task (&id, initd, NULL, NORMAL_PRIO); ...\endcode * * \c Initd starts all the other processes and then exits. - * The list of processes to start is defined in \c kernel/tests.c + * The list of processes to start is defined in \c apps/tests.c * within \c test_init(): * * \code diff --git a/documentation/tmpl/footer.html b/documentation/tmpl/footer.html index c93cb561..80222ad9 100644 --- a/documentation/tmpl/footer.html +++ b/documentation/tmpl/footer.html @@ -1,9 +1,13 @@ + + + doxygen $doxygenversion + +