code_compiler: Add comments and internal commands to Lexer

This should make parsing even easier, as the internal commands are now tokens instead of plain text. Additionally, it is now possible to properly include comments: they are no longer split into separate parts and are instead emitted as a single string lasting until the end of the line.
This commit is contained in:
Michael Fabian 'Xaymar' Dirks
2017-11-18 19:15:18 +01:00
parent a9f4fd9d47
commit 726bd78a59
3 changed files with 219 additions and 55 deletions
+62 -1
View File
@@ -39,7 +39,7 @@ bool BlitzLLVM::Compiler::Compile(std::string in, std::string out) {
std::cout << "EOF" << std::endl;
break;
case Lexer::Token::TokenNewLine:
std::cout << "NewLine" << std::endl;
std::cout << "" << std::endl;
break;
case Lexer::Token::TokenPlus:
case Lexer::Token::TokenMinus:
@@ -61,7 +61,65 @@ bool BlitzLLVM::Compiler::Compile(std::string in, std::string out) {
case Lexer::Token::TokenComma:
case Lexer::Token::TokenSemicolon:
case Lexer::Token::TokenCaret:
case Lexer::Token::TokenBitNot:
case Lexer::Token::TokenDoubleQuote:
case Lexer::Token::TokenNot:
case Lexer::Token::TokenAnd:
case Lexer::Token::TokenOr:
case Lexer::Token::TokenXor:
case Lexer::Token::TokenShl:
case Lexer::Token::TokenShr:
case Lexer::Token::TokenSal:
case Lexer::Token::TokenFalse:
case Lexer::Token::TokenTrue:
case Lexer::Token::TokenFloat:
case Lexer::Token::TokenString:
case Lexer::Token::TokenHex:
case Lexer::Token::TokenInt:
case Lexer::Token::TokenIf:
case Lexer::Token::TokenThen:
case Lexer::Token::TokenElseIf:
case Lexer::Token::TokenElse:
case Lexer::Token::TokenEndIf:
case Lexer::Token::TokenSelect:
case Lexer::Token::TokenCase:
case Lexer::Token::TokenDefault:
case Lexer::Token::TokenGoto:
case Lexer::Token::TokenGosub:
case Lexer::Token::TokenReturn:
case Lexer::Token::TokenFunction:
case Lexer::Token::TokenEnd:
case Lexer::Token::TokenStop:
case Lexer::Token::TokenFor:
case Lexer::Token::TokenTo:
case Lexer::Token::TokenNext:
case Lexer::Token::TokenWhile:
case Lexer::Token::TokenWend:
case Lexer::Token::TokenRepeat:
case Lexer::Token::TokenUntil:
case Lexer::Token::TokenForever:
case Lexer::Token::TokenExit:
case Lexer::Token::TokenAbs:
case Lexer::Token::TokenSign:
case Lexer::Token::TokenCos:
case Lexer::Token::TokenSin:
case Lexer::Token::TokenTan:
case Lexer::Token::TokenACos:
case Lexer::Token::TokenASin:
case Lexer::Token::TokenATan:
case Lexer::Token::TokenATan2:
case Lexer::Token::TokenLog:
case Lexer::Token::TokenLog10:
case Lexer::Token::TokenCeil:
case Lexer::Token::TokenFloor:
case Lexer::Token::TokenMod:
case Lexer::Token::TokenPi:
case Lexer::Token::TokenExp:
case Lexer::Token::TokenSqr:
case Lexer::Token::TokenConst:
case Lexer::Token::TokenGlobal:
case Lexer::Token::TokenLocal:
case Lexer::Token::TokenInclude:
std::cout << tkn.second << ' ';
break;
case Lexer::Token::TokenText:
@@ -76,6 +134,9 @@ bool BlitzLLVM::Compiler::Compile(std::string in, std::string out) {
case Lexer::Token::TokenQuotedText:
std::cout << "QuotedText(" << tkn.second << ")" << ' ';
break;
case Lexer::Token::TokenComment:
std::cout << "Comment(" << tkn.second << ")" << ' ';
break;
case Lexer::Token::TokenUnknown:
default:
std::cout << "Unknown(" << tkn.second << ") ";
+149 -51
View File
@@ -16,6 +16,7 @@
#include "lexer.hpp"
#include <codecvt>
#include <boost/algorithm/string/predicate.hpp>
std::pair<char, BlitzLLVM::Lexer::Token> g_symbolCharacters[] = {
//{ '\"', BlitzLLVM::Lexer::Token::TokenDoubleQuote }, // Has special meaning.
@@ -37,7 +38,7 @@ std::pair<char, BlitzLLVM::Lexer::Token> g_symbolCharacters[] = {
//{ '.', BlitzLLVM::Lexer::Token::TokenDot }, // Special meaning.
{ ':', BlitzLLVM::Lexer::Token::TokenColon },
{ ',', BlitzLLVM::Lexer::Token::TokenComma },
{ ';', BlitzLLVM::Lexer::Token::TokenSemicolon },
//{ ';', BlitzLLVM::Lexer::Token::TokenSemicolon },
{ '^', BlitzLLVM::Lexer::Token::TokenCaret },
{ '~', BlitzLLVM::Lexer::Token::TokenBitNot },
};
@@ -74,56 +75,8 @@ std::pair<BlitzLLVM::Lexer::Token, std::string> BlitzLLVM::Lexer::GetNextToken()
m_isStringMode = false;
m_isNumberMode = false;
m_isTextMode = false;
m_isCommentMode = false;
break;
} else if (!m_isStringMode && !m_isTextMode && !m_isNumberMode) {
// Whitespace
if (isspace(chr))
continue;
// Control Code
if (iscntrl(chr)) {
tkn = Token::TokenUnknown;
buf = chr;
}
// Symbol
for (auto v : g_symbolCharacters) {
if (v.first == chr) {
tkn = v.second;
buf = v.first;
break;
}
}
if (tkn != Token::TokenEOF) {
haveResult = true;
break;
}
// Strings, Text, Numbers
if (chr == '\"') {
m_isStringMode = true;
tkn = Token::TokenDoubleQuote;
buf = chr;
break;
} else if (isalpha(chr)) {
m_isTextMode = true;
tkn = Token::TokenText;
buf = chr;
} else if (isdigit(chr)) {
m_isNumberMode = true;
m_numberModeHasDecimal = false;
tkn = Token::TokenNumber;
buf = chr;
} else if (chr == '.') {
m_isNumberMode = true;
m_numberModeHasDecimal = true;
tkn = Token::TokenDecimal;
buf = "0" + chr;
} else {
tkn = Token::TokenUnknown;
buf = chr;
break;
}
} else if (m_isStringMode) {
if (chr == '\"') {
m_overrideToken = Token::TokenDoubleQuote;
@@ -155,15 +108,160 @@ std::pair<BlitzLLVM::Lexer::Token, std::string> BlitzLLVM::Lexer::GetNextToken()
tkn = Token::TokenDecimal;
buf += chr;
} else {
throw std::runtime_error("Unexpected '.' while parsing number.");
m_fileStream.putback(chr);
m_isNumberMode = false;
break;
}
} else {
m_fileStream.putback(chr);
m_isNumberMode = false;
break;
}
} else if (m_isCommentMode) {
buf += chr;
tkn = Token::TokenComment;
} else {
// Whitespace
if (isspace(chr))
continue;
// Control Code
if (iscntrl(chr)) {
tkn = Token::TokenUnknown;
buf = chr;
}
// Symbol
for (auto v : g_symbolCharacters) {
if (v.first == chr) {
tkn = v.second;
buf = v.first;
break;
}
}
if (tkn != Token::TokenEOF) {
haveResult = true;
break;
}
// Strings, Text, Numbers
if (chr == ';') {
m_isCommentMode = true;
tkn = Token::TokenSemicolon;
buf = chr;
break;
} else if (chr == '\"') {
m_isStringMode = true;
tkn = Token::TokenDoubleQuote;
buf = chr;
break;
} else if (isalpha(chr)) {
m_isTextMode = true;
tkn = Token::TokenText;
buf = chr;
} else if (isdigit(chr)) {
m_isNumberMode = true;
m_numberModeHasDecimal = false;
tkn = Token::TokenNumber;
buf = chr;
} else if (chr == '.') {
m_isNumberMode = true;
m_numberModeHasDecimal = true;
tkn = Token::TokenDecimal;
buf = "0" + chr;
} else {
tkn = Token::TokenUnknown;
buf = chr;
break;
}
}
}
// Convert from Text into native Token.
if (tkn == Token::TokenText)
tkn = ConvertTextToToken(tkn, buf);
return std::make_pair(tkn, buf);
}
// Resolves a lexed word to its dedicated keyword token.
//
// `text` is compared case-insensitively (boost::iequals, whole-string match)
// against every entry of the keyword table below, in table order. The token of
// the first matching entry is returned; when no entry matches, `in` is handed
// back unchanged so the caller keeps whatever token it already had.
BlitzLLVM::Lexer::Token BlitzLLVM::Lexer::ConvertTextToToken(Token in, std::string text) {
	static const std::pair<const char*, Token> s_keywordTable[] = {
		// Binary
		{ "not", Token::TokenNot },
		{ "and", Token::TokenAnd },
		{ "or", Token::TokenOr },
		{ "xor", Token::TokenXor },
		{ "shl", Token::TokenShl },
		{ "shr", Token::TokenShr },
		{ "sal", Token::TokenSal },
		{ "sar", Token::TokenSar },
		{ "false", Token::TokenFalse },
		{ "true", Token::TokenTrue },

		// Conversion
		{ "float", Token::TokenFloat },
		{ "string", Token::TokenString },
		{ "hex", Token::TokenHex },
		{ "int", Token::TokenInt },

		// Control
		{ "if", Token::TokenIf },
		{ "then", Token::TokenThen },
		{ "elseIf", Token::TokenElseIf },
		{ "else", Token::TokenElse },
		{ "endIf", Token::TokenEndIf },
		{ "select", Token::TokenSelect },
		{ "case", Token::TokenCase },
		{ "default", Token::TokenDefault },
		{ "goto", Token::TokenGoto },
		{ "gosub", Token::TokenGosub },
		{ "return", Token::TokenReturn },
		{ "function", Token::TokenFunction },
		{ "end", Token::TokenEnd },
		{ "stop", Token::TokenStop },

		// Loop
		{ "for", Token::TokenFor },
		{ "to", Token::TokenTo },
		{ "next", Token::TokenNext },
		{ "while", Token::TokenWhile },
		{ "wend", Token::TokenWend },
		{ "repeat", Token::TokenRepeat },
		{ "until", Token::TokenUntil },
		{ "forever", Token::TokenForever },
		{ "exit", Token::TokenExit },

		// Math
		{ "abs", Token::TokenAbs },
		{ "sign", Token::TokenSign },
		{ "cos", Token::TokenCos },
		{ "sin", Token::TokenSin },
		{ "tan", Token::TokenTan },
		{ "acos", Token::TokenACos },
		{ "asin", Token::TokenASin },
		{ "atan", Token::TokenATan },
		{ "atan2", Token::TokenATan2 },
		{ "log", Token::TokenLog },
		{ "log10", Token::TokenLog10 },
		{ "ceil", Token::TokenCeil },
		{ "floor", Token::TokenFloor },
		{ "mod", Token::TokenMod },
		{ "pi", Token::TokenPi },
		{ "exp", Token::TokenExp },
		{ "sqr", Token::TokenSqr },

		// Variables
		{ "const", Token::TokenConst },
		{ "global", Token::TokenGlobal },
		{ "local", Token::TokenLocal },

		// Includes
		{ "include", Token::TokenInclude },
	};

	// Start from the caller's token and only replace it on a keyword hit.
	Token resolved = in;
	for (const auto& entry : s_keywordTable) {
		if (!boost::iequals(text, entry.first))
			continue;
		resolved = entry.second;
		break;
	}
	return resolved;
}
+6 -1
View File
@@ -59,12 +59,13 @@ namespace BlitzLLVM {
TokenNumber,
TokenDecimal,
TokenQuotedText, // Text encapsulated by TokenDoubleQuote
TokenComment,
// Binary
TokenNot,
TokenAnd, TokenOr, TokenXor,
TokenShl, TokenShr,
TokenSar, TokenSal,
TokenSal, TokenSar,
TokenFalse, TokenTrue,
// Conversion
@@ -112,12 +113,16 @@ namespace BlitzLLVM {
std::pair<Token, std::string> GetNextToken();
private:
BlitzLLVM::Lexer::Token ConvertTextToToken(Token in, std::string text);
private:
std::istream& m_fileStream;
bool m_isTextMode = false;
bool m_isNumberMode = false;
bool m_isStringMode = false;
bool m_isCommentMode = false;
bool m_numberModeHasDecimal = false;
Token m_overrideToken = Token::TokenUnknown;