diff --git a/projects/code_compiler/source/compiler.cpp b/projects/code_compiler/source/compiler.cpp index 2636a6f..abced7a 100644 --- a/projects/code_compiler/source/compiler.cpp +++ b/projects/code_compiler/source/compiler.cpp @@ -39,7 +39,7 @@ bool BlitzLLVM::Compiler::Compile(std::string in, std::string out) { std::cout << "EOF" << std::endl; break; case Lexer::Token::TokenNewLine: - std::cout << "NewLine" << std::endl; + std::cout << "" << std::endl; break; case Lexer::Token::TokenPlus: case Lexer::Token::TokenMinus: @@ -61,7 +61,66 @@ bool BlitzLLVM::Compiler::Compile(std::string in, std::string out) { case Lexer::Token::TokenComma: case Lexer::Token::TokenSemicolon: case Lexer::Token::TokenCaret: + case Lexer::Token::TokenBitNot: case Lexer::Token::TokenDoubleQuote: + case Lexer::Token::TokenNot: + case Lexer::Token::TokenAnd: + case Lexer::Token::TokenOr: + case Lexer::Token::TokenXor: + case Lexer::Token::TokenShl: + case Lexer::Token::TokenShr: + case Lexer::Token::TokenSal: + case Lexer::Token::TokenSar: + case Lexer::Token::TokenFalse: + case Lexer::Token::TokenTrue: + case Lexer::Token::TokenFloat: + case Lexer::Token::TokenString: + case Lexer::Token::TokenHex: + case Lexer::Token::TokenInt: + case Lexer::Token::TokenIf: + case Lexer::Token::TokenThen: + case Lexer::Token::TokenElseIf: + case Lexer::Token::TokenElse: + case Lexer::Token::TokenEndIf: + case Lexer::Token::TokenSelect: + case Lexer::Token::TokenCase: + case Lexer::Token::TokenDefault: + case Lexer::Token::TokenGoto: + case Lexer::Token::TokenGosub: + case Lexer::Token::TokenReturn: + case Lexer::Token::TokenFunction: + case Lexer::Token::TokenEnd: + case Lexer::Token::TokenStop: + case Lexer::Token::TokenFor: + case Lexer::Token::TokenTo: + case Lexer::Token::TokenNext: + case Lexer::Token::TokenWhile: + case Lexer::Token::TokenWend: + case Lexer::Token::TokenRepeat: + case Lexer::Token::TokenUntil: + case Lexer::Token::TokenForever: + case Lexer::Token::TokenExit: + case Lexer::Token::TokenAbs: + case 
Lexer::Token::TokenSign: + case Lexer::Token::TokenCos: + case Lexer::Token::TokenSin: + case Lexer::Token::TokenTan: + case Lexer::Token::TokenACos: + case Lexer::Token::TokenASin: + case Lexer::Token::TokenATan: + case Lexer::Token::TokenATan2: + case Lexer::Token::TokenLog: + case Lexer::Token::TokenLog10: + case Lexer::Token::TokenCeil: + case Lexer::Token::TokenFloor: + case Lexer::Token::TokenMod: + case Lexer::Token::TokenPi: + case Lexer::Token::TokenExp: + case Lexer::Token::TokenSqr: + case Lexer::Token::TokenConst: + case Lexer::Token::TokenGlobal: + case Lexer::Token::TokenLocal: + case Lexer::Token::TokenInclude: std::cout << tkn.second << ' '; break; case Lexer::Token::TokenText: @@ -76,6 +135,9 @@ bool BlitzLLVM::Compiler::Compile(std::string in, std::string out) { case Lexer::Token::TokenQuotedText: std::cout << "QuotedText(" << tkn.second << ")" << ' '; break; + case Lexer::Token::TokenComment: + std::cout << "Comment(" << tkn.second << ")" << ' '; + break; case Lexer::Token::TokenUnknown: default: std::cout << "Unknown(" << tkn.second << ") "; diff --git a/projects/code_compiler/source/lexer.cpp b/projects/code_compiler/source/lexer.cpp index e8313c6..e5d40ff 100644 --- a/projects/code_compiler/source/lexer.cpp +++ b/projects/code_compiler/source/lexer.cpp @@ -16,6 +16,7 @@ #include "lexer.hpp" #include +#include std::pair g_symbolCharacters[] = { //{ '\"', BlitzLLVM::Lexer::Token::TokenDoubleQuote }, // Has special meaning. @@ -37,7 +38,7 @@ std::pair g_symbolCharacters[] = { //{ '.', BlitzLLVM::Lexer::Token::TokenDot }, // Special meaning. 
{ ':', BlitzLLVM::Lexer::Token::TokenColon }, { ',', BlitzLLVM::Lexer::Token::TokenComma }, - { ';', BlitzLLVM::Lexer::Token::TokenSemicolon }, + //{ ';', BlitzLLVM::Lexer::Token::TokenSemicolon }, { '^', BlitzLLVM::Lexer::Token::TokenCaret }, { '~', BlitzLLVM::Lexer::Token::TokenBitNot }, }; @@ -74,56 +75,8 @@ std::pair BlitzLLVM::Lexer::GetNextToken() m_isStringMode = false; m_isNumberMode = false; m_isTextMode = false; + m_isCommentMode = false; break; - } else if (!m_isStringMode && !m_isTextMode && !m_isNumberMode) { - // Whitespace - if (isspace(chr)) - continue; - - // Control Code - if (iscntrl(chr)) { - tkn = Token::TokenUnknown; - buf = chr; - } - - // Symbol - for (auto v : g_symbolCharacters) { - if (v.first == chr) { - tkn = v.second; - buf = v.first; - break; - } - } - if (tkn != Token::TokenEOF) { - haveResult = true; - break; - } - - // Strings, Text, Numbers - if (chr == '\"') { - m_isStringMode = true; - tkn = Token::TokenDoubleQuote; - buf = chr; - break; - } else if (isalpha(chr)) { - m_isTextMode = true; - tkn = Token::TokenText; - buf = chr; - } else if (isdigit(chr)) { - m_isNumberMode = true; - m_numberModeHasDecimal = false; - tkn = Token::TokenNumber; - buf = chr; - } else if (chr == '.') { - m_isNumberMode = true; - m_numberModeHasDecimal = true; - tkn = Token::TokenDecimal; - buf = "0" + chr; - } else { - tkn = Token::TokenUnknown; - buf = chr; - break; - } } else if (m_isStringMode) { if (chr == '\"') { m_overrideToken = Token::TokenDoubleQuote; @@ -155,15 +108,160 @@ std::pair BlitzLLVM::Lexer::GetNextToken() tkn = Token::TokenDecimal; buf += chr; } else { - throw std::runtime_error("Unexpected '.' 
while parsing number."); + m_fileStream.putback(chr); + m_isNumberMode = false; + break; } } else { m_fileStream.putback(chr); m_isNumberMode = false; - break; + break; + } + } else if (m_isCommentMode) { + buf += chr; + tkn = Token::TokenComment; + } else { + // Whitespace + if (isspace(chr)) + continue; + + // Control Code + if (iscntrl(chr)) { + tkn = Token::TokenUnknown; + buf = chr; + } + + // Symbol + for (auto v : g_symbolCharacters) { + if (v.first == chr) { + tkn = v.second; + buf = v.first; + break; + } + } + if (tkn != Token::TokenEOF) { + haveResult = true; + break; + } + + // Strings, Text, Numbers + if (chr == ';') { + m_isCommentMode = true; + tkn = Token::TokenSemicolon; + buf = chr; + break; + } else if (chr == '\"') { + m_isStringMode = true; + tkn = Token::TokenDoubleQuote; + buf = chr; + break; + } else if (isalpha(chr)) { + m_isTextMode = true; + tkn = Token::TokenText; + buf = chr; + } else if (isdigit(chr)) { + m_isNumberMode = true; + m_numberModeHasDecimal = false; + tkn = Token::TokenNumber; + buf = chr; + } else if (chr == '.') { + m_isNumberMode = true; + m_numberModeHasDecimal = true; + tkn = Token::TokenDecimal; + buf = std::string("0") + chr; // Build a std::string first: "0" + chr offsets the literal's pointer instead of concatenating. + } else { + tkn = Token::TokenUnknown; + buf = chr; + break; } } } + // Convert from Text into native Token. 
+ if (tkn == Token::TokenText) + tkn = ConvertTextToToken(tkn, buf); + return std::make_pair(tkn, buf); } + +BlitzLLVM::Lexer::Token BlitzLLVM::Lexer::ConvertTextToToken(Token in, std::string text) { // Case-insensitively matches text against the keyword table below; returns the matching keyword Token, or in when nothing matches. + static const std::pair l_textToTokenList[] = { + // Binary + { "not", Token::TokenNot }, + { "and", Token::TokenAnd }, + { "or", Token::TokenOr }, + { "xor", Token::TokenXor }, + { "shl", Token::TokenShl }, + { "shr", Token::TokenShr }, + { "sal", Token::TokenSal }, + { "sar", Token::TokenSar }, + { "false", Token::TokenFalse }, + { "true", Token::TokenTrue }, + + // Conversion + { "float", Token::TokenFloat }, + { "string", Token::TokenString }, + { "hex", Token::TokenHex }, + { "int", Token::TokenInt }, + + // Control + { "if", Token::TokenIf }, + { "then", Token::TokenThen }, + { "elseIf", Token::TokenElseIf }, + { "else", Token::TokenElse }, + { "endIf", Token::TokenEndIf }, + { "select", Token::TokenSelect }, + { "case", Token::TokenCase }, + { "default", Token::TokenDefault }, + { "goto", Token::TokenGoto }, + { "gosub", Token::TokenGosub }, + { "return", Token::TokenReturn }, + { "function", Token::TokenFunction }, + { "end", Token::TokenEnd }, + { "stop", Token::TokenStop }, + + // Loop + { "for", Token::TokenFor }, + { "to", Token::TokenTo }, + { "next", Token::TokenNext }, + { "while", Token::TokenWhile }, + { "wend", Token::TokenWend }, + { "repeat", Token::TokenRepeat }, + { "until", Token::TokenUntil }, + { "forever", Token::TokenForever }, + { "exit", Token::TokenExit }, + + // Math + { "abs", Token::TokenAbs }, + { "sign", Token::TokenSign }, + { "cos", Token::TokenCos }, + { "sin", Token::TokenSin }, + { "tan", Token::TokenTan }, + { "acos", Token::TokenACos }, + { "asin", Token::TokenASin }, + { "atan", Token::TokenATan }, + { "atan2", Token::TokenATan2 }, + { "log", Token::TokenLog }, + { "log10", Token::TokenLog10 }, + { "ceil", Token::TokenCeil }, + { "floor", Token::TokenFloor }, + { "mod", Token::TokenMod }, + { "pi", Token::TokenPi }, + { "exp", 
Token::TokenExp }, + { "sqr", Token::TokenSqr }, + + // Variables + { "const", Token::TokenConst }, + { "global", Token::TokenGlobal }, + { "local", Token::TokenLocal }, + + // Includes + { "include", Token::TokenInclude }, + }; + for (const auto& v : l_textToTokenList) { // By reference: avoids copying a table entry per iteration. + if (boost::iequals(text, v.first)) { + return v.second; + } + } + return in; +} diff --git a/projects/code_compiler/source/lexer.hpp b/projects/code_compiler/source/lexer.hpp index 823a2b3..07de7ae 100644 --- a/projects/code_compiler/source/lexer.hpp +++ b/projects/code_compiler/source/lexer.hpp @@ -59,12 +59,13 @@ namespace BlitzLLVM { TokenNumber, TokenDecimal, TokenQuotedText, // Text encapsulated by TokenDoubleQuote + TokenComment, // Binary TokenNot, TokenAnd, TokenOr, TokenXor, TokenShl, TokenShr, - TokenSar, TokenSal, + TokenSal, TokenSar, TokenFalse, TokenTrue, // Conversion @@ -103,7 +104,7 @@ namespace BlitzLLVM { TokenLocal, // Including files. - TokenInclude, + TokenInclude, }; public: @@ -112,12 +113,16 @@ namespace BlitzLLVM { std::pair GetNextToken(); + private: + BlitzLLVM::Lexer::Token ConvertTextToToken(Token in, std::string text); + private: std::istream& m_fileStream; bool m_isTextMode = false; bool m_isNumberMode = false; bool m_isStringMode = false; + bool m_isCommentMode = false; bool m_numberModeHasDecimal = false; Token m_overrideToken = Token::TokenUnknown;