From 726bd78a59b9b8f7c3e95cc88f17b4f77ec0db36 Mon Sep 17 00:00:00 2001 From: Michael Fabian 'Xaymar' Dirks Date: Sat, 18 Nov 2017 19:15:18 +0100 Subject: [PATCH] code_compiler: Add comments and internal commands to Lexer This should make parsing even easier as the internal commands are tokens instead of text. Additionally it is now possible to properly include comments as they are no longer considered separate parts and instead are a single string lasting until the end of the line. --- projects/code_compiler/source/compiler.cpp | 63 ++++++- projects/code_compiler/source/lexer.cpp | 202 +++++++++++++++------ projects/code_compiler/source/lexer.hpp | 9 +- 3 files changed, 219 insertions(+), 55 deletions(-) diff --git a/projects/code_compiler/source/compiler.cpp b/projects/code_compiler/source/compiler.cpp index 2636a6f..abced7a 100644 --- a/projects/code_compiler/source/compiler.cpp +++ b/projects/code_compiler/source/compiler.cpp @@ -39,7 +39,7 @@ bool BlitzLLVM::Compiler::Compile(std::string in, std::string out) { std::cout << "EOF" << std::endl; break; case Lexer::Token::TokenNewLine: - std::cout << "NewLine" << std::endl; + std::cout << "" << std::endl; break; case Lexer::Token::TokenPlus: case Lexer::Token::TokenMinus: @@ -61,7 +61,65 @@ bool BlitzLLVM::Compiler::Compile(std::string in, std::string out) { case Lexer::Token::TokenComma: case Lexer::Token::TokenSemicolon: case Lexer::Token::TokenCaret: + case Lexer::Token::TokenBitNot: case Lexer::Token::TokenDoubleQuote: + case Lexer::Token::TokenNot: + case Lexer::Token::TokenAnd: + case Lexer::Token::TokenOr: + case Lexer::Token::TokenXor: + case Lexer::Token::TokenShl: + case Lexer::Token::TokenShr: + case Lexer::Token::TokenSal: + case Lexer::Token::TokenSar: // The lexer's keyword table maps "sar" to this token, so print it like the other keywords. + case Lexer::Token::TokenFalse: + case Lexer::Token::TokenTrue: + case Lexer::Token::TokenFloat: + case Lexer::Token::TokenString: + case Lexer::Token::TokenHex: + case Lexer::Token::TokenInt: + case Lexer::Token::TokenIf: + case Lexer::Token::TokenThen: + case 
Lexer::Token::TokenElseIf: + case Lexer::Token::TokenElse: + case Lexer::Token::TokenEndIf: + case Lexer::Token::TokenSelect: + case Lexer::Token::TokenCase: + case Lexer::Token::TokenDefault: + case Lexer::Token::TokenGoto: + case Lexer::Token::TokenGosub: + case Lexer::Token::TokenReturn: + case Lexer::Token::TokenFunction: + case Lexer::Token::TokenEnd: + case Lexer::Token::TokenStop: + case Lexer::Token::TokenFor: + case Lexer::Token::TokenTo: + case Lexer::Token::TokenNext: + case Lexer::Token::TokenWhile: + case Lexer::Token::TokenWend: + case Lexer::Token::TokenRepeat: + case Lexer::Token::TokenUntil: + case Lexer::Token::TokenForever: + case Lexer::Token::TokenExit: + case Lexer::Token::TokenAbs: + case Lexer::Token::TokenSign: + case Lexer::Token::TokenCos: + case Lexer::Token::TokenSin: + case Lexer::Token::TokenTan: + case Lexer::Token::TokenACos: + case Lexer::Token::TokenASin: + case Lexer::Token::TokenATan: + case Lexer::Token::TokenATan2: + case Lexer::Token::TokenLog: + case Lexer::Token::TokenLog10: + case Lexer::Token::TokenCeil: + case Lexer::Token::TokenFloor: + case Lexer::Token::TokenMod: + case Lexer::Token::TokenPi: + case Lexer::Token::TokenExp: + case Lexer::Token::TokenSqr: + case Lexer::Token::TokenConst: + case Lexer::Token::TokenGlobal: + case Lexer::Token::TokenLocal: + case Lexer::Token::TokenInclude: std::cout << tkn.second << ' '; break; case Lexer::Token::TokenText: @@ -76,6 +134,9 @@ bool BlitzLLVM::Compiler::Compile(std::string in, std::string out) { case Lexer::Token::TokenQuotedText: std::cout << "QuotedText(" << tkn.second << ")" << ' '; break; + case Lexer::Token::TokenComment: + std::cout << "Comment(" << tkn.second << ")" << ' '; + break; case Lexer::Token::TokenUnknown: default: std::cout << "Unknown(" << tkn.second << ") "; diff --git a/projects/code_compiler/source/lexer.cpp b/projects/code_compiler/source/lexer.cpp index e8313c6..e5d40ff 100644 --- a/projects/code_compiler/source/lexer.cpp +++ 
b/projects/code_compiler/source/lexer.cpp @@ -16,6 +16,7 @@ #include "lexer.hpp" #include +#include std::pair g_symbolCharacters[] = { //{ '\"', BlitzLLVM::Lexer::Token::TokenDoubleQuote }, // Has special meaning. @@ -37,7 +38,7 @@ std::pair g_symbolCharacters[] = { //{ '.', BlitzLLVM::Lexer::Token::TokenDot }, // Special meaning. { ':', BlitzLLVM::Lexer::Token::TokenColon }, { ',', BlitzLLVM::Lexer::Token::TokenComma }, - { ';', BlitzLLVM::Lexer::Token::TokenSemicolon }, + //{ ';', BlitzLLVM::Lexer::Token::TokenSemicolon }, { '^', BlitzLLVM::Lexer::Token::TokenCaret }, { '~', BlitzLLVM::Lexer::Token::TokenBitNot }, }; @@ -74,56 +75,8 @@ std::pair BlitzLLVM::Lexer::GetNextToken() m_isStringMode = false; m_isNumberMode = false; m_isTextMode = false; + m_isCommentMode = false; break; - } else if (!m_isStringMode && !m_isTextMode && !m_isNumberMode) { - // Whitespace - if (isspace(chr)) - continue; - - // Control Code - if (iscntrl(chr)) { - tkn = Token::TokenUnknown; - buf = chr; - } - - // Symbol - for (auto v : g_symbolCharacters) { - if (v.first == chr) { - tkn = v.second; - buf = v.first; - break; - } - } - if (tkn != Token::TokenEOF) { - haveResult = true; - break; - } - - // Strings, Text, Numbers - if (chr == '\"') { - m_isStringMode = true; - tkn = Token::TokenDoubleQuote; - buf = chr; - break; - } else if (isalpha(chr)) { - m_isTextMode = true; - tkn = Token::TokenText; - buf = chr; - } else if (isdigit(chr)) { - m_isNumberMode = true; - m_numberModeHasDecimal = false; - tkn = Token::TokenNumber; - buf = chr; - } else if (chr == '.') { - m_isNumberMode = true; - m_numberModeHasDecimal = true; - tkn = Token::TokenDecimal; - buf = "0" + chr; - } else { - tkn = Token::TokenUnknown; - buf = chr; - break; - } } else if (m_isStringMode) { if (chr == '\"') { m_overrideToken = Token::TokenDoubleQuote; @@ -155,15 +108,160 @@ std::pair BlitzLLVM::Lexer::GetNextToken() tkn = Token::TokenDecimal; buf += chr; } else { - throw std::runtime_error("Unexpected '.' 
while parsing number."); + m_fileStream.putback(chr); + m_isNumberMode = false; + break; } } else { m_fileStream.putback(chr); m_isNumberMode = false; - break; + break; + } + } else if (m_isCommentMode) { + buf += chr; + tkn = Token::TokenComment; + } else { + // Whitespace + if (isspace(chr)) + continue; + + // Control Code + if (iscntrl(chr)) { + tkn = Token::TokenUnknown; + buf = chr; + } + + // Symbol + for (auto v : g_symbolCharacters) { + if (v.first == chr) { + tkn = v.second; + buf = v.first; + break; + } + } + if (tkn != Token::TokenEOF) { + haveResult = true; + break; + } + + // Strings, Text, Numbers + if (chr == ';') { + m_isCommentMode = true; + tkn = Token::TokenSemicolon; + buf = chr; + break; + } else if (chr == '\"') { + m_isStringMode = true; + tkn = Token::TokenDoubleQuote; + buf = chr; + break; + } else if (isalpha(chr)) { + m_isTextMode = true; + tkn = Token::TokenText; + buf = chr; + } else if (isdigit(chr)) { + m_isNumberMode = true; + m_numberModeHasDecimal = false; + tkn = Token::TokenNumber; + buf = chr; + } else if (chr == '.') { + m_isNumberMode = true; + m_numberModeHasDecimal = true; + tkn = Token::TokenDecimal; + // A bare "0" + chr offsets the literal's pointer by chr instead of concatenating; build a std::string so the buffer starts as "0" followed by chr. + buf = std::string("0") + chr; + } else { + tkn = Token::TokenUnknown; + buf = chr; + break; } } } + // Convert from Text into native Token. 
+ if (tkn == Token::TokenText) + tkn = ConvertTextToToken(tkn, buf); + return std::make_pair(tkn, buf); } + +// Maps identifier text to its keyword token using a case-insensitive match (boost::iequals); returns `in` unchanged when the text matches no keyword. +BlitzLLVM::Lexer::Token BlitzLLVM::Lexer::ConvertTextToToken(Token in, std::string text) { + static const std::pair<std::string, Token> l_textToTokenList[] = { + // Binary + { "not", Token::TokenNot }, + { "and", Token::TokenAnd }, + { "or", Token::TokenOr }, + { "xor", Token::TokenXor }, + { "shl", Token::TokenShl }, + { "shr", Token::TokenShr }, + { "sal", Token::TokenSal }, + { "sar", Token::TokenSar }, + { "false", Token::TokenFalse }, + { "true", Token::TokenTrue }, + + // Conversion + { "float", Token::TokenFloat }, + { "string", Token::TokenString }, + { "hex", Token::TokenHex }, + { "int", Token::TokenInt }, + + // Control + { "if", Token::TokenIf }, + { "then", Token::TokenThen }, + { "elseIf", Token::TokenElseIf }, + { "else", Token::TokenElse }, + { "endIf", Token::TokenEndIf }, + { "select", Token::TokenSelect }, + { "case", Token::TokenCase }, + { "default", Token::TokenDefault }, + { "goto", Token::TokenGoto }, + { "gosub", Token::TokenGosub }, + { "return", Token::TokenReturn }, + { "function", Token::TokenFunction }, + { "end", Token::TokenEnd }, + { "stop", Token::TokenStop }, + + // Loop + { "for", Token::TokenFor }, + { "to", Token::TokenTo }, + { "next", Token::TokenNext }, + { "while", Token::TokenWhile }, + { "wend", Token::TokenWend }, + { "repeat", Token::TokenRepeat }, + { "until", Token::TokenUntil }, + { "forever", Token::TokenForever }, + { "exit", Token::TokenExit }, + + // Math + { "abs", Token::TokenAbs }, + { "sign", Token::TokenSign }, + { "cos", Token::TokenCos }, + { "sin", Token::TokenSin }, + { "tan", Token::TokenTan }, + { "acos", Token::TokenACos }, + { "asin", Token::TokenASin }, + { "atan", Token::TokenATan }, + { "atan2", Token::TokenATan2 }, + { "log", Token::TokenLog }, + { "log10", Token::TokenLog10 }, + { "ceil", Token::TokenCeil }, + { "floor", Token::TokenFloor }, + { "mod", Token::TokenMod }, + { "pi", Token::TokenPi }, + { "exp", 
Token::TokenExp }, + { "sqr", Token::TokenSqr }, + + // Variables + { "const", Token::TokenConst }, + { "global", Token::TokenGlobal }, + { "local", Token::TokenLocal }, + + // Includes + { "include", Token::TokenInclude }, + }; + // Bind each entry by const reference; iterating by value would copy the pair and its std::string key on every step. + for (const auto& v : l_textToTokenList) { + if (boost::iequals(text, v.first)) { + return v.second; + } + } + return in; +} diff --git a/projects/code_compiler/source/lexer.hpp b/projects/code_compiler/source/lexer.hpp index 823a2b3..07de7ae 100644 --- a/projects/code_compiler/source/lexer.hpp +++ b/projects/code_compiler/source/lexer.hpp @@ -59,12 +59,13 @@ namespace BlitzLLVM { TokenNumber, TokenDecimal, TokenQuotedText, // Text encapsulated by TokenDoubleQuote + TokenComment, // Binary TokenNot, TokenAnd, TokenOr, TokenXor, TokenShl, TokenShr, - TokenSar, TokenSal, + TokenSal, TokenSar, TokenFalse, TokenTrue, // Conversion @@ -103,7 +104,7 @@ namespace BlitzLLVM { TokenLocal, // Including files. - TokenInclude, + TokenInclude, }; public: @@ -112,12 +113,16 @@ namespace BlitzLLVM { std::pair GetNextToken(); + private: + BlitzLLVM::Lexer::Token ConvertTextToToken(Token in, std::string text); + private: std::istream& m_fileStream; bool m_isTextMode = false; bool m_isNumberMode = false; bool m_isStringMode = false; + bool m_isCommentMode = false; bool m_numberModeHasDecimal = false; Token m_overrideToken = Token::TokenUnknown;