code_compiler: Implement Lexer for Blitz

The Lexer should now be able to split all existing Blitz code into easily identifiable Tokens. With this information, a Parser can verify that the syntax is correct, warn about potentially dangerous or incorrect usage, and convert the Tokens into actual LLVM code for compilation.
2017-11-13 02:14:57 +01:00
parent 89b98c6925
commit d5945e8c39
2 changed files with 297 additions and 0 deletions
@@ -0,0 +1,171 @@
 //	Code Compiler for BlitzLLVM
 //	Copyright(C) 2017 Michael Fabian Dirks
 //
 //	This program is free software: you can redistribute it and/or modify
 //	it under the terms of the GNU General Public License as published by
 //	the Free Software Foundation, either version 3 of the License, or
 //	(at your option) any later version.
 //
 //	This program is distributed in the hope that it will be useful,
 //	but WITHOUT ANY WARRANTY; without even the implied warranty of
 //	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 //	GNU General Public License for more details.
 //
 //	You should have received a copy of the GNU General Public License
 //	along with this program. If not, see <https://www.gnu.org/licenses/>.
 #include "lexer.hpp"
 #include <cctype>
 #include <codecvt>
 #include <stdexcept>
 #include <string>
 #include <utility>
 // Characters the lexer skips between tokens. Line breaks are intentionally not
 // listed here; the lexer handles '\r' and '\n' separately.
 // Declared static const: this is a read-only lookup table that is private to
 // this translation unit and must never be modified at run time.
 static const char g_whitespaceCharacters[] = {
 	' ',
 	'\t',
 };
 std::pair<char, BlitzLLVM::Lexer::Token> g_symbolCharacters[] = {
 	//{ '\"', BlitzLLVM::Lexer::Token::TokenDoubleQuote }, // Has special meaning.
 	{ '+', BlitzLLVM::Lexer::Token::TokenPlus },
 	{ '-', BlitzLLVM::Lexer::Token::TokenMinus },
 	{ '/', BlitzLLVM::Lexer::Token::TokenSlashForward },
 	{ '\\', BlitzLLVM::Lexer::Token::TokenSlashBackward },
 	{ '*', BlitzLLVM::Lexer::Token::TokenMultiply },
 	{ '=', BlitzLLVM::Lexer::Token::TokenEqual },
 	{ '#', BlitzLLVM::Lexer::Token::TokenOctothorp },
 	{ '%', BlitzLLVM::Lexer::Token::TokenPercent },
 	{ '$', BlitzLLVM::Lexer::Token::TokenDollar },
 	{ '(', BlitzLLVM::Lexer::Token::TokenRoundBracketOpen },
 	{ ')', BlitzLLVM::Lexer::Token::TokenRoundBracketClose },
 	{ '[', BlitzLLVM::Lexer::Token::TokenSquareBracketOpen },
 	{ ']', BlitzLLVM::Lexer::Token::TokenSquareBracketClose },
 	{ '<', BlitzLLVM::Lexer::Token::TokenAngleBracketOpen },
 	{ '>', BlitzLLVM::Lexer::Token::TokenAngleBracketClose },
 	//{ '.', BlitzLLVM::Lexer::Token::TokenDot }, // Special meaning.
 	{ ':', BlitzLLVM::Lexer::Token::TokenColon },
 	{ ',', BlitzLLVM::Lexer::Token::TokenComma },
 	{ ';', BlitzLLVM::Lexer::Token::TokenSemicolon },
 	{ '^', BlitzLLVM::Lexer::Token::TokenCaret },
 	{ '~', BlitzLLVM::Lexer::Token::TokenBitNot },
 };
 // Creates a lexer that reads its characters from `fs`. Only a reference to the
 // stream is stored, so `fs` has to stay alive for as long as the lexer is used.
 BlitzLLVM::Lexer::Lexer(std::istream& fs)
 	: m_fileStream(fs) {
 }
 // Nothing to release: the lexer only refers to the stream, it does not own it.
 BlitzLLVM::Lexer::~Lexer() = default;
 // Extracts the next token from the input stream.
 //
 // Returns a (token, text) pair:
 //  - a symbol token carries the symbol character as text,
 //  - TokenText carries a word: a letter followed by letters, digits or '_',
 //  - TokenNumber / TokenDecimal carry the digits; a number that starts with '.'
 //    is returned with a leading zero ("0." followed by the digits),
 //  - a quoted string is delivered over three calls: TokenDoubleQuote,
 //    TokenQuotedText (the contents) and TokenDoubleQuote again,
 //  - every '\r' and every '\n' yields its own TokenNewLine with empty text; when
 //    the line break ends a word or number, that token is returned first and the
 //    TokenNewLine is queued for the following call,
 //  - TokenUnknown carries the character that could not be classified,
 //  - TokenEOF is returned once the stream is exhausted or no longer readable.
 //
 // Throws std::runtime_error when a number contains a second '.'.
 //
 // NOTE(review): a line break inside an unterminated string discards the
 // collected contents and returns TokenNewLine; an unterminated string at end of
 // input returns TokenEOF with the partial contents as text.
 std::pair<BlitzLLVM::Lexer::Token, std::string> BlitzLLVM::Lexer::GetNextToken() {
 	std::string buf;
 	Token tkn = Token::TokenEOF;
 	// A previous call may have queued a token (the line break after a word, the
 	// closing quote after string contents). Hand it out before reading further.
 	// TokenUnknown doubles as the "nothing queued" marker.
 	if (m_overrideToken != Token::TokenUnknown) {
 		buf = m_overrideText;
 		tkn = m_overrideToken;
 		m_overrideToken = Token::TokenUnknown;
 		return std::make_pair(tkn, buf);
 	}
 	while (m_fileStream.good()) {
 		// Keep the int_type result: narrowing to char first would make end of
 		// input indistinguishable from a real 0xFF byte.
 		const std::istream::int_type raw = m_fileStream.get();
 		if (raw == std::istream::traits_type::eof()) {
 			// End of input finishes whatever was being collected; tkn/buf already
 			// describe it (or tkn is still TokenEOF when nothing was pending).
 			m_isStringMode = false;
 			m_isNumberMode = false;
 			m_isTextMode = false;
 			break;
 		}
 		const char chr = std::istream::traits_type::to_char_type(raw);
 		// The <cctype> classifiers are only defined for values representable as
 		// unsigned char, so never pass them a plain (possibly negative) char.
 		const unsigned char uchr = static_cast<unsigned char>(chr);
 		if (chr == '\r' || chr == '\n') {
 			if (tkn != Token::TokenEOF) {
 				// A token is in progress: return it now, the line break next time.
 				m_overrideToken = Token::TokenNewLine;
 				m_overrideText = "";
 			} else {
 				tkn = Token::TokenNewLine;
 				buf = "";
 			}
 			m_isStringMode = false;
 			m_isNumberMode = false;
 			m_isTextMode = false;
 			break;
 		} else if (!m_isStringMode && !m_isTextMode && !m_isNumberMode) {
 			// Whitespace between tokens is skipped.
 			bool isWhitespace = false;
 			for (const char v : g_whitespaceCharacters) {
 				if (v == chr) {
 					isWhitespace = true;
 					break;
 				}
 			}
 			if (isWhitespace)
 				continue;
 			// Single-character symbols are complete tokens on their own.
 			bool isSymbol = false;
 			for (const auto& v : g_symbolCharacters) {
 				if (v.first == chr) {
 					tkn = v.second;
 					buf = v.first;
 					isSymbol = true;
 					break;
 				}
 			}
 			if (isSymbol)
 				break;
 			// Strings, Text, Numbers
 			if (chr == '\"') {
 				// Opening quote: returned immediately; the contents follow on the
 				// next call because m_isStringMode survives this call.
 				m_isStringMode = true;
 				tkn = Token::TokenDoubleQuote;
 				buf = chr;
 				break;
 			} else if (std::isalpha(uchr)) {
 				m_isTextMode = true;
 				tkn = Token::TokenText;
 				buf = chr;
 			} else if (std::isdigit(uchr)) {
 				m_isNumberMode = true;
 				m_numberModeHasDecimal = false;
 				tkn = Token::TokenNumber;
 				buf = chr;
 			} else if (chr == '.') {
 				m_isNumberMode = true;
 				m_numberModeHasDecimal = true;
 				tkn = Token::TokenDecimal;
 				// Normalise ".5" to "0.5". (`"0" + chr` would be pointer arithmetic
 				// on the literal, not concatenation.)
 				buf = "0.";
 			} else {
 				tkn = Token::TokenUnknown;
 				buf = chr;
 				break;
 			}
 		} else if (m_isStringMode) {
 			if (chr == '\"') {
 				// Closing quote: return the contents now, the quote on the next call.
 				m_overrideToken = Token::TokenDoubleQuote;
 				m_overrideText = chr;
 				m_isStringMode = false;
 				tkn = Token::TokenQuotedText;
 				break;
 			} else {
 				buf += chr;
 			}
 		} else if (m_isTextMode) {
 			if (std::isalnum(uchr) || (chr == '_')) {
 				buf += chr;
 			} else {
 				// The character belongs to the next token; leave it in the stream.
 				m_fileStream.putback(chr);
 				m_isTextMode = false;
 				break;
 			}
 		} else if (m_isNumberMode) {
 			if (std::isdigit(uchr)) {
 				buf += chr;
 			} else if (chr == '.') {
 				if (m_numberModeHasDecimal == false) {
 					// First decimal point upgrades the number to a decimal.
 					m_numberModeHasDecimal = true;
 					tkn = Token::TokenDecimal;
 					buf += chr;
 				} else {
 					throw std::runtime_error("Unexpected '.' while parsing number.");
 				}
 			} else {
 				// The character belongs to the next token; leave it in the stream.
 				m_fileStream.putback(chr);
 				m_isNumberMode = false;
 				break;
 			}
 		}
 	}
 	return std::make_pair(tkn, buf);
 }
@@ -0,0 +1,126 @@
 //	Code Compiler for BlitzLLVM
 //	Copyright(C) 2017 Michael Fabian Dirks
 //
 //	This program is free software: you can redistribute it and/or modify
 //	it under the terms of the GNU General Public License as published by
 //	the Free Software Foundation, either version 3 of the License, or
 //	(at your option) any later version.
 //
 //	This program is distributed in the hope that it will be useful,
 //	but WITHOUT ANY WARRANTY; without even the implied warranty of
 //	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 //	GNU General Public License for more details.
 //
 //	You should have received a copy of the GNU General Public License
 //	along with this program. If not, see <https://www.gnu.org/licenses/>.
 #pragma once
 #include <list>
 #include <istream>
 #include <string>
 #include <inttypes.h>
 namespace BlitzLLVM {	
 	// Splits a character stream into tokens, one token per GetNextToken() call.
 	// The lexer keeps a reference to the stream it was constructed with, so that
 	// stream must outlive the lexer.
 	class Lexer {
 		public:
 		// Kinds of token. The enumerator order determines the numeric values, so
 		// new entries should be appended rather than inserted.
 		// NOTE(review): the keyword-like entries from "Binary" downwards are not
 		// produced by GetNextToken() as far as the implementation in this commit
 		// shows; presumably a later stage maps TokenText onto them - confirm.
 		enum class Token : uint64_t {
 			// Unclassifiable character. Also used by the lexer as the "no token
 			// queued" value of m_overrideToken.
 			TokenUnknown,
 			// No more input could be read from the stream.
 			TokenEOF,
 			// A '\r' or '\n' character.
 			TokenNewLine,
 			// Symbols
 			TokenPlus,
 			TokenMinus,
 			TokenSlashForward,
 			TokenSlashBackward,
 			TokenMultiply,
 			TokenEqual,
 			TokenOctothorp,
 			TokenPercent,
 			TokenDollar,
 			TokenRoundBracketOpen,
 			TokenRoundBracketClose,
 			TokenSquareBracketOpen,
 			TokenSquareBracketClose,
 			TokenAngleBracketOpen,
 			TokenAngleBracketClose,
 			TokenDot,
 			TokenColon,
 			TokenComma,
 			TokenSemicolon,
 			TokenCaret,
 			TokenBitNot /*~*/,
 			// String Delimiter
 			TokenDoubleQuote,
 			// Types
 			TokenText,
 			TokenNumber,
 			TokenDecimal,
 			TokenQuotedText, // Text encapsulated by TokenDoubleQuote
 			// Binary
 			TokenNot,
 			TokenAnd, TokenOr, TokenXor,
 			TokenShl, TokenShr,
 			TokenSar, TokenSal,
 			TokenFalse, TokenTrue,
 			// Conversion
 			TokenFloat,
 			TokenString, TokenHex,
 			TokenInt,
 			// Control
 			TokenIf, TokenThen, TokenElseIf, TokenElse, TokenEndIf,
 			TokenSelect, TokenCase, TokenDefault, // End Select = TokenEnd, TokenSelect.
 			TokenGoto, TokenGosub,
 			TokenReturn,
 			TokenFunction, // End Function = TokenEnd, TokenFunction.
 			TokenEnd,
 			TokenStop /* DEBUGGER! Ignore in Release mode. */,
 			// Loop
 			TokenFor, TokenTo, TokenNext,
 			TokenWhile, TokenWend,
 			TokenRepeat, TokenUntil, TokenForever,
 			TokenExit,
 			// Math
 			TokenAbs, TokenSign /*Sgn*/,
 			TokenCos, TokenSin, TokenTan,
 			TokenACos, TokenASin, TokenATan, TokenATan2,
 			TokenLog, TokenLog10,
 			TokenCeil, TokenFloor,
 			TokenMod,
 			TokenPi,
 			TokenExp, TokenSqr,
 			// Variables
 			TokenConst,
 			TokenGlobal,
 			TokenLocal,
 			// Including files.
 			TokenInclude,			
 		};
 		public:
 		// Creates a lexer reading from `fs`; only a reference to `fs` is stored.
 		Lexer(std::istream& fs);
 		~Lexer();
 		// Returns the next token together with its text.
 		std::pair<Token, std::string> GetNextToken();
 		private:
 		// Stream the characters are read from (not owned).
 		std::istream& m_fileStream;
 		// True while a word (TokenText) is being collected.
 		bool m_isTextMode = false;
 		// True while a number (TokenNumber / TokenDecimal) is being collected.
 		bool m_isNumberMode = false;
 		// True between an opening and a closing double quote; this flag stays set
 		// across GetNextToken() calls.
 		bool m_isStringMode = false;
 		// True once the number being collected has seen its decimal point.
 		bool m_numberModeHasDecimal = false;
 		// Token (and its text) queued for the next GetNextToken() call;
 		// TokenUnknown means nothing is queued.
 		Token m_overrideToken = Token::TokenUnknown;
 		std::string m_overrideText = "";
 	};
 }