Files
BlitzLLVM/code_compiler/source/lexer.cpp
T
Michael Fabian 'Xaymar' Dirks 4e2dd8e30b Update to modern standard i guess
2024-06-06 13:37:13 +02:00

297 lines
8.0 KiB
C++

// Code Compiler for BlitzLLVM
// Copyright(C) 2017 Michael Fabian Dirks
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <https://www.gnu.org/licenses/>.
#include "lexer.hpp"
#include <codecvt>
#include <boost/algorithm/string/predicate.hpp>
std::pair<char, BlitzLLVM::Lexer::Token> g_symbolCharacters[] = {
//{ '\"', BlitzLLVM::Lexer::Token::TokenDoubleQuote }, // Has special meaning.
{ '+', BlitzLLVM::Lexer::Token::TokenPlus },
{ '-', BlitzLLVM::Lexer::Token::TokenMinus },
{ '/', BlitzLLVM::Lexer::Token::TokenSlashForward },
{ '\\', BlitzLLVM::Lexer::Token::TokenSlashBackward },
{ '*', BlitzLLVM::Lexer::Token::TokenMultiply },
{ '=', BlitzLLVM::Lexer::Token::TokenEqual },
{ '#', BlitzLLVM::Lexer::Token::TokenOctothorp },
{ '%', BlitzLLVM::Lexer::Token::TokenPercent },
{ '$', BlitzLLVM::Lexer::Token::TokenDollar },
{ '(', BlitzLLVM::Lexer::Token::TokenRoundBracketOpen },
{ ')', BlitzLLVM::Lexer::Token::TokenRoundBracketClose },
{ '[', BlitzLLVM::Lexer::Token::TokenSquareBracketOpen },
{ ']', BlitzLLVM::Lexer::Token::TokenSquareBracketClose },
{ '<', BlitzLLVM::Lexer::Token::TokenAngleBracketOpen },
{ '>', BlitzLLVM::Lexer::Token::TokenAngleBracketClose },
//{ '.', BlitzLLVM::Lexer::Token::TokenDot }, // Special meaning.
{ ':', BlitzLLVM::Lexer::Token::TokenColon },
{ ',', BlitzLLVM::Lexer::Token::TokenComma },
//{ ';', BlitzLLVM::Lexer::Token::TokenSemicolon },
{ '^', BlitzLLVM::Lexer::Token::TokenCaret },
{ '~', BlitzLLVM::Lexer::Token::TokenBitNot },
};
BlitzLLVM::Lexer::Lexer() {}
BlitzLLVM::Lexer::~Lexer() {}
std::pair<BlitzLLVM::Lexer::Token, std::string> BlitzLLVM::Lexer::GetCurrentToken() {
return std::make_pair(m_currentToken, m_currentText);
}
std::pair<BlitzLLVM::Lexer::Token, std::string> BlitzLLVM::Lexer::GetNextToken(std::shared_ptr<std::istream> fs) {
std::string buf;
Token tkn = Token::TokenEOF;
bool haveResult = false;
// Allow "overriding" the next retrieved Token.
if (m_overrideToken != Token::TokenUnknown) {
buf = m_overrideText;
tkn = m_overrideToken;
m_overrideToken = Token::TokenUnknown;
haveResult = true;
}
bool m_isTextMode = false;
bool m_isNumberMode = false;
bool m_isStringMode = false;
bool m_isCommentMode = false;
bool m_numberModeHasDecimal = false;
while (((fs->eof() == false) && (fs->good())) && !haveResult) {
char chr = fs->get();
if (chr == '\r' || chr == '\n') {
if (tkn != Token::TokenEOF) {
m_overrideToken = Token::TokenNewLine;
m_overrideText = "";
} else {
tkn = Token::TokenNewLine;
buf = "";
}
m_isStringMode = false;
m_isNumberMode = false;
m_isTextMode = false;
m_isCommentMode = false;
break;
} else if (m_isStringMode) {
if (chr == '\"') {
m_overrideToken = Token::TokenDoubleQuote;
m_overrideText = chr;
m_isStringMode = false;
tkn = Token::TokenQuotedText;
break;
} else if (iscntrl(chr) || !isprint(chr)) {
fs->putback(chr);
m_isStringMode = false;
break;
} else {
buf += chr;
}
} else if (m_isTextMode) {
if (isalnum(chr) || (chr == '_')) {
buf += chr;
} else {
fs->putback(chr);
m_isTextMode = false;
break;
}
} else if (m_isNumberMode) {
if (isdigit(chr)) {
buf += chr;
} else if (chr == '.') {
if (m_numberModeHasDecimal == false) {
m_numberModeHasDecimal = true;
tkn = Token::TokenDecimal;
buf += chr;
} else {
fs->putback(chr);
m_isNumberMode = false;
break;
}
} else {
fs->putback(chr);
m_isNumberMode = false;
break;
}
} else if (m_isCommentMode) {
buf += chr;
tkn = Token::TokenComment;
} else {
// Whitespace
if (isspace(chr))
continue;
// Control Code
if (iscntrl(chr)) {
tkn = Token::TokenUnknown;
buf = chr;
}
// Special handling for + and -, due to numbers and decimals.
if (chr == '+' || chr == '-') {
char chr2 = fs->get();
if (isdigit(chr2)) {
m_isNumberMode = true;
m_numberModeHasDecimal = false;
tkn = Token::TokenNumber;
buf = chr + chr2;
break;
} else if (chr2 == '.') {
m_isNumberMode = true;
m_numberModeHasDecimal = true;
tkn = Token::TokenDecimal;
buf = chr + "0" + chr2;
break;
} else {
fs->putback(chr2);
}
}
// Symbol
for (auto v : g_symbolCharacters) {
if (v.first == chr) {
tkn = v.second;
buf = v.first;
break;
}
}
if (tkn != Token::TokenEOF) {
haveResult = true;
break;
}
// Strings, Text, Numbers
if (chr == ';') {
m_isCommentMode = true;
tkn = Token::TokenSemicolon;
buf = chr;
break;
} else if (chr == '\"') {
m_isStringMode = true;
tkn = Token::TokenDoubleQuote;
buf = chr;
break;
} else if (isalpha(chr)) {
m_isTextMode = true;
tkn = Token::TokenText;
buf = chr;
} else if (isdigit(chr)) {
m_isNumberMode = true;
m_numberModeHasDecimal = false;
tkn = Token::TokenNumber;
buf = chr;
} else if (chr == '.') {
m_isNumberMode = true;
m_numberModeHasDecimal = true;
tkn = Token::TokenDecimal;
buf = "0" + chr;
} else {
tkn = Token::TokenUnknown;
buf = chr;
break;
}
}
}
// Convert from Text into native Token.
if (tkn == Token::TokenText)
tkn = ConvertTextToToken(tkn, buf);
return std::make_pair(tkn, buf);
}
BlitzLLVM::Lexer::Token BlitzLLVM::Lexer::ConvertTextToToken(Token in, std::string text) {
static std::pair<const char*, Token> l_textToTokenList[] = {
// Binary
{ "not", Token::TokenNot },
{ "and", Token::TokenAnd },
{ "or", Token::TokenOr },
{ "xor", Token::TokenXor },
{ "shl", Token::TokenShl },
{ "shr", Token::TokenShr },
{ "sal", Token::TokenSal },
{ "sar", Token::TokenSar },
{ "false", Token::TokenFalse },
{ "true", Token::TokenTrue },
// Conversion
{ "float", Token::TokenFloat },
{ "string", Token::TokenString },
{ "hex", Token::TokenHex },
{ "int", Token::TokenInt },
// Control
{ "if", Token::TokenIf },
{ "then", Token::TokenThen },
{ "elseIf", Token::TokenElseIf },
{ "else", Token::TokenElse },
{ "endIf", Token::TokenEndIf },
{ "select", Token::TokenSelect },
{ "case", Token::TokenCase },
{ "default", Token::TokenDefault },
{ "goto", Token::TokenGoto },
{ "gosub", Token::TokenGosub },
{ "return", Token::TokenReturn },
{ "function", Token::TokenFunction },
{ "end", Token::TokenEnd },
{ "stop", Token::TokenStop },
// Loop
{ "for", Token::TokenFor },
{ "to", Token::TokenTo },
{ "next", Token::TokenNext },
{ "while", Token::TokenWhile },
{ "wend", Token::TokenWend },
{ "repeat", Token::TokenRepeat },
{ "until", Token::TokenUntil },
{ "forever", Token::TokenForever },
{ "exit", Token::TokenExit },
// Math
{ "abs", Token::TokenAbs },
{ "sign", Token::TokenSign },
{ "cos", Token::TokenCos },
{ "sin", Token::TokenSin },
{ "tan", Token::TokenTan },
{ "acos", Token::TokenACos },
{ "asin", Token::TokenASin },
{ "atan", Token::TokenATan },
{ "atan2", Token::TokenATan2 },
{ "log", Token::TokenLog },
{ "log10", Token::TokenLog10 },
{ "ceil", Token::TokenCeil },
{ "floor", Token::TokenFloor },
{ "mod", Token::TokenMod },
{ "pi", Token::TokenPi },
{ "exp", Token::TokenExp },
{ "sqr", Token::TokenSqr },
// Variables
{ "const", Token::TokenConst },
{ "global", Token::TokenGlobal },
{ "local", Token::TokenLocal },
// Includes
{ "include", Token::TokenInclude },
};
for (auto v : l_textToTokenList) {
if (boost::iequals(text, v.first)) {
return v.second;
}
}
return in;
}