297 lines
8.0 KiB
C++
297 lines
8.0 KiB
C++
// Code Compiler for BlitzLLVM
|
|
// Copyright(C) 2017 Michael Fabian Dirks
|
|
//
|
|
// This program is free software : you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program.  If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
#include "lexer.hpp"
|
|
#include <codecvt>
|
|
#include <boost/algorithm/string/predicate.hpp>
|
|
|
|
std::pair<char, BlitzLLVM::Lexer::Token> g_symbolCharacters[] = {
|
|
//{ '\"', BlitzLLVM::Lexer::Token::TokenDoubleQuote }, // Has special meaning.
|
|
{ '+', BlitzLLVM::Lexer::Token::TokenPlus },
|
|
{ '-', BlitzLLVM::Lexer::Token::TokenMinus },
|
|
{ '/', BlitzLLVM::Lexer::Token::TokenSlashForward },
|
|
{ '\\', BlitzLLVM::Lexer::Token::TokenSlashBackward },
|
|
{ '*', BlitzLLVM::Lexer::Token::TokenMultiply },
|
|
{ '=', BlitzLLVM::Lexer::Token::TokenEqual },
|
|
{ '#', BlitzLLVM::Lexer::Token::TokenOctothorp },
|
|
{ '%', BlitzLLVM::Lexer::Token::TokenPercent },
|
|
{ '$', BlitzLLVM::Lexer::Token::TokenDollar },
|
|
{ '(', BlitzLLVM::Lexer::Token::TokenRoundBracketOpen },
|
|
{ ')', BlitzLLVM::Lexer::Token::TokenRoundBracketClose },
|
|
{ '[', BlitzLLVM::Lexer::Token::TokenSquareBracketOpen },
|
|
{ ']', BlitzLLVM::Lexer::Token::TokenSquareBracketClose },
|
|
{ '<', BlitzLLVM::Lexer::Token::TokenAngleBracketOpen },
|
|
{ '>', BlitzLLVM::Lexer::Token::TokenAngleBracketClose },
|
|
//{ '.', BlitzLLVM::Lexer::Token::TokenDot }, // Special meaning.
|
|
{ ':', BlitzLLVM::Lexer::Token::TokenColon },
|
|
{ ',', BlitzLLVM::Lexer::Token::TokenComma },
|
|
//{ ';', BlitzLLVM::Lexer::Token::TokenSemicolon },
|
|
{ '^', BlitzLLVM::Lexer::Token::TokenCaret },
|
|
{ '~', BlitzLLVM::Lexer::Token::TokenBitNot },
|
|
};
|
|
|
|
BlitzLLVM::Lexer::Lexer() {}
|
|
|
|
BlitzLLVM::Lexer::~Lexer() {}
|
|
|
|
std::pair<BlitzLLVM::Lexer::Token, std::string> BlitzLLVM::Lexer::GetCurrentToken() {
|
|
return std::make_pair(m_currentToken, m_currentText);
|
|
}
|
|
|
|
std::pair<BlitzLLVM::Lexer::Token, std::string> BlitzLLVM::Lexer::GetNextToken(std::shared_ptr<std::istream> fs) {
|
|
std::string buf;
|
|
Token tkn = Token::TokenEOF;
|
|
bool haveResult = false;
|
|
|
|
// Allow "overriding" the next retrieved Token.
|
|
if (m_overrideToken != Token::TokenUnknown) {
|
|
buf = m_overrideText;
|
|
tkn = m_overrideToken;
|
|
m_overrideToken = Token::TokenUnknown;
|
|
haveResult = true;
|
|
}
|
|
|
|
bool m_isTextMode = false;
|
|
bool m_isNumberMode = false;
|
|
bool m_isStringMode = false;
|
|
bool m_isCommentMode = false;
|
|
bool m_numberModeHasDecimal = false;
|
|
while (((fs->eof() == false) && (fs->good())) && !haveResult) {
|
|
char chr = fs->get();
|
|
|
|
if (chr == '\r' || chr == '\n') {
|
|
if (tkn != Token::TokenEOF) {
|
|
m_overrideToken = Token::TokenNewLine;
|
|
m_overrideText = "";
|
|
} else {
|
|
tkn = Token::TokenNewLine;
|
|
buf = "";
|
|
}
|
|
|
|
m_isStringMode = false;
|
|
m_isNumberMode = false;
|
|
m_isTextMode = false;
|
|
m_isCommentMode = false;
|
|
break;
|
|
} else if (m_isStringMode) {
|
|
if (chr == '\"') {
|
|
m_overrideToken = Token::TokenDoubleQuote;
|
|
m_overrideText = chr;
|
|
m_isStringMode = false;
|
|
tkn = Token::TokenQuotedText;
|
|
break;
|
|
} else if (iscntrl(chr) || !isprint(chr)) {
|
|
fs->putback(chr);
|
|
m_isStringMode = false;
|
|
break;
|
|
} else {
|
|
buf += chr;
|
|
}
|
|
} else if (m_isTextMode) {
|
|
if (isalnum(chr) || (chr == '_')) {
|
|
buf += chr;
|
|
} else {
|
|
fs->putback(chr);
|
|
m_isTextMode = false;
|
|
break;
|
|
}
|
|
} else if (m_isNumberMode) {
|
|
if (isdigit(chr)) {
|
|
buf += chr;
|
|
} else if (chr == '.') {
|
|
if (m_numberModeHasDecimal == false) {
|
|
m_numberModeHasDecimal = true;
|
|
tkn = Token::TokenDecimal;
|
|
buf += chr;
|
|
} else {
|
|
fs->putback(chr);
|
|
m_isNumberMode = false;
|
|
break;
|
|
}
|
|
} else {
|
|
fs->putback(chr);
|
|
m_isNumberMode = false;
|
|
break;
|
|
}
|
|
} else if (m_isCommentMode) {
|
|
buf += chr;
|
|
tkn = Token::TokenComment;
|
|
} else {
|
|
// Whitespace
|
|
if (isspace(chr))
|
|
continue;
|
|
|
|
// Control Code
|
|
if (iscntrl(chr)) {
|
|
tkn = Token::TokenUnknown;
|
|
buf = chr;
|
|
}
|
|
|
|
// Special handling for + and -, due to numbers and decimals.
|
|
if (chr == '+' || chr == '-') {
|
|
char chr2 = fs->get();
|
|
if (isdigit(chr2)) {
|
|
m_isNumberMode = true;
|
|
m_numberModeHasDecimal = false;
|
|
tkn = Token::TokenNumber;
|
|
buf = chr + chr2;
|
|
break;
|
|
} else if (chr2 == '.') {
|
|
m_isNumberMode = true;
|
|
m_numberModeHasDecimal = true;
|
|
tkn = Token::TokenDecimal;
|
|
buf = chr + "0" + chr2;
|
|
break;
|
|
} else {
|
|
fs->putback(chr2);
|
|
}
|
|
}
|
|
|
|
// Symbol
|
|
for (auto v : g_symbolCharacters) {
|
|
if (v.first == chr) {
|
|
tkn = v.second;
|
|
buf = v.first;
|
|
break;
|
|
}
|
|
}
|
|
if (tkn != Token::TokenEOF) {
|
|
haveResult = true;
|
|
break;
|
|
}
|
|
|
|
// Strings, Text, Numbers
|
|
if (chr == ';') {
|
|
m_isCommentMode = true;
|
|
tkn = Token::TokenSemicolon;
|
|
buf = chr;
|
|
break;
|
|
} else if (chr == '\"') {
|
|
m_isStringMode = true;
|
|
tkn = Token::TokenDoubleQuote;
|
|
buf = chr;
|
|
break;
|
|
} else if (isalpha(chr)) {
|
|
m_isTextMode = true;
|
|
tkn = Token::TokenText;
|
|
buf = chr;
|
|
} else if (isdigit(chr)) {
|
|
m_isNumberMode = true;
|
|
m_numberModeHasDecimal = false;
|
|
tkn = Token::TokenNumber;
|
|
buf = chr;
|
|
} else if (chr == '.') {
|
|
m_isNumberMode = true;
|
|
m_numberModeHasDecimal = true;
|
|
tkn = Token::TokenDecimal;
|
|
buf = "0" + chr;
|
|
} else {
|
|
tkn = Token::TokenUnknown;
|
|
buf = chr;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Convert from Text into native Token.
|
|
if (tkn == Token::TokenText)
|
|
tkn = ConvertTextToToken(tkn, buf);
|
|
|
|
return std::make_pair(tkn, buf);
|
|
}
|
|
|
|
BlitzLLVM::Lexer::Token BlitzLLVM::Lexer::ConvertTextToToken(Token in, std::string text) {
|
|
static std::pair<const char*, Token> l_textToTokenList[] = {
|
|
// Binary
|
|
{ "not", Token::TokenNot },
|
|
{ "and", Token::TokenAnd },
|
|
{ "or", Token::TokenOr },
|
|
{ "xor", Token::TokenXor },
|
|
{ "shl", Token::TokenShl },
|
|
{ "shr", Token::TokenShr },
|
|
{ "sal", Token::TokenSal },
|
|
{ "sar", Token::TokenSar },
|
|
{ "false", Token::TokenFalse },
|
|
{ "true", Token::TokenTrue },
|
|
|
|
// Conversion
|
|
{ "float", Token::TokenFloat },
|
|
{ "string", Token::TokenString },
|
|
{ "hex", Token::TokenHex },
|
|
{ "int", Token::TokenInt },
|
|
|
|
// Control
|
|
{ "if", Token::TokenIf },
|
|
{ "then", Token::TokenThen },
|
|
{ "elseIf", Token::TokenElseIf },
|
|
{ "else", Token::TokenElse },
|
|
{ "endIf", Token::TokenEndIf },
|
|
{ "select", Token::TokenSelect },
|
|
{ "case", Token::TokenCase },
|
|
{ "default", Token::TokenDefault },
|
|
{ "goto", Token::TokenGoto },
|
|
{ "gosub", Token::TokenGosub },
|
|
{ "return", Token::TokenReturn },
|
|
{ "function", Token::TokenFunction },
|
|
{ "end", Token::TokenEnd },
|
|
{ "stop", Token::TokenStop },
|
|
|
|
// Loop
|
|
{ "for", Token::TokenFor },
|
|
{ "to", Token::TokenTo },
|
|
{ "next", Token::TokenNext },
|
|
{ "while", Token::TokenWhile },
|
|
{ "wend", Token::TokenWend },
|
|
{ "repeat", Token::TokenRepeat },
|
|
{ "until", Token::TokenUntil },
|
|
{ "forever", Token::TokenForever },
|
|
{ "exit", Token::TokenExit },
|
|
|
|
// Math
|
|
{ "abs", Token::TokenAbs },
|
|
{ "sign", Token::TokenSign },
|
|
{ "cos", Token::TokenCos },
|
|
{ "sin", Token::TokenSin },
|
|
{ "tan", Token::TokenTan },
|
|
{ "acos", Token::TokenACos },
|
|
{ "asin", Token::TokenASin },
|
|
{ "atan", Token::TokenATan },
|
|
{ "atan2", Token::TokenATan2 },
|
|
{ "log", Token::TokenLog },
|
|
{ "log10", Token::TokenLog10 },
|
|
{ "ceil", Token::TokenCeil },
|
|
{ "floor", Token::TokenFloor },
|
|
{ "mod", Token::TokenMod },
|
|
{ "pi", Token::TokenPi },
|
|
{ "exp", Token::TokenExp },
|
|
{ "sqr", Token::TokenSqr },
|
|
|
|
// Variables
|
|
{ "const", Token::TokenConst },
|
|
{ "global", Token::TokenGlobal },
|
|
{ "local", Token::TokenLocal },
|
|
|
|
// Includes
|
|
{ "include", Token::TokenInclude },
|
|
};
|
|
for (auto v : l_textToTokenList) {
|
|
if (boost::iequals(text, v.first)) {
|
|
return v.second;
|
|
}
|
|
}
|
|
return in;
|
|
}
|