Latest stuff, rewriting lexer
This commit is contained in:
+234
-112
@@ -1,49 +1,170 @@
|
||||
/// AUTOGENERATED COPYRIGHT HEADER START
|
||||
// Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
|
||||
// AUTOGENERATED COPYRIGHT HEADER END
|
||||
#include "lexer.hpp"
|
||||
#include <codecvt>
|
||||
#include <sstream>
|
||||
|
||||
std::pair<char, blitz::Lexer::Token> g_symbolCharacters[] = {
|
||||
/*std::pair<char, blitz::tokentype> g_symbolCharacters[] = {
|
||||
//{ '\"', BlitzLLVM::Lexer::Token::TokenDoubleQuote }, // Has special meaning.
|
||||
{ '+', blitz::Lexer::Token::TokenPlus },
|
||||
{ '-', blitz::Lexer::Token::TokenMinus },
|
||||
{ '/', blitz::Lexer::Token::TokenSlashForward },
|
||||
{ '\\', blitz::Lexer::Token::TokenSlashBackward },
|
||||
{ '*', blitz::Lexer::Token::TokenMultiply },
|
||||
{ '=', blitz::Lexer::Token::TokenEqual },
|
||||
{ '#', blitz::Lexer::Token::TokenOctothorp },
|
||||
{ '%', blitz::Lexer::Token::TokenPercent },
|
||||
{ '$', blitz::Lexer::Token::TokenDollar },
|
||||
{ '(', blitz::Lexer::Token::TokenRoundBracketOpen },
|
||||
{ ')', blitz::Lexer::Token::TokenRoundBracketClose },
|
||||
{ '[', blitz::Lexer::Token::TokenSquareBracketOpen },
|
||||
{ ']', blitz::Lexer::Token::TokenSquareBracketClose },
|
||||
{ '<', blitz::Lexer::Token::TokenAngleBracketOpen },
|
||||
{ '>', blitz::Lexer::Token::TokenAngleBracketClose },
|
||||
//{ '.', BlitzLLVM::Lexer::Token::TokenDot }, // Special meaning.
|
||||
{ ':', blitz::Lexer::Token::TokenColon },
|
||||
{ ',', blitz::Lexer::Token::TokenComma },
|
||||
//{ ';', BlitzLLVM::Lexer::Token::TokenSemicolon },
|
||||
{ '^', blitz::Lexer::Token::TokenCaret },
|
||||
{ '~', blitz::Lexer::Token::TokenBitNot },
|
||||
};
|
||||
{ '+', blitz::tokentype::TokenPlus },
|
||||
{ '-', blitz::tokentype::TokenMinus },
|
||||
{ '/', blitz::tokentype::TokenSlashForward },
|
||||
{ '\\', blitz::tokentype::TokenSlashBackward },
|
||||
{ '*', blitz::tokentype::TokenMultiply },
|
||||
{ '=', blitz::tokentype::TokenEqual },
|
||||
{ '#', blitz::tokentype::TokenOctothorp },
|
||||
{ '%', blitz::tokentype::TokenPercent },
|
||||
{ '$', blitz::tokentype::TokenDollar },
|
||||
{ '(', blitz::tokentype::TokenRoundBracketOpen },
|
||||
{ ')', blitz::tokentype::TokenRoundBracketClose },
|
||||
{ '[', blitz::tokentype::TokenSquareBracketOpen },
|
||||
{ ']', blitz::tokentype::TokenSquareBracketClose },
|
||||
{ '<', blitz::tokentype::TokenAngleBracketOpen },
|
||||
{ '>', blitz::tokentype::TokenAngleBracketClose },
|
||||
//{ '.', BlitzLLVM::Token::TokenDot }, // Special meaning.
|
||||
{ ':', blitz::tokentype::TokenColon },
|
||||
{ ',', blitz::tokentype::TokenComma },
|
||||
//{ ';', BlitzLLVM::Token::TokenSemicolon },
|
||||
{ '^', blitz::tokentype::TokenCaret },
|
||||
{ '~', blitz::tokentype::TokenBitNot },
|
||||
};*/
|
||||
|
||||
blitz::Lexer::Lexer() {}
|
||||
/// Destroys the lexer. No explicit cleanup is performed here.
blitz::lexer::~lexer() = default;
|
||||
|
||||
blitz::Lexer::~Lexer() {}
|
||||
/// Opens `file` for lexing and resets the lexer state.
///
/// @param file Path of the source file to tokenize.
/// @throws std::runtime_error if the file could not be opened for reading.
blitz::lexer::lexer(std::filesystem::path file)
{
	// Usually files start at line and character 0, so we should start there too.
	_line = _character = 0;

	// Try and open the file for reading.
	// Binary mode is used so that UTF-8 can eventually be supported.
	_file   = file;
	_stream = std::ifstream(_file, std::ios_base::binary);
	if (!_stream.good()) {
		// good() is only true when no error/eof bit is set, so this single check
		// covers eof(), bad() and fail() as well. The message is built with
		// std::string so that arbitrarily long paths cannot overrun a fixed buffer.
		throw std::runtime_error("Reading file '" + file.generic_string() + "' failed.");
	}

	// Initialize token storage to a default token.
	_override = _current = blitz::token{
		.line      = 0,
		.character = 0,
		.text      = "",
		.type      = blitz::token::variant::UNKNOWN,
	};
}
|
||||
|
||||
std::pair<blitz::Lexer::Token, std::string> blitz::Lexer::GetNextToken(std::shared_ptr<std::istream> fs) {
|
||||
/// Returns a copy of the token currently stored in the lexer.
/// Nothing is read from the stream.
///
/// @return The stored token.
blitz::token blitz::lexer::current()
{
	return this->_current;
}
|
||||
|
||||
/// Reads characters from the stream until a token has been recognised, stores
/// it as the current token and returns it.
///
/// Only comments are recognised so far: a comment starts at ';' and runs to the
/// end of the line (or the end of the file). The line break itself is left in
/// the stream. Any other input is consumed without producing a typed token, in
/// which case the returned token keeps the type UNKNOWN and an empty text.
///
/// @return The token that was read.
/// @throws std::runtime_error if a number contains more than one decimal point.
blitz::token blitz::lexer::next()
{
	enum class stage {
		DEFAULT,
		TEXT,
		NUMBER,
		STRING,
		COMMENT,
	} state = stage::DEFAULT;
	bool numberHasDot = false;

	std::stringstream buffer;
	blitz::token      token{
		.line      = _line,
		.character = _character,
		.text      = "",
		.type      = blitz::token::variant::UNKNOWN,
	};

	// Value returned by get()/peek() once the end of the stream has been reached.
	const auto endOfFile = std::stringstream::traits_type::eof();

	// Helper function to advance text. The character counter only moves when a
	// character was actually read, so hitting the end of the file is not counted.
	auto advance = [this, endOfFile]() {
		const auto value = _stream.get();
		if (value != endOfFile) {
			_character++;
		}
		return value;
	};

	// Helper function to turn the buffered text into a finished comment token.
	auto finishComment = [&buffer, &token]() {
		token.text = buffer.str();
		token.type = blitz::token::variant::COMMENT;
	};

	while ((token.type == blitz::token::variant::UNKNOWN) && _stream.good()) {
		const auto value = advance();
		if (value == endOfFile) {
			break;
		}

		// get() returns an integer; convert it back to a character so that the
		// buffer receives the character itself and not its numeric code.
		const char chr = static_cast<char>(value);

		if (state == stage::DEFAULT) {
			if (chr == ';') { // We've encountered a comment, so we should change state and ignore this symbol.
				state           = stage::COMMENT;
				token.line      = _line;
				token.character = _character;
				// The token type is assigned once the comment is complete, otherwise
				// the loop would stop before any comment text has been read.
				// TODO: characters buffered before the ';' are discarded until the other stages exist.
				buffer.str("");
			} else {
				// TODO: transitions into TEXT, NUMBER and STRING are not implemented yet.
				buffer << chr;
			}
		} else if (state == stage::NUMBER) {
			// isdigit() requires a value representable as unsigned char.
			if (isdigit(static_cast<unsigned char>(chr))) {
				buffer << chr;
			} else if (chr == '.') {
				if (numberHasDot) {
					std::stringstream message;
					message << "Number contains more than one decimal point (line " << _line << ", character "
							<< _character << ").";
					throw std::runtime_error(message.str());
				}
				numberHasDot = true;
				buffer << chr; // Keep the decimal point as part of the number text.
			} else {
				// TODO: finishing a number token is not implemented yet.
			}
		} else if (state == stage::TEXT) {
			// TODO: not implemented yet.
		} else if (state == stage::STRING) {
			// TODO: not implemented yet.
		} else if (state == stage::COMMENT) {
			if (chr == '\n') {
				// Unix line ending: hand the line break back so that it stays in
				// the stream, just like the '\n' of a "\r\n" pair does.
				_stream.unget();
				_character--;
				finishComment();
			} else if (chr == '\r' && _stream.peek() == '\n') {
				// Windows line ending: the '\r' is dropped, the '\n' stays unread.
				finishComment();
			} else {
				buffer << chr;
			}
		}
	}

	// A comment on the last line of a file has no line break to terminate it.
	if ((state == stage::COMMENT) && (token.type == blitz::token::variant::UNKNOWN)) {
		finishComment();
	}

	_current = token;
	return _current;
}
|
||||
|
||||
/*
|
||||
std::pair<blitz::tokentype, std::string> blitz::lexer::current() {
|
||||
return _current;
|
||||
}
|
||||
|
||||
std::pair<blitz::tokentype, std::string> blitz::lexer::next(std::istream& fs) {
|
||||
std::stringstream buffer;
|
||||
blitz::tokentype token;
|
||||
|
||||
enum class parserState {
|
||||
DEFAULT,
|
||||
TEXT,
|
||||
NUMBER,
|
||||
STRING,
|
||||
COMMENT,
|
||||
} state = parserState::DEFAULT;
|
||||
|
||||
while ((token == blitz::tokentype::TokenUnknown) && !fs.eof() && fs.good()) {
|
||||
auto chr = fs.get();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
std::pair<blitz::lexer::token, std::string> blitz::lexer::next(std::shared_ptr<std::istream> fs) {
|
||||
std::string buf;
|
||||
Token tkn = Token::TokenEOF;
|
||||
token tkn = token::TokenUnknown;
|
||||
bool haveResult = false;
|
||||
|
||||
// Allow "overriding" the next retrieved Token.
|
||||
if (m_overrideToken != Token::TokenUnknown) {
|
||||
if (m_overrideToken != token::TokenUnknown) {
|
||||
buf = m_overrideText;
|
||||
tkn = m_overrideToken;
|
||||
m_overrideToken = Token::TokenUnknown;
|
||||
m_overrideToken = token::TokenUnknown;
|
||||
haveResult = true;
|
||||
}
|
||||
|
||||
@@ -56,11 +177,11 @@ std::pair<blitz::Lexer::Token, std::string> blitz::Lexer::GetNextToken(std::shar
|
||||
char chr = fs->get();
|
||||
|
||||
if (chr == '\r' || chr == '\n') {
|
||||
if (tkn != Token::TokenEOF) {
|
||||
m_overrideToken = Token::TokenNewLine;
|
||||
if (tkn != token::TokenEOF) {
|
||||
m_overrideToken = token::TokenNewLine;
|
||||
m_overrideText = "";
|
||||
} else {
|
||||
tkn = Token::TokenNewLine;
|
||||
tkn = token::TokenNewLine;
|
||||
buf = "";
|
||||
}
|
||||
|
||||
@@ -71,10 +192,10 @@ std::pair<blitz::Lexer::Token, std::string> blitz::Lexer::GetNextToken(std::shar
|
||||
break;
|
||||
} else if (m_isStringMode) {
|
||||
if (chr == '\"') {
|
||||
m_overrideToken = Token::TokenDoubleQuote;
|
||||
m_overrideToken = token::TokenDoubleQuote;
|
||||
m_overrideText = chr;
|
||||
m_isStringMode = false;
|
||||
tkn = Token::TokenQuotedText;
|
||||
tkn = token::TokenQuotedText;
|
||||
break;
|
||||
} else if (iscntrl(chr) || !isprint(chr)) {
|
||||
fs->putback(chr);
|
||||
@@ -97,7 +218,7 @@ std::pair<blitz::Lexer::Token, std::string> blitz::Lexer::GetNextToken(std::shar
|
||||
} else if (chr == '.') {
|
||||
if (m_numberModeHasDecimal == false) {
|
||||
m_numberModeHasDecimal = true;
|
||||
tkn = Token::TokenDecimal;
|
||||
tkn = token::TokenDecimal;
|
||||
buf += chr;
|
||||
} else {
|
||||
fs->putback(chr);
|
||||
@@ -111,7 +232,7 @@ std::pair<blitz::Lexer::Token, std::string> blitz::Lexer::GetNextToken(std::shar
|
||||
}
|
||||
} else if (m_isCommentMode) {
|
||||
buf += chr;
|
||||
tkn = Token::TokenComment;
|
||||
tkn = token::TokenComment;
|
||||
} else {
|
||||
// Whitespace
|
||||
if (isspace(chr))
|
||||
@@ -119,7 +240,7 @@ std::pair<blitz::Lexer::Token, std::string> blitz::Lexer::GetNextToken(std::shar
|
||||
|
||||
// Control Code
|
||||
if (iscntrl(chr)) {
|
||||
tkn = Token::TokenUnknown;
|
||||
tkn = token::TokenUnknown;
|
||||
buf = chr;
|
||||
}
|
||||
|
||||
@@ -129,13 +250,13 @@ std::pair<blitz::Lexer::Token, std::string> blitz::Lexer::GetNextToken(std::shar
|
||||
if (isdigit(chr2)) {
|
||||
m_isNumberMode = true;
|
||||
m_numberModeHasDecimal = false;
|
||||
tkn = Token::TokenNumber;
|
||||
tkn = token::TokenNumber;
|
||||
buf = chr + chr2;
|
||||
break;
|
||||
} else if (chr2 == '.') {
|
||||
m_isNumberMode = true;
|
||||
m_numberModeHasDecimal = true;
|
||||
tkn = Token::TokenDecimal;
|
||||
tkn = token::TokenDecimal;
|
||||
buf = chr + "0" + chr2;
|
||||
break;
|
||||
} else {
|
||||
@@ -151,7 +272,7 @@ std::pair<blitz::Lexer::Token, std::string> blitz::Lexer::GetNextToken(std::shar
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (tkn != Token::TokenEOF) {
|
||||
if (tkn != token::TokenEOF) {
|
||||
haveResult = true;
|
||||
break;
|
||||
}
|
||||
@@ -159,30 +280,30 @@ std::pair<blitz::Lexer::Token, std::string> blitz::Lexer::GetNextToken(std::shar
|
||||
// Strings, Text, Numbers
|
||||
if (chr == ';') {
|
||||
m_isCommentMode = true;
|
||||
tkn = Token::TokenSemicolon;
|
||||
tkn = token::TokenSemicolon;
|
||||
buf = chr;
|
||||
break;
|
||||
} else if (chr == '\"') {
|
||||
m_isStringMode = true;
|
||||
tkn = Token::TokenDoubleQuote;
|
||||
tkn = token::TokenDoubleQuote;
|
||||
buf = chr;
|
||||
break;
|
||||
} else if (isalpha(chr)) {
|
||||
m_isTextMode = true;
|
||||
tkn = Token::TokenText;
|
||||
tkn = token::TokenText;
|
||||
buf = chr;
|
||||
} else if (isdigit(chr)) {
|
||||
m_isNumberMode = true;
|
||||
m_numberModeHasDecimal = false;
|
||||
tkn = Token::TokenNumber;
|
||||
tkn = token::TokenNumber;
|
||||
buf = chr;
|
||||
} else if (chr == '.') {
|
||||
m_isNumberMode = true;
|
||||
m_numberModeHasDecimal = true;
|
||||
tkn = Token::TokenDecimal;
|
||||
tkn = token::TokenDecimal;
|
||||
buf = "0" + chr;
|
||||
} else {
|
||||
tkn = Token::TokenUnknown;
|
||||
tkn = token::TokenUnknown;
|
||||
buf = chr;
|
||||
break;
|
||||
}
|
||||
@@ -190,90 +311,91 @@ std::pair<blitz::Lexer::Token, std::string> blitz::Lexer::GetNextToken(std::shar
|
||||
}
|
||||
|
||||
// Convert from Text into native Token.
|
||||
if (tkn == Token::TokenText)
|
||||
tkn = ConvertTextToToken(tkn, buf);
|
||||
if (tkn == token::TokenText)
|
||||
tkn = to_token(tkn, buf);
|
||||
|
||||
return std::make_pair(tkn, buf);
|
||||
}
|
||||
|
||||
blitz::Lexer::Token blitz::Lexer::ConvertTextToToken(Token in, std::string text) {
|
||||
static std::pair<const char*, Token> l_textToTokenList[] = {
|
||||
blitz::lexer::token blitz::lexer::to_token(token in, std::string text) {
|
||||
static std::pair<const char*, token> l_textToTokenList[] = {
|
||||
// Binary
|
||||
{ "not", Token::TokenNot },
|
||||
{ "and", Token::TokenAnd },
|
||||
{ "or", Token::TokenOr },
|
||||
{ "xor", Token::TokenXor },
|
||||
{ "shl", Token::TokenShl },
|
||||
{ "shr", Token::TokenShr },
|
||||
{ "sal", Token::TokenSal },
|
||||
{ "sar", Token::TokenSar },
|
||||
{ "false", Token::TokenFalse },
|
||||
{ "true", Token::TokenTrue },
|
||||
{ "not", token::TokenNot },
|
||||
{ "and", token::TokenAnd },
|
||||
{ "or", token::TokenOr },
|
||||
{ "xor", token::TokenXor },
|
||||
{ "shl", token::TokenShl },
|
||||
{ "shr", token::TokenShr },
|
||||
{ "sal", token::TokenSal },
|
||||
{ "sar", token::TokenSar },
|
||||
{ "false", token::TokenFalse },
|
||||
{ "true", token::TokenTrue },
|
||||
|
||||
// Conversion
|
||||
{ "float", Token::TokenFloat },
|
||||
{ "string", Token::TokenString },
|
||||
{ "hex", Token::TokenHex },
|
||||
{ "int", Token::TokenInt },
|
||||
{ "float", token::TokenFloat },
|
||||
{ "string", token::TokenString },
|
||||
{ "hex", token::TokenHex },
|
||||
{ "int", token::TokenInt },
|
||||
|
||||
// Control
|
||||
{ "if", Token::TokenIf },
|
||||
{ "then", Token::TokenThen },
|
||||
{ "elseIf", Token::TokenElseIf },
|
||||
{ "else", Token::TokenElse },
|
||||
{ "endIf", Token::TokenEndIf },
|
||||
{ "select", Token::TokenSelect },
|
||||
{ "case", Token::TokenCase },
|
||||
{ "default", Token::TokenDefault },
|
||||
{ "goto", Token::TokenGoto },
|
||||
{ "gosub", Token::TokenGosub },
|
||||
{ "return", Token::TokenReturn },
|
||||
{ "function", Token::TokenFunction },
|
||||
{ "end", Token::TokenEnd },
|
||||
{ "stop", Token::TokenStop },
|
||||
|
||||
{ "if", token::TokenIf },
|
||||
{ "then", token::TokenThen },
|
||||
{ "elseif", token::TokenElseIf },
|
||||
{ "else", token::TokenElse },
|
||||
{ "endif", token::TokenEndIf },
|
||||
{ "select", token::TokenSelect },
|
||||
{ "case", token::TokenCase },
|
||||
{ "default", token::TokenDefault },
|
||||
{ "goto", token::TokenGoto },
|
||||
{ "gosub", token::TokenGosub },
|
||||
{ "return", token::TokenReturn },
|
||||
{ "function", token::TokenFunction },
|
||||
{ "end", token::TokenEnd },
|
||||
{ "stop", token::TokenStop },
|
||||
|
||||
// Loop
|
||||
{ "for", Token::TokenFor },
|
||||
{ "to", Token::TokenTo },
|
||||
{ "next", Token::TokenNext },
|
||||
{ "while", Token::TokenWhile },
|
||||
{ "wend", Token::TokenWend },
|
||||
{ "repeat", Token::TokenRepeat },
|
||||
{ "until", Token::TokenUntil },
|
||||
{ "forever", Token::TokenForever },
|
||||
{ "exit", Token::TokenExit },
|
||||
{ "for", token::TokenFor },
|
||||
{ "to", token::TokenTo },
|
||||
{ "next", token::TokenNext },
|
||||
{ "while", token::TokenWhile },
|
||||
{ "wend", token::TokenWend },
|
||||
{ "repeat", token::TokenRepeat },
|
||||
{ "until", token::TokenUntil },
|
||||
{ "forever", token::TokenForever },
|
||||
{ "exit", token::TokenExit },
|
||||
|
||||
// Math
|
||||
{ "abs", Token::TokenAbs },
|
||||
{ "sign", Token::TokenSign },
|
||||
{ "cos", Token::TokenCos },
|
||||
{ "sin", Token::TokenSin },
|
||||
{ "tan", Token::TokenTan },
|
||||
{ "acos", Token::TokenACos },
|
||||
{ "asin", Token::TokenASin },
|
||||
{ "atan", Token::TokenATan },
|
||||
{ "atan2", Token::TokenATan2 },
|
||||
{ "log", Token::TokenLog },
|
||||
{ "log10", Token::TokenLog10 },
|
||||
{ "ceil", Token::TokenCeil },
|
||||
{ "floor", Token::TokenFloor },
|
||||
{ "mod", Token::TokenMod },
|
||||
{ "pi", Token::TokenPi },
|
||||
{ "exp", Token::TokenExp },
|
||||
{ "sqr", Token::TokenSqr },
|
||||
{ "abs", token::TokenAbs },
|
||||
{ "sign", token::TokenSign },
|
||||
{ "cos", token::TokenCos },
|
||||
{ "sin", token::TokenSin },
|
||||
{ "tan", token::TokenTan },
|
||||
{ "acos", token::TokenACos },
|
||||
{ "asin", token::TokenASin },
|
||||
{ "atan", token::TokenATan },
|
||||
{ "atan2", token::TokenATan2 },
|
||||
{ "log", token::TokenLog },
|
||||
{ "log10", token::TokenLog10 },
|
||||
{ "ceil", token::TokenCeil },
|
||||
{ "floor", token::TokenFloor },
|
||||
{ "mod", token::TokenMod },
|
||||
{ "pi", token::TokenPi },
|
||||
{ "exp", token::TokenExp },
|
||||
{ "sqr", token::TokenSqr },
|
||||
|
||||
// Variables
|
||||
{ "const", Token::TokenConst },
|
||||
{ "global", Token::TokenGlobal },
|
||||
{ "local", Token::TokenLocal },
|
||||
{ "const", token::TokenConst },
|
||||
{ "global", token::TokenGlobal },
|
||||
{ "local", token::TokenLocal },
|
||||
|
||||
// Includes
|
||||
{ "include", Token::TokenInclude },
|
||||
{ "include", token::TokenInclude },
|
||||
};
|
||||
for (auto v : l_textToTokenList) {
|
||||
if (boost::iequals(text, v.first)) {
|
||||
if (stricmp(text.c_str(), v.first)) {
|
||||
return v.second;
|
||||
}
|
||||
}
|
||||
return in;
|
||||
}
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user