/// AUTOGENERATED COPYRIGHT HEADER START // Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks // AUTOGENERATED COPYRIGHT HEADER END #include "lexer.hpp" #include #include /*std::pair g_symbolCharacters[] = { //{ '\"', BlitzLLVM::Lexer::Token::TokenDoubleQuote }, // Has special meaning. { '+', blitz::tokentype::TokenPlus }, { '-', blitz::tokentype::TokenMinus }, { '/', blitz:::tokentype::TokenSlashForward }, { '\\', blitz::tokentype::TokenSlashBackward }, { '*', blitz::tokentype::TokenMultiply }, { '=', blitz::tokentype::TokenEqual }, { '#', blitz::tokentype::TokenOctothorp }, { '%', blitz::tokentype::TokenPercent }, { '$', blitz::tokentype::TokenDollar }, { '(', blitz::tokentype::TokenRoundBracketOpen }, { ')', blitz::tokentype::TokenRoundBracketClose }, { '[', blitz::tokentype::TokenSquareBracketOpen }, { ']', blitz::tokentype::TokenSquareBracketClose }, { '<', blitz::tokentype::TokenAngleBracketOpen }, { '>', blitz::tokentype::TokenAngleBracketClose }, //{ '.', BlitzLLVM::Token::TokenDot }, // Special meaning. { ':', blitz::tokentype::TokenColon }, { ',', blitz::tokentype::TokenComma }, //{ ';', BlitzLLVM::Token::TokenSemicolon }, { '^', blitz::tokentype::TokenCaret }, { '~', blitz::tokentype::TokenBitNot }, };*/ blitz::lexer::~lexer() {} blitz::lexer::lexer(std::filesystem::path file) { // Usually files start at line and character 0, so we should start there too. _line = _character = 0; // Try and open the file for reading. _file = file; _stream = std::ifstream(_file, std::ios_base::binary); // We use binary so we can eventually support UTF-8. if (!_stream.good() || _stream.eof() || _stream.bad() || _stream.fail()) { char buffer[16384]; int len = snprintf(buffer, sizeof(buffer), "Reading file '%s' failed.\0", file.generic_string().c_str()); throw std::runtime_error(std::string(buffer, buffer + len)); } // Initialize token storage to a default token. _override = _current = blitz::token{ .line = 0, .character = 0, .text = "", .type = token::variant::UNKNOWN, }; } blitz::token blitz::lexer::current() { return _current; } blitz::token blitz::lexer::next() { enum class stage { DEFAULT, TEXT, NUMBER, STRING, COMMENT, } state = stage::DEFAULT; bool numberHasDot = false; std::stringstream buffer; blitz::token token{ .line = _line, .character = _character, .text = "", .type = blitz::token::variant::UNKNOWN, }; // Helper function to advance text. auto advance = [this]() { _character++; return _stream.get(); }; while ((token.type == blitz::token::variant::UNKNOWN) && _stream.good() && !_stream.eof()) { auto chr = advance(); if (state == stage::DEFAULT) { if (chr == ';') { // We've encountered a comment, so we should change state and ignore this symbol. state = stage::COMMENT; token.line = _line; token.character = _character; token.type = blitz::token::variant::COMMENT; } else { buffer << chr; } } else if (state == stage::NUMBER) { if (isdigit(chr)) { buffer << chr; } else if (chr == '.') { if (numberHasDot) { throw std::runtime_error("") } numberHasDot = true; } else { } } else if (state == stage::TEXT) { } else if (state == stage::STRING) { } else if (state == stage::COMMENT) { if (chr == '\r' && _stream.peek() == '\n') { token.text = buffer.str(); } else { buffer << chr; } } } _current = token; return _current; } /* std::pair blitz::lexer::current() { return _current; } std::pair blitz::lexer::next(std::istream& fs) { std::stringstream buffer; blitz::tokentype token; enum class parserState { DEFAULT, TEXT, NUMBER, STRING, COMMENT, } state = parserState::DEFAULT; while ((token == blitz::tokentype::TokenUnknown) && !fs.eof() && fs.good()) { auto chr = fs.get(); } } /* std::pair blitz::lexer::next(std::shared_ptr fs) { std::string buf; token tkn = token::TokenUnknown; bool haveResult = false; // Allow "overriding" the next retrieved Token. if (m_overrideToken != token::TokenUnknown) { buf = m_overrideText; tkn = m_overrideToken; m_overrideToken = token::TokenUnknown; haveResult = true; } bool m_isTextMode = false; bool m_isNumberMode = false; bool m_isStringMode = false; bool m_isCommentMode = false; bool m_numberModeHasDecimal = false; while (((fs->eof() == false) && (fs->good())) && !haveResult) { char chr = fs->get(); if (chr == '\r' || chr == '\n') { if (tkn != token::TokenEOF) { m_overrideToken = token::TokenNewLine; m_overrideText = ""; } else { tkn = token::TokenNewLine; buf = ""; } m_isStringMode = false; m_isNumberMode = false; m_isTextMode = false; m_isCommentMode = false; break; } else if (m_isStringMode) { if (chr == '\"') { m_overrideToken = token::TokenDoubleQuote; m_overrideText = chr; m_isStringMode = false; tkn = token::TokenQuotedText; break; } else if (iscntrl(chr) || !isprint(chr)) { fs->putback(chr); m_isStringMode = false; break; } else { buf += chr; } } else if (m_isTextMode) { if (isalnum(chr) || (chr == '_')) { buf += chr; } else { fs->putback(chr); m_isTextMode = false; break; } } else if (m_isNumberMode) { if (isdigit(chr)) { buf += chr; } else if (chr == '.') { if (m_numberModeHasDecimal == false) { m_numberModeHasDecimal = true; tkn = token::TokenDecimal; buf += chr; } else { fs->putback(chr); m_isNumberMode = false; break; } } else { fs->putback(chr); m_isNumberMode = false; break; } } else if (m_isCommentMode) { buf += chr; tkn = token::TokenComment; } else { // Whitespace if (isspace(chr)) continue; // Control Code if (iscntrl(chr)) { tkn = token::TokenUnknown; buf = chr; } // Special handling for + and -, due to numbers and decimals. if (chr == '+' || chr == '-') { char chr2 = fs->get(); if (isdigit(chr2)) { m_isNumberMode = true; m_numberModeHasDecimal = false; tkn = token::TokenNumber; buf = chr + chr2; break; } else if (chr2 == '.') { m_isNumberMode = true; m_numberModeHasDecimal = true; tkn = token::TokenDecimal; buf = chr + "0" + chr2; break; } else { fs->putback(chr2); } } // Symbol for (auto v : g_symbolCharacters) { if (v.first == chr) { tkn = v.second; buf = v.first; break; } } if (tkn != token::TokenEOF) { haveResult = true; break; } // Strings, Text, Numbers if (chr == ';') { m_isCommentMode = true; tkn = token::TokenSemicolon; buf = chr; break; } else if (chr == '\"') { m_isStringMode = true; tkn = token::TokenDoubleQuote; buf = chr; break; } else if (isalpha(chr)) { m_isTextMode = true; tkn = token::TokenText; buf = chr; } else if (isdigit(chr)) { m_isNumberMode = true; m_numberModeHasDecimal = false; tkn = token::TokenNumber; buf = chr; } else if (chr == '.') { m_isNumberMode = true; m_numberModeHasDecimal = true; tkn = token::TokenDecimal; buf = "0" + chr; } else { tkn = token::TokenUnknown; buf = chr; break; } } } // Convert from Text into native Token. if (tkn == token::TokenText) tkn = to_token(tkn, buf); return std::make_pair(tkn, buf); } blitz::lexer::token blitz::lexer::to_token(token in, std::string text) { static std::pair l_textToTokenList[] = { // Binary { "not", token::TokenNot }, { "and", token::TokenAnd }, { "or", token::TokenOr }, { "xor", token::TokenXor }, { "shl", token::TokenShl }, { "shr", token::TokenShr }, { "sal", token::TokenSal }, { "sar", token::TokenSar }, { "false", token::TokenFalse }, { "true", token::TokenTrue }, // Conversion { "float", token::TokenFloat }, { "string", token::TokenString }, { "hex", token::TokenHex }, { "int", token::TokenInt }, // Control { "if", token::TokenIf }, { "then", token::TokenThen }, { "elseif", token::TokenElseIf }, { "else", token::TokenElse }, { "endif", token::TokenEndIf }, { "select", token::TokenSelect }, { "case", token::TokenCase }, { "default", token::TokenDefault }, { "goto", token::TokenGoto }, { "gosub", token::TokenGosub }, { "return", token::TokenReturn }, { "function", token::TokenFunction }, { "end", token::TokenEnd }, { "stop", token::TokenStop }, // Loop { "for", token::TokenFor }, { "to", token::TokenTo }, { "next", token::TokenNext }, { "while", token::TokenWhile }, { "wend", token::TokenWend }, { "repeat", token::TokenRepeat }, { "until", token::TokenUntil }, { "forever", token::TokenForever }, { "exit", token::TokenExit }, // Math { "abs", token::TokenAbs }, { "sign", token::TokenSign }, { "cos", token::TokenCos }, { "sin", token::TokenSin }, { "tan", token::TokenTan }, { "acos", token::TokenACos }, { "asin", token::TokenASin }, { "atan", token::TokenATan }, { "atan2", token::TokenATan2 }, { "log", token::TokenLog }, { "log10", token::TokenLog10 }, { "ceil", token::TokenCeil }, { "floor", token::TokenFloor }, { "mod", token::TokenMod }, { "pi", token::TokenPi }, { "exp", token::TokenExp }, { "sqr", token::TokenSqr }, // Variables { "const", token::TokenConst }, { "global", token::TokenGlobal }, { "local", token::TokenLocal }, // Includes { "include", token::TokenInclude }, }; for (auto v : l_textToTokenList) { if (stricmp(text.c_str(), v.first)) { return v.second; } } return in; } */