From 15b8ed7690ccf7c521a63942f68d4e74027d2d73 Mon Sep 17 00:00:00 2001 From: Michael Fabian 'Xaymar' Dirks Date: Wed, 12 Feb 2025 00:03:19 +0100 Subject: [PATCH] Even more work --- code_compiler/source/ast/ast.cpp | 168 +++++++++++++++++++++++++++---- code_compiler/source/ast/ast.hpp | 14 ++- code_compiler/source/lexer.cpp | 17 +++- code_compiler/source/lexer.hpp | 2 +- code_compiler/source/main.cpp | 42 +++----- code_compiler/source/parser.cpp | 2 - code_compiler/source/util.cpp | 12 ++- code_compiler/source/util.hpp | 2 + tests/test.bb | 46 +++++++++ 9 files changed, 250 insertions(+), 55 deletions(-) diff --git a/code_compiler/source/ast/ast.cpp b/code_compiler/source/ast/ast.cpp index 5cb45e9..81854c6 100644 --- a/code_compiler/source/ast/ast.cpp +++ b/code_compiler/source/ast/ast.cpp @@ -2,9 +2,7 @@ // Copyright (C) 2017-2025 Michael Fabian 'Xaymar' Dirks // AUTOGENERATED COPYRIGHT HEADER END #include "ast.hpp" -#include -#include -#include +#include "util.hpp" blitz::ast::variable::~variable() { @@ -91,6 +89,9 @@ std::shared_ptr blitz::ast::variable::try_parse(std::shared_pt auto symbol_tk = lexer->peek(); if (symbol_tk.type != blitz::token::variant::SYMBOL) { return node; + } else if (symbol_tk.location.second != (name_tk.location.second + name_tk.text.length())) { + // We only care about these if they're immediately attached to us. + return node; } if (symbol_tk.text == ":") { // :Type @@ -99,6 +100,9 @@ std::shared_ptr blitz::ast::variable::try_parse(std::shared_pt if (type_tk != blitz::token::variant::TEXT) { throw blitz::error(file, name_tk.location, type_tk.location, blitz::format("Unexpected %s, expected text.", type_tk.to_string().c_str())); } + if (type_tk.location.second != (symbol_tk.location.second + 1)) { + throw blitz::error(file, name_tk.location, type_tk.location, blitz::format("Unexpected white space, expected text.")); + } auto type = blitz::types::from_string(type_tk.text); if (type == blitz::types::type::UNKNOWN) { @@ -114,6 +118,9 @@ std::shared_ptr blitz::ast::variable::try_parse(std::shared_pt if (type_tk != blitz::token::variant::TEXT) { throw blitz::error(file, name_tk.location, type_tk.location, blitz::format("Unexpected %s, expected text.", type_tk.to_string().c_str())); } + if (type_tk.location.second != (symbol_tk.location.second + 1)) { + throw blitz::error(file, name_tk.location, type_tk.location, blitz::format("Unexpected white space, expected text.")); + } node->tokens.push_back(type_tk); node->type = blitz::types::type::STRUCT; @@ -145,20 +152,15 @@ bool blitz::ast::value::can_parse(std::shared_ptr lexer) case blitz::token::variant::REAL: case blitz::token::variant::INTEGER: return true; - case blitz::token::variant::STRING: { + case blitz::token::variant::TEXT: { // We can only parse True, False, Null std::string text = tk.text; - std::transform(text.cbegin(), text.cend(), text.begin(), [](char from) { - if (from & 0b10000000) { // Exclude Unicode - return from; - } - return (char)std::tolower(from); - }); - if (tk.text == "false") { + std::transform(text.cbegin(), text.cend(), text.begin(), blitz::utility::utf8_safe_tolower); + if (text == "false") { return true; - } else if (tk.text == "true") { + } else if (text == "true") { return true; - } else if (tk.text == "null") { + } else if (text == "null") { return true; } break; @@ -170,8 +172,8 @@ bool blitz::ast::value::can_parse(std::shared_ptr lexer) std::shared_ptr blitz::ast::value::try_parse(std::shared_ptr lexer) { - auto tk = lexer->current(); - auto utk = lexer->peek(); + auto file = lexer->file(); + auto tk = lexer->current(); auto node = std::make_shared(); node->type = variant::UNKNOWN; @@ -180,6 +182,12 @@ std::shared_ptr blitz::ast::value::try_parse(std::shared_ptrtype = variant::STRING; node->text = tk.text; return node; + } else if (tk.type == blitz::token::variant::REAL) { + node->type = variant::REAL; + node->number.f = strtod(tk.text.c_str(), nullptr); + if (errno == ERANGE) { + throw blitz::error(file, tk.location, tk.location, blitz::format("Real '%s' is not representable on this system.", tk.text.c_str())); + } } else if (tk.type == blitz::token::variant::INTEGER) { // Figure out which base this integer is in (and where it starts). int base = 10; @@ -191,18 +199,18 @@ std::shared_ptr blitz::ast::value::try_parse(std::shared_ptrtype = variant::UNSIGNED_INTEGER; node->number.ui = strtoull(text, nullptr, base); if (errno == ERANGE) { - throw blitz::error(file, tk.location, tk.location, blitz::format("Value '%s' is not representable on this system.", tk.text.c_str())); + throw blitz::error(file, tk.location, tk.location, blitz::format("Integer '%s' is not representable on this system.", tk.text.c_str())); } } else { // Try and figure out if it is unsigned. @@ -211,12 +219,132 @@ std::shared_ptr blitz::ast::value::try_parse(std::shared_ptrtype = variant::UNSIGNED_INTEGER; node->number.ui = strtoull(text, nullptr, base); if (errno == ERANGE) { - throw blitz::error(file, tk.location, tk.location, blitz::format("Value '%s' is not representable on this system.", tk.text.c_str())); + throw blitz::error(file, tk.location, tk.location, blitz::format("Integer '%s' is not representable on this system.", tk.text.c_str())); } } else { node->type = variant::INTEGER; } } + } else if (tk.type == blitz::token::variant::TEXT) { + std::string text = tk.text; + std::transform(text.cbegin(), text.cend(), text.begin(), blitz::utility::utf8_safe_tolower); + if (text == "false") { + node->type = variant::BOOL; + node->number.b = false; + } else if (text == "true") { + node->type = variant::BOOL; + node->number.b = true; + } else if (text == "null") { + node->type = variant::NULL; + node->number.ui = 0; + } } + return node; +} + +blitz::ast::declare::~declare() +{ + /* Variable Declaration + * + * Examples: + * - Local myVar1, myVar2%, myVar3 = "Help", myVar4$ = "Me" + * - Global myVar2g# = 3.147 + * + * + * + */ +} + +bool blitz::ast::declare::can_parse(std::shared_ptr lexer) +{ + auto tk = lexer->current(); + if (tk != blitz::token::variant::TEXT) { + return false; + } + + std::string text = tk.text; + std::transform(text.cbegin(), text.cend(), text.begin(), blitz::utility::utf8_safe_tolower); + if (text == "local") { + return true; + } else if (text == "global") { + return true; + } + + return false; +} + +std::shared_ptr blitz::ast::declare::try_parse(std::shared_ptr lexer) +{ + bool is_global; + + auto file = lexer->file(); + auto tk = lexer->current(); + if (tk != blitz::token::variant::TEXT) { + throw blitz::error(file, tk.location, tk.location, blitz::format("Unexpected %s, expected text.", tk.to_string().c_str())); + } + + // Check if this is Local or Global. + std::string text = tk.text; + std::transform(text.cbegin(), text.cend(), text.begin(), blitz::utility::utf8_safe_tolower); + if (text == "local") { + is_global = false; + } else if (text == "global") { + is_global = true; + } else { + throw blitz::error(file, tk.location, tk.location, blitz::format("Unexpected %s, expected Local or Global.", tk.to_string().c_str())); + } + + auto node = std::make_shared(); + node->global = is_global; + + // Local myVar + // Local myVar, myVar2 + // Local myVar = Expression + // Local myVar = Expression, myVar2 + // Local myVar = Expression : + // Local myVar : Local MyVar + // Local myVar:Int + // Local myVar.StructType + + for (tk = lexer->peek(); tk != blitz::token::variant::ENDOFFILE; tk = lexer->peek()) { + // Declarations require a valid variable name + if (tk.type != blitz::token::variant::TEXT) { + throw blitz::error(file, tk.location, tk.location, blitz::format("Expected variable name, got %s.", tk.to_string().c_str())); + } else if (!blitz::ast::variable::can_parse(lexer)) { + throw blitz::error(file, tk.location, tk.location, blitz::format("Expected variable name, got %s.", tk.to_string().c_str())); + } + + // Advance the lexer and parse the variable declaration. + tk = lexer->next(); + auto variable_nd = blitz::ast::variable::try_parse(lexer); + node->nodes.push_back(variable_nd); + + // Peek at what's coming up and decide on behavior. + tk = lexer->peek(); + if ((tk.type == blitz::token::variant::NEWLINE) || ((tk.type == blitz::token::variant::SYMBOL) && (tk.text == ":")) || (tk.type == blitz::token::variant::ENDOFFILE)) { + // Nothing useful, break out here. + break; + } else if ((tk.type == blitz::token::variant::SYMBOL) && (tk.text == ",")) { + // Next variable is being declared. + lexer->next(); + continue; + } else if ((tk.type == blitz::token::variant::SYMBOL) && (tk.text == "=")) { + // Assignment, not implemented yet. Skip until next valid symbol. + lexer->next(); + do { + if ((tk.type == blitz::token::variant::SYMBOL) && (tk.text == ",")) { + // Next variable is being declared. + break; + } else if ((tk.type == blitz::token::variant::NEWLINE) || ((tk.type == blitz::token::variant::SYMBOL) && (tk.text == ":"))) { + return node; + } else if (tk.type == blitz::token::variant::ENDOFFILE) { + return node; + } + tk = lexer->next(); + } while (true); + } + } + + return node; } diff --git a/code_compiler/source/ast/ast.hpp b/code_compiler/source/ast/ast.hpp index 796f362..c8fdbaa 100644 --- a/code_compiler/source/ast/ast.hpp +++ b/code_compiler/source/ast/ast.hpp @@ -29,6 +29,8 @@ // - Function, Return, End Function: Defines a function, and allows returning values. Yes, I know, End itself terminates the program, this is a special case. Thanks younger Sibly. // - And, Or, Not: Logical operator, self-explanatory really. +#undef NULL + namespace blitz { namespace ast { struct node { @@ -72,7 +74,17 @@ namespace blitz { static std::shared_ptr try_parse(std::shared_ptr lexer); }; - struct expression : public node {}; + struct declare : public node { + // Local, Global + bool global; + std::list> nodes; + virtual ~declare(); + + static bool can_parse(std::shared_ptr lexer); + static std::shared_ptr try_parse(std::shared_ptr lexer); + }; + + struct expression : public node {}; } // namespace ast } // namespace blitz diff --git a/code_compiler/source/lexer.cpp b/code_compiler/source/lexer.cpp index fe18c91..fc9a369 100644 --- a/code_compiler/source/lexer.cpp +++ b/code_compiler/source/lexer.cpp @@ -271,7 +271,22 @@ blitz::token blitz::lexer::peek() if ((chr == EOF) || (chr < 32) || is_newline || iswhitespace(chr) || (chr == ';')) { // EOF, Control, NL, Whitespace, and Comments should return to default parsing. complete = true; - } else if (isdigit(chr) || (chr == '.') || (chr == 'b') || (chr == 'x')) { + } else if (chr == 'f') { + _stream.get(); + token.type = blitz::token::variant::REAL; + complete = true; + } else if (chr == 'u') { + _stream.get(); + buffer << (char)chr; + token.type = blitz::token::variant::INTEGER; + complete = true; + } else if ((chr == 'b') || (chr == 'x')) { + _stream.get(); + buffer << (char)chr; + if (buffer.tellp() > 2) { + throw blitz::error(_file, token.location, _location, blitz::format("In token %s: Expected [0-9], got '%s' instead.", token.to_string().c_str(), std::string{ 1, (char)chr }.c_str())); + } + } else if (isdigit(chr) || (chr == '.')) { _stream.get(); buffer << (char)chr; if (chr == '.') { diff --git a/code_compiler/source/lexer.hpp b/code_compiler/source/lexer.hpp index def21f4..804b8ed 100644 --- a/code_compiler/source/lexer.hpp +++ b/code_compiler/source/lexer.hpp @@ -25,7 +25,7 @@ namespace blitz { UNKNOWN, // We have absolutely no fucking clue. ENDOFFILE, // End of the file. NEWLINE, // New Line. - SEPARATOR, // Command Separator. + //SEPARATOR, // Command Separator. CONTROL, // All kinds of control signals SYMBOL, // All kinds of symbols. COMMENT, // ; Whatever diff --git a/code_compiler/source/main.cpp b/code_compiler/source/main.cpp index accc613..1df813d 100644 --- a/code_compiler/source/main.cpp +++ b/code_compiler/source/main.cpp @@ -14,48 +14,34 @@ int main(int argc, char** argv) std::setlocale(LC_ALL, "en_US.UTF-8"); std::cout << argv[1] << std::endl; + std::list> nodes; - blitz::lexer lex(argv[1]); - for (blitz::token token = lex.next(); (token.type != blitz::token::variant::ENDOFFILE); token = lex.next()) { - switch (token.type) { - case blitz::token::variant::COMMENT: - std::cout << token.text; - break; - case blitz::token::variant::SYMBOL: - std::cout << token.text << " "; - break; - case blitz::token::variant::TEXT: - case blitz::token::variant::INTEGER: - case blitz::token::variant::REAL: - std::cout << token.text << " "; - break; - case blitz::token::variant::STRING: - std::cout << "\"" << token.text << "\"" - << " "; - break; - case blitz::token::variant::NEWLINE: + std::shared_ptr lex2 = std::make_shared(argv[1]); + for (blitz::token token = lex2->next(); (token.type != blitz::token::variant::ENDOFFILE); token = lex2->next()) { + std::cout << token.to_string() << " "; + if (token.type == blitz::token::variant::NEWLINE) { std::cout << std::endl; - break; - - default: - std::cout << token.to_string() << " "; - break; } + if (token.type == blitz::token::variant::UNKNOWN) { std::cin.get(); + } else if (blitz::ast::declare::can_parse(lex2)) { + nodes.push_back(blitz::ast::declare::try_parse(lex2)); + } else if (blitz::ast::value::can_parse(lex2)) { + nodes.push_back(blitz::ast::value::try_parse(lex2)); + } else if (blitz::ast::variable::can_parse(lex2)) { + nodes.push_back(blitz::ast::variable::try_parse(lex2)); } } - blitz::parser pars(argv[1]); - //std::cin.get(); return 0; } catch (blitz::error const& ex) { - std::cout << ex.file() << std::endl; + std::cout << std::endl << ex.file() << std::endl; std::cout << "Line " << ex.at().first << ", Char " << ex.at().second << ": " << ex.what() << std::endl; return 1; } catch (std::runtime_error const& ex) { - std::cout << ex.what() << std::endl; + std::cout << std::endl << ex.what() << std::endl; return 1; } } diff --git a/code_compiler/source/parser.cpp b/code_compiler/source/parser.cpp index eb3713c..17fc1ac 100644 --- a/code_compiler/source/parser.cpp +++ b/code_compiler/source/parser.cpp @@ -12,8 +12,6 @@ blitz::parser::~parser() {} blitz::parser::parser(std::filesystem::path file) : _file(file), _lexer(), _expr() { _lexer = std::make_shared(file); - _lexer->next(); - blitz::ast::variable::try_parse(_lexer); } diff --git a/code_compiler/source/util.cpp b/code_compiler/source/util.cpp index 375c958..baa533a 100644 --- a/code_compiler/source/util.cpp +++ b/code_compiler/source/util.cpp @@ -6,7 +6,7 @@ bool blitz::utility::is_symbol(int code) { - switch (chr) { + switch (code) { case ';': // Comment case ':': // Command Separator case '=': // Equal @@ -39,7 +39,7 @@ bool blitz::utility::is_symbol(int code) bool blitz::utility::is_white_space(int code) { - switch (chr) { + switch (code) { case ' ': case '\t': return true; @@ -57,3 +57,11 @@ bool blitz::utility::is_digit(int code) bool blitz::utility::is_alpha(int code) { return isalpha(code); } + +char blitz::utility::utf8_safe_tolower(char code) +{ + if (code & 0b10000000) { // Exclude Unicode + return code; + } + return (char)std::tolower(code); +} diff --git a/code_compiler/source/util.hpp b/code_compiler/source/util.hpp index 0d3ed23..abc3a7a 100644 --- a/code_compiler/source/util.hpp +++ b/code_compiler/source/util.hpp @@ -10,4 +10,6 @@ namespace blitz::utility { bool is_digit(int code); bool is_alpha(int code); + + char utf8_safe_tolower(char code); } // namespace blitz::utility diff --git a/tests/test.bb b/tests/test.bb index effe272..2ae1241 100644 --- a/tests/test.bb +++ b/tests/test.bb @@ -1,4 +1,50 @@ ; AUTOGENERATED COPYRIGHT HEADER START ; Copyright (C) 2025 Michael Fabian 'Xaymar' Dirks ; AUTOGENERATED COPYRIGHT HEADER END +Variable +Variable% +Variable%% +Variable# +Variable## +Variable$ +Variable$$ +Variable:Int +Variable:Float Variable:String +Variable:Int8 +Variable:Int16 +Variable:Int32 +Variable:Int64 +Variable:UInt8 +Variable:UInt16 +Variable:UInt32 +Variable:UInt64 +Variable:Half +Variable:Single +Variable:Double +Variable:Real +Variable:Float16 +Variable:Float32 +Variable:Float64 +Variable:Real16 +Variable:Real32 +Variable:Real64 +0 +0x +0b +0b100 +0b100u +0x100 +0x100u +255 +255u +65535 +65535u +"Level Up" +"Hello World" +True +False +Null +Local myVar%, myVar2# +Global gVar$, gVar15:Int32 +Global gHelloWorld$ = "Hello World", localVarThatIsntLocal