Even more work

This commit is contained in:
Michael Fabian 'Xaymar' Dirks
2025-02-12 00:03:19 +01:00
parent b61005bcaa
commit 15b8ed7690
9 changed files with 250 additions and 55 deletions
+147 -19
View File
@@ -2,9 +2,7 @@
// Copyright (C) 2017-2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#include "ast.hpp"
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include "util.hpp"
blitz::ast::variable::~variable()
{
@@ -91,6 +89,9 @@ std::shared_ptr<blitz::ast::node> blitz::ast::variable::try_parse(std::shared_pt
auto symbol_tk = lexer->peek();
if (symbol_tk.type != blitz::token::variant::SYMBOL) {
return node;
} else if (symbol_tk.location.second != (name_tk.location.second + name_tk.text.length())) {
// We only care about these if they're immediately attached to us.
return node;
}
if (symbol_tk.text == ":") {
// :Type
@@ -99,6 +100,9 @@ std::shared_ptr<blitz::ast::node> blitz::ast::variable::try_parse(std::shared_pt
if (type_tk != blitz::token::variant::TEXT) {
throw blitz::error(file, name_tk.location, type_tk.location, blitz::format("Unexpected %s, expected text.", type_tk.to_string().c_str()));
}
if (type_tk.location.second != (symbol_tk.location.second + 1)) {
throw blitz::error(file, name_tk.location, type_tk.location, blitz::format("Unexpected white space, expected text."));
}
auto type = blitz::types::from_string(type_tk.text);
if (type == blitz::types::type::UNKNOWN) {
@@ -114,6 +118,9 @@ std::shared_ptr<blitz::ast::node> blitz::ast::variable::try_parse(std::shared_pt
if (type_tk != blitz::token::variant::TEXT) {
throw blitz::error(file, name_tk.location, type_tk.location, blitz::format("Unexpected %s, expected text.", type_tk.to_string().c_str()));
}
if (type_tk.location.second != (symbol_tk.location.second + 1)) {
throw blitz::error(file, name_tk.location, type_tk.location, blitz::format("Unexpected white space, expected text."));
}
node->tokens.push_back(type_tk);
node->type = blitz::types::type::STRUCT;
@@ -145,20 +152,15 @@ bool blitz::ast::value::can_parse(std::shared_ptr<blitz::lexer> lexer)
case blitz::token::variant::REAL:
case blitz::token::variant::INTEGER:
return true;
case blitz::token::variant::STRING: {
case blitz::token::variant::TEXT: {
// We can only parse True, False, Null
std::string text = tk.text;
std::transform(text.cbegin(), text.cend(), text.begin(), [](char from) {
if (from & 0b10000000) { // Exclude Unicode
return from;
}
return (char)std::tolower(from);
});
if (tk.text == "false") {
std::transform(text.cbegin(), text.cend(), text.begin(), blitz::utility::utf8_safe_tolower);
if (text == "false") {
return true;
} else if (tk.text == "true") {
} else if (text == "true") {
return true;
} else if (tk.text == "null") {
} else if (text == "null") {
return true;
}
break;
@@ -170,8 +172,8 @@ bool blitz::ast::value::can_parse(std::shared_ptr<blitz::lexer> lexer)
std::shared_ptr<blitz::ast::node> blitz::ast::value::try_parse(std::shared_ptr<blitz::lexer> lexer)
{
auto file = lexer->file();
auto tk = lexer->current();
auto utk = lexer->peek();
auto node = std::make_shared<blitz::ast::value>();
node->type = variant::UNKNOWN;
@@ -180,6 +182,12 @@ std::shared_ptr<blitz::ast::node> blitz::ast::value::try_parse(std::shared_ptr<b
node->type = variant::STRING;
node->text = tk.text;
return node;
} else if (tk.type == blitz::token::variant::REAL) {
node->type = variant::REAL;
node->number.f = strtod(tk.text.c_str(), nullptr);
if (errno == ERANGE) {
throw blitz::error(file, tk.location, tk.location, blitz::format("Real '%s' is not representable on this system.", tk.text.c_str()));
}
} else if (tk.type == blitz::token::variant::INTEGER) {
// Figure out which base this integer is in (and where it starts).
int base = 10;
@@ -191,18 +199,18 @@ std::shared_ptr<blitz::ast::node> blitz::ast::value::try_parse(std::shared_ptr<b
} else if (text[1] == 'b') { // Base 2
base = 2;
text = text += 2;
} else if (text[1] == '0') {
} else if (isdigit(text[1])) {
base = 8;
text = text += 1;
}
}
if (utk.type == blitz::token::variant::TEXT && utk.text == "u") {
// User specific this is unsigned, so treat it as such.
if (tk.text[tk.text.length() - 1] == 'u') {
// User wants this to be unsigned.
node->type = variant::UNSIGNED_INTEGER;
node->number.ui = strtoull(text, nullptr, base);
if (errno == ERANGE) {
throw blitz::error(file, tk.location, tk.location, blitz::format("Value '%s' is not representable on this system.", tk.text.c_str()));
throw blitz::error(file, tk.location, tk.location, blitz::format("Integer '%s' is not representable on this system.", tk.text.c_str()));
}
} else {
// Try and figure out if it is unsigned.
@@ -211,12 +219,132 @@ std::shared_ptr<blitz::ast::node> blitz::ast::value::try_parse(std::shared_ptr<b
node->type = variant::UNSIGNED_INTEGER;
node->number.ui = strtoull(text, nullptr, base);
if (errno == ERANGE) {
throw blitz::error(file, tk.location, tk.location, blitz::format("Value '%s' is not representable on this system.", tk.text.c_str()));
throw blitz::error(file, tk.location, tk.location, blitz::format("Integer '%s' is not representable on this system.", tk.text.c_str()));
}
} else {
node->type = variant::INTEGER;
}
}
} else if (tk.type == blitz::token::variant::TEXT) {
std::string text = tk.text;
std::transform(text.cbegin(), text.cend(), text.begin(), blitz::utility::utf8_safe_tolower);
if (text == "false") {
node->type = variant::BOOL;
node->number.b = false;
} else if (text == "true") {
node->type = variant::BOOL;
node->number.b = true;
} else if (text == "null") {
node->type = variant::NULL;
node->number.ui = 0;
}
}
return node;
}
/* blitz::ast::declare represents a variable declaration statement.
 *
 * Example source lines this node is meant to cover:
 * - Local myVar1, myVar2%, myVar3 = "Help", myVar4$ = "Me"
 * - Global myVar2g# = 3.147
 *
 * The destructor has no work of its own to perform.
 */
blitz::ast::declare::~declare() = default;
// Reports whether the lexer's current token starts a declaration, i.e. it is
// a TEXT token spelling "Local" or "Global" (compared case-insensitively for
// ASCII letters). The lexer is only inspected, never advanced.
bool blitz::ast::declare::can_parse(std::shared_ptr<blitz::lexer> lexer)
{
	auto head = lexer->current();
	if (head != blitz::token::variant::TEXT) {
		return false;
	}

	// Lower-case a copy so the keyword comparison ignores the source casing.
	std::string keyword = head.text;
	std::transform(keyword.cbegin(), keyword.cend(), keyword.begin(), blitz::utility::utf8_safe_tolower);

	return (keyword == "local") || (keyword == "global");
}
/* Parses a Local/Global declaration statement into a blitz::ast::declare node.
 *
 * On entry lexer->current() must be the Local or Global keyword (ASCII
 * case-insensitive). Each declared variable is parsed through
 * blitz::ast::variable::try_parse and appended to the node in source order.
 *
 * Accepted shapes:
 *   Local myVar
 *   Local myVar, myVar2
 *   Local myVar = Expression
 *   Local myVar = Expression, myVar2
 *   Local myVar = Expression :
 *   Local myVar : Local MyVar
 *   Local myVar:Int
 *   Local myVar.StructType
 *
 * Statement terminators (new line, ':' or end of file) directly after a
 * variable are only peeked at and left for the caller. A terminator reached
 * while skipping an initialiser has already been consumed.
 *
 * Throws blitz::error when the keyword is not Local/Global, when a variable
 * name is missing (including at end of file), or when a variable is followed
 * by anything other than ',', '=', ':' or the end of the line.
 */
std::shared_ptr<blitz::ast::node> blitz::ast::declare::try_parse(std::shared_ptr<blitz::lexer> lexer)
{
	auto file = lexer->file();
	auto tk   = lexer->current();
	if (tk != blitz::token::variant::TEXT) {
		throw blitz::error(file, tk.location, tk.location, blitz::format("Unexpected %s, expected text.", tk.to_string().c_str()));
	}

	// Check if this is Local or Global.
	std::string text = tk.text;
	std::transform(text.cbegin(), text.cend(), text.begin(), blitz::utility::utf8_safe_tolower);
	bool is_global = false;
	if (text == "global") {
		is_global = true;
	} else if (text != "local") {
		throw blitz::error(file, tk.location, tk.location, blitz::format("Unexpected %s, expected Local or Global.", tk.to_string().c_str()));
	}

	auto node    = std::make_shared<blitz::ast::declare>();
	node->global = is_global;

	// True when the token is the given single symbol.
	const auto is_symbol_tk = [](const auto& t, const char* symbol) {
		return (t.type == blitz::token::variant::SYMBOL) && (t.text == symbol);
	};
	// True when the token ends the current statement.
	const auto ends_statement = [&is_symbol_tk](const auto& t) {
		return (t.type == blitz::token::variant::NEWLINE) || (t.type == blitz::token::variant::ENDOFFILE) || is_symbol_tk(t, ":");
	};

	while (true) {
		// Declarations require a valid variable name. End of file is rejected here
		// as well, so a dangling keyword or trailing ',' is never silently accepted.
		tk = lexer->peek();
		if ((tk.type != blitz::token::variant::TEXT) || !blitz::ast::variable::can_parse(lexer)) {
			throw blitz::error(file, tk.location, tk.location, blitz::format("Expected variable name, got %s.", tk.to_string().c_str()));
		}

		// Advance the lexer onto the name and parse the variable declaration.
		lexer->next();
		node->nodes.push_back(blitz::ast::variable::try_parse(lexer));

		// Peek at what's coming up and decide on behavior.
		tk = lexer->peek();
		if (ends_statement(tk)) {
			// End of the declaration; the terminator is left for the caller.
			break;
		}
		if (is_symbol_tk(tk, ",")) {
			// Next variable is being declared.
			lexer->next();
			continue;
		}
		if (!is_symbol_tk(tk, "=")) {
			// Anything else (for example a second name without a separating ',')
			// is a syntax error rather than the start of another declaration.
			throw blitz::error(file, tk.location, tk.location, blitz::format("Unexpected %s, expected ',', '=', ':' or end of line.", tk.to_string().c_str()));
		}

		// Assignment, not implemented yet: consume '=' and discard tokens until
		// the next ',' (another declaration follows) or the end of the statement.
		lexer->next();
		bool more_declarations = false;
		while (!more_declarations) {
			tk = lexer->next();
			if (is_symbol_tk(tk, ",")) {
				more_declarations = true;
			} else if (ends_statement(tk)) {
				return node;
			}
		}
	}

	return node;
}
+13 -1
View File
@@ -29,6 +29,8 @@
// - Function, Return, End Function: Defines a function, and allows returning values. Yes, I know, End itself terminates the program, this is a special case. Thanks younger Sibly.
// - And, Or, Not: Logical operator, self-explanatory really.
#undef NULL
namespace blitz {
namespace ast {
struct node {
@@ -72,7 +74,17 @@ namespace blitz {
static std::shared_ptr<blitz::ast::node> try_parse(std::shared_ptr<blitz::lexer> lexer);
};
struct expression : public node {};
// AST node for a Local/Global variable declaration statement.
struct declare : public node {
	// True for a Global declaration, false for a Local one. Initialised so that
	// a freshly constructed node never carries an indeterminate value.
	bool global = false;

	// Nodes of the declared variables, in the order they were parsed.
	std::list<std::shared_ptr<blitz::ast::node>> nodes;

	virtual ~declare();

	// Returns true when the lexer's current token is the Local or Global keyword.
	static bool can_parse(std::shared_ptr<blitz::lexer> lexer);

	// Parses the declaration starting at the lexer's current token.
	// Throws blitz::error on malformed input.
	static std::shared_ptr<blitz::ast::node> try_parse(std::shared_ptr<blitz::lexer> lexer);
};
struct expression : public node {};
} // namespace ast
} // namespace blitz
+16 -1
View File
@@ -271,7 +271,22 @@ blitz::token blitz::lexer::peek()
if ((chr == EOF) || (chr < 32) || is_newline || iswhitespace(chr) || (chr == ';')) {
// EOF, Control, NL, Whitespace, and Comments should return to default parsing.
complete = true;
} else if (isdigit(chr) || (chr == '.') || (chr == 'b') || (chr == 'x')) {
} else if (chr == 'f') {
_stream.get();
token.type = blitz::token::variant::REAL;
complete = true;
} else if (chr == 'u') {
_stream.get();
buffer << (char)chr;
token.type = blitz::token::variant::INTEGER;
complete = true;
} else if ((chr == 'b') || (chr == 'x')) {
_stream.get();
buffer << (char)chr;
if (buffer.tellp() > 2) {
throw blitz::error(_file, token.location, _location, blitz::format("In token %s: Expected [0-9], got '%s' instead.", token.to_string().c_str(), std::string{ 1, (char)chr }.c_str()));
}
} else if (isdigit(chr) || (chr == '.')) {
_stream.get();
buffer << (char)chr;
if (chr == '.') {
+1 -1
View File
@@ -25,7 +25,7 @@ namespace blitz {
UNKNOWN, // We have absolutely no fucking clue.
ENDOFFILE, // End of the file.
NEWLINE, // New Line.
SEPARATOR, // Command Separator.
//SEPARATOR, // Command Separator.
CONTROL, // All kinds of control signals
SYMBOL, // All kinds of symbols.
COMMENT, // ; Whatever
+14 -28
View File
@@ -14,48 +14,34 @@ int main(int argc, char** argv)
std::setlocale(LC_ALL, "en_US.UTF-8");
std::cout << argv[1] << std::endl;
std::list<std::shared_ptr<blitz::ast::node>> nodes;
blitz::lexer lex(argv[1]);
for (blitz::token token = lex.next(); (token.type != blitz::token::variant::ENDOFFILE); token = lex.next()) {
switch (token.type) {
case blitz::token::variant::COMMENT:
std::cout << token.text;
break;
case blitz::token::variant::SYMBOL:
std::cout << token.text << " ";
break;
case blitz::token::variant::TEXT:
case blitz::token::variant::INTEGER:
case blitz::token::variant::REAL:
std::cout << token.text << " ";
break;
case blitz::token::variant::STRING:
std::cout << "\"" << token.text << "\""
<< " ";
break;
case blitz::token::variant::NEWLINE:
std::cout << std::endl;
break;
default:
std::shared_ptr<blitz::lexer> lex2 = std::make_shared<blitz::lexer>(argv[1]);
for (blitz::token token = lex2->next(); (token.type != blitz::token::variant::ENDOFFILE); token = lex2->next()) {
std::cout << token.to_string() << " ";
break;
if (token.type == blitz::token::variant::NEWLINE) {
std::cout << std::endl;
}
if (token.type == blitz::token::variant::UNKNOWN) {
std::cin.get();
} else if (blitz::ast::declare::can_parse(lex2)) {
nodes.push_back(blitz::ast::declare::try_parse(lex2));
} else if (blitz::ast::value::can_parse(lex2)) {
nodes.push_back(blitz::ast::value::try_parse(lex2));
} else if (blitz::ast::variable::can_parse(lex2)) {
nodes.push_back(blitz::ast::variable::try_parse(lex2));
}
}
blitz::parser pars(argv[1]);
//std::cin.get();
return 0;
} catch (blitz::error const& ex) {
std::cout << ex.file() << std::endl;
std::cout << std::endl << ex.file() << std::endl;
std::cout << "Line " << ex.at().first << ", Char " << ex.at().second << ": " << ex.what() << std::endl;
return 1;
} catch (std::runtime_error const& ex) {
std::cout << ex.what() << std::endl;
std::cout << std::endl << ex.what() << std::endl;
return 1;
}
}
-2
View File
@@ -12,8 +12,6 @@ blitz::parser::~parser() {}
// Constructs a parser for `file`: remembers the path and creates the lexer
// that reads it. `_lexer` and `_expr` start out default-initialised; `_lexer`
// is assigned in the body.
blitz::parser::parser(std::filesystem::path file) : _file(file), _lexer(), _expr()
{
_lexer = std::make_shared<blitz::lexer>(file);
// NOTE(review): the two calls below advance the lexer once and run a variable
// parse whose result is discarded. This hunk is marked as removing two lines,
// so they are presumably the ones dropped by this commit - confirm against the
// actual file before relying on them.
_lexer->next();
blitz::ast::variable::try_parse(_lexer);
}
+10 -2
View File
@@ -6,7 +6,7 @@
bool blitz::utility::is_symbol(int code)
{
switch (chr) {
switch (code) {
case ';': // Comment
case ':': // Command Separator
case '=': // Equal
@@ -39,7 +39,7 @@ bool blitz::utility::is_symbol(int code)
bool blitz::utility::is_white_space(int code)
{
switch (chr) {
switch (code) {
case ' ':
case '\t':
return true;
@@ -57,3 +57,11 @@ bool blitz::utility::is_digit(int code)
// Returns true when `code` is an alphabetic character according to isalpha().
//
// isalpha() is only defined for EOF and for values representable as
// unsigned char. Anything outside [0, 0xFF] (negative values such as EOF or a
// sign-extended char, and code points above one byte) is rejected up front
// instead of being passed through.
bool blitz::utility::is_alpha(int code) {
	if ((code < 0) || (code > 0xFF)) {
		return false;
	}
	return isalpha(code) != 0;
}
// Lower-cases a single byte of UTF-8 text without touching multi-byte
// sequences: bytes with the high bit set are returned unchanged, every other
// byte is passed through std::tolower.
char blitz::utility::utf8_safe_tolower(char code)
{
	const bool high_bit_set = (static_cast<unsigned char>(code) & 0x80u) != 0;
	return high_bit_set ? code : static_cast<char>(std::tolower(code));
}
+2
View File
@@ -10,4 +10,6 @@ namespace blitz::utility {
bool is_digit(int code);
bool is_alpha(int code);
char utf8_safe_tolower(char code);
} // namespace blitz::utility
+46
View File
@@ -1,4 +1,50 @@
; AUTOGENERATED COPYRIGHT HEADER START
; Copyright (C) 2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
; AUTOGENERATED COPYRIGHT HEADER END
Variable
Variable%
Variable%%
Variable#
Variable##
Variable$
Variable$$
Variable:Int
Variable:Float
Variable:String
Variable:Int8
Variable:Int16
Variable:Int32
Variable:Int64
Variable:UInt8
Variable:UInt16
Variable:UInt32
Variable:UInt64
Variable:Half
Variable:Single
Variable:Double
Variable:Real
Variable:Float16
Variable:Float32
Variable:Float64
Variable:Real16
Variable:Real32
Variable:Real64
0
0x
0b
0b100
0b100u
0x100
0x100u
255
255u
65535
65535u
"Level Up"
"Hello World"
True
False
Null
Local myVar%, myVar2#
Global gVar$, gVar15:Int32
Global gHelloWorld$ = "Hello World", localVarThatIsntLocal