More work on getting parsing to be functional

This commit is contained in:
Michael Fabian 'Xaymar' Dirks
2025-01-25 19:25:14 +01:00
parent e191173e7b
commit b61005bcaa
16 changed files with 855 additions and 400 deletions
+143 -94
View File
@@ -1,5 +1,5 @@
/// AUTOGENERATED COPYRIGHT HEADER START
// Copyright (C) 2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// Copyright (C) 2024-2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#include "parser.hpp"
#include <algorithm>
@@ -12,101 +12,150 @@ blitz::parser::~parser() {}
// Creates a parser for the given source file.
//
// The path is stored in `_file`, a lexer is created for the same path, `next()` is called on
// that lexer once, and the lexer is then handed to blitz::ast::variable::try_parse().
// The value returned by that try_parse() call is not used here.
blitz::parser::parser(std::filesystem::path file)
	: _file(file), _lexer(std::make_shared<blitz::lexer>(file)), _expr()
{
	_lexer->next();
	blitz::ast::variable::try_parse(_lexer);
}
// Returns the expression node currently stored in `_expr`.
//
// This does not touch the lexer; it only hands out a copy of the stored shared pointer,
// which may be empty (next() resets `_expr` when it reaches the end of the file).
std::shared_ptr<blitz::ast::node> blitz::parser::current()
{
return _expr;
}
std::shared_ptr<blitz::ast::node> blitz::parser::next()
{
// This should return an entire "line" of expressions in one go, i.e.:
// 1. Local a = 1, b = a, c = b+a
// -> Local(Variable(a, Expression(Integer(1))), Variable(b, Expresssion(Variable(a))), Variable(c, Expression(Add(Variable(b), Variable(a)))
// 2. Include "HelloWorld.bb"
// -> Include(String("HelloWorld.bb"))
// 3. Function HelloWorld()
// -> Function(HelloWorld, ...)(
// Not quite sure if the above makes sense, we'd be returning many expressions outside of functions, but only one inside a function? Why even bother with the current/next crap then?
// Handling Include becomes a problem too. I guess we should actually return expressions on a line by line basis, and let the "compiler" figure out scope and stuff.
//
//std::shared_ptr<blitz::ast::node> blitz::parser::current()
//{
// return _expr;
//}
//
//std::shared_ptr<blitz::ast::node> blitz::parser::next()
//{
// // This should return an entire "line" of expressions in one go, i.e.:
// // 1. Local a = 1, b = a, c = b+a
// // -> Local(Variable(a, Expression(Integer(1))), Variable(b, Expresssion(Variable(a))), Variable(c, Expression(Add(Variable(b), Variable(a)))
// // 2. Include "HelloWorld.bb"
// // -> Include(String("HelloWorld.bb"))
// // 3. Function HelloWorld()
// // -> Function(HelloWorld, ...)(
// // Not quite sure if the above makes sense, we'd be returning many expressions outside of functions, but only one inside a function? Why even bother with the current/next crap then?
// // Handling Include becomes a problem too. I guess we should actually return expressions on a line by line basis, and let the "compiler" figure out scope and stuff.
//
// // Grab the next token to figure out what behavior we should have.
// while (true) {
// auto token = _lexer->next();
// try {
// switch (token.type) {
// case blitz::token::variant::ENDOFFILE:
// // End of file means there's nothing left to parse.
// _expr.reset();
// return nullptr;
// case blitz::token::variant::COMMENT:
// case blitz::token::variant::NEWLINE:
// case blitz::token::variant::SEPARATOR:
// // Ignore some things that aren't very useful right now.
// continue;
// case blitz::token::variant::TEXT:
// return try_parse(token);
// default:
// throw nullptr;
// }
// } catch (blitz::error const& ex) {
// throw ex;
// } catch (std::exception const& ex) {
// throw new blitz::error(_file, token.location, token.location, ex.what());
// } catch (...) {
// throw new blitz::error(_file, token.location, token.location, blitz::format("Token %s unexpected at this point.", token.to_string().c_str()));
// }
// }
//}
//
//std::shared_ptr<blitz::ast::node> blitz::parser::try_parse(blitz::token token)
//{
// // ToDo: Switch to a proper Unicode library. Maybe Boost?
// std::string ltext;
// std::transform(token.text.begin(), token.text.end(), ltext.begin(), [](std::string::value_type c) { return std::tolower(c); });
//
// if ((ltext == "local") || (ltext == "global")) {
// // Local/Global have the same parsing, but different functionality.
// // Should be:
// // Text Text [Symbol(=) Expression] [Symbol(,) Text [Symbol(=) Expression] [Symbol(,) ...]]
//
// } else if (ltext == "function") {
// //Example:
// // Function FunctionName[$,%,#,:TypeName,.StructName]([Variable[, Variable=Value[, ...]])
// // [Function Content ...]
// // EndFunction
//
//
// } else if (ltext == "select") {
// } else if (ltext == "case") {
// } else if (ltext == "endselect") {
// } else if (ltext == "if") {
// } else if (ltext == "elif") {
// } else if (ltext == "endif") {
//
// } else if (ltext == "end") {
// }
//
// return nullptr;
//}
//
//std::shared_ptr<blitz::ast::node> blitz::parser::try_parse_expression() {
// // () + - / * = <> > < String Integer Float Variable
//}
//
//std::shared_ptr<blitz::ast::node> blitz::parser::try_parse_variable_expression()
//{
// // Text [Symbol(=) Expression(...)] [Symbol(,) [Text [Symbol(=) Expression(...)]]]
//
// auto label = _lexer->next();
// if (label != blitz::token::variant::TEXT) {
// throw new blitz::error(_file, label.location, label.location, blitz::format("Unexpected %s, expected Text.", label.to_string().c_str()));
// }
//
// auto node = std::make_shared<blitz::ast::variable>(label);
//
// auto operand = _lexer->next();
// if (operand == "=") {
// //node->set_value(try_parse_expression());
// } else if (operand == blitz::token::variant::NEWLINE || operand == blitz::token::variant::SEPARATOR || (operand == blitz::token::variant::SYMBOL && operand == ",")) {
// return node;
// } else {
// throw new blitz::error(_file, label.location, operand.location, blitz::format("Unexpected %s, expected Symbol(=), NewLine, Separator, or Symbol(,).", operand.to_string().c_str()));
// }
//
// return node;
//}
// Grab the next token to figure out what behavior we should have.
while (true) {
auto token = _lexer->next();
try {
switch (token.type) {
case blitz::token::variant::ENDOFFILE:
// End of file means there's nothing left to parse.
_expr.reset();
return nullptr;
case blitz::token::variant::COMMENT:
case blitz::token::variant::NEWLINE:
case blitz::token::variant::SEPARATOR:
// Ignore some things that aren't very useful right now.
continue;
case blitz::token::variant::TEXT:
return try_parse(token);
default:
throw nullptr;
}
} catch (blitz::error const& ex) {
throw ex;
} catch (std::exception const& ex) {
throw new blitz::error(_file, token.location, token.location, ex.what());
} catch (...) {
throw new blitz::error(_file, token.location, token.location, blitz::format("Token %s unexpected at this point.", token.to_string().c_str()));
}
}
}
// Dispatches on the keyword carried by `token`.
//
// The token text is lower-cased (byte-wise, via std::tolower) and compared against the
// known keywords. None of the keyword branches is implemented yet, so this currently
// returns nullptr for every input.
//
// @param token Token whose `text` is inspected; next() passes TEXT tokens here.
// @return Always nullptr for now.
std::shared_ptr<blitz::ast::node> blitz::parser::try_parse(blitz::token token)
{
	// ToDo: Switch to a proper Unicode library. Maybe Boost?
	// Copy the text first so the destination has room for the result; transforming into
	// the begin() of an empty string writes out of bounds.
	std::string ltext(token.text.begin(), token.text.end());
	// std::tolower requires a value representable as unsigned char, so take the character
	// as unsigned char instead of a possibly negative char.
	std::transform(ltext.begin(), ltext.end(), ltext.begin(), [](unsigned char c) { return static_cast<std::string::value_type>(std::tolower(c)); });

	if ((ltext == "local") || (ltext == "global")) {
		// Local/Global have the same parsing, but different functionality.
		// Should be:
		// Text Text [Symbol(=) Expression] [Symbol(,) Text [Symbol(=) Expression] [Symbol(,) ...]]
	} else if (ltext == "function") {
	} else if (ltext == "select") {
	} else if (ltext == "case") {
	} else if (ltext == "endselect") {
	} else if (ltext == "if") {
	} else if (ltext == "elif") {
	} else if (ltext == "endif") {
	} else if (ltext == "end") {
	}

	return nullptr;
}
std::shared_ptr<blitz::ast::node> blitz::parser::try_parse_variable()
{
// Text [Symbol(=) Expression(...)] [Symbol(,) [Text [Symbol(=) Expression(...)]]]
auto label = _lexer->next();
if (label != blitz::token::variant::TEXT) {
throw new blitz::error(_file, label.location, label.location, blitz::format("Unexpected %s, expected Text.", label.to_string().c_str()));
}
auto node = std::make_shared<blitz::ast::variable>(label);
auto operand = _lexer->next();
if (operand == "=") {
//node->set_value(try_parse_expression());
} else if (operand == blitz::token::variant::NEWLINE || operand == blitz::token::variant::SEPARATOR || (operand == blitz::token::variant::SYMBOL && operand == ",")) {
return node;
} else {
throw new blitz::error(_file, label.location, operand.location, blitz::format("Unexpected %s, expected Symbol(=), NewLine, Separator, or Symbol(,).", operand.to_string().c_str()));
}
return node;
}
/* Expressions
*
* Example Locations:
* - Local Var = Expression
* - Var = Expression
* - myFunction(Expression, ...)
* - If Expression Then
*
* Example Expressions:
* - 0 + 0, 0 - 0, 0 * 0, 0 / 0, 0 Shr 0, 0 Shl 0, 0 And 0, 0 Or 0, Not 0,
* -
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*/