More work on getting parsing to be functional

This commit is contained in:
Michael Fabian 'Xaymar' Dirks
2025-01-25 19:25:14 +01:00
parent e191173e7b
commit b61005bcaa
16 changed files with 855 additions and 400 deletions
+10 -15
View File
@@ -1,16 +1,17 @@
# AUTOGENERATED COPYRIGHT HEADER START
# Copyright (C) 2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
# Copyright (C) 2024-2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
# AUTOGENERATED COPYRIGHT HEADER END
# Basic Formatting
TabWidth: 4
UseTab: ForContinuationAndIndentation
ColumnLimit: 65535
LineEnding: LF
#- 0 does not respect the original line breaks!
# Language
Language: Cpp
Standard: c++17
Standard: c++20
# Indentation
AccessModifierOffset: 0
@@ -27,18 +28,10 @@ NamespaceIndentation: All
IncludeCategories:
- Regex: '^"warning-disable.hpp"$'
Priority: 50
- Regex: '^(<|")(config.hpp|common.hpp|ui-common.hpp|strings.hpp|version.hpp|obs.h)("|>)'
Priority: 100
- Regex: '^<obs-'
Priority: 150
- Regex: '^<'
Priority: 200
- Regex: '^<Q'
Priority: 250
- Regex: '^"'
Priority: 300
- Regex: '.moc"$'
Priority: 300
- Regex: '^"warning-enable.hpp"$'
Priority: 500
SortIncludes: true
@@ -49,7 +42,8 @@ AlignConsecutiveAssignments: true
AlignConsecutiveDeclarations: true
AlignEscapedNewlines: Left
AlignOperands: true
AlignTrailingComments: false
AlignTrailingComments: true
#ArrayInitializerAlignmentStyle: Right
DerivePointerAlignment: false
PointerAlignment: Left
@@ -77,16 +71,17 @@ BraceWrapping:
SplitEmptyFunction: false
SplitEmptyRecord: false
SplitEmptyNamespace: false
BinPackArguments: true
BinPackParameters: true
BinPackArguments: false
BinPackParameters: false
BreakBeforeBinaryOperators: NonAssignment
BreakBeforeBraces: Custom
BreakBeforeTernaryOperators: true
BreakConstructorInitializers: BeforeColon
#BreakInheritanceList: BeforeColon
BreakStringLiterals: true
BreakStringLiterals: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
Cpp11BracedListStyle: true
Cpp11BracedListStyle: false
PackConstructorInitializers: NextLineOnly
# Spaces
SpaceAfterCStyleCast: false
+5 -1
View File
@@ -1,5 +1,5 @@
# AUTOGENERATED COPYRIGHT HEADER START
# Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
# Copyright (C) 2017-2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
# AUTOGENERATED COPYRIGHT HEADER END
project(compiler
VERSION ${PROJECT_VERSION}
@@ -15,6 +15,10 @@ target_sources(${PROJECT_NAME} PRIVATE
"source/error.cpp"
"source/parser.hpp"
"source/parser.cpp"
"source/util.hpp"
"source/util.cpp"
"source/types.hpp"
"source/types.cpp"
"source/compiler.hpp"
"source/compiler.cpp"
"source/ast/ast.hpp"
+215 -7
View File
@@ -1,14 +1,222 @@
// AUTOGENERATED COPYRIGHT HEADER START
// Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// Copyright (C) 2017-2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#include "ast.hpp"
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cstdlib>
blitz::ast::variable::~variable() {}
blitz::ast::variable::variable(blitz::token token) : _token(token), _value(nullptr) {}
void blitz::ast::variable::set_value(std::shared_ptr<blitz::ast::expression> value)
blitz::ast::variable::~variable()
{
_value = value;
/* Variable Parsing
*
* Declaration:
* - 8bit Signed Integer Variable
* Variable:Byte
* Variable:Int8
* - 8bit Unsigned Integer Variable
* Variable:UByte
* Variable:UInt8
* - 16bit Signed Integer Variable
* Variable:Short
* Variable:Int16
* - 16bit Unsigned Integer Variable
* Variable:UShort
* Variable:UInt16
* - 32bit Signed Integer Variable
* Variable
* Variable%
* Variable:Int
* Variable:Int32
* - 32bit Unsigned Integer Variable
* Variable:UInt
* Variable:UInt32
* - 64bit Signed Integer Variable
* Variable%%
* Variable:Long
* Variable:Int64
* - 64bit Unsigned Integer Variable
* Variable:ULong
* Variable:UInt64
* - 32bit Real Variable
* Variable#
* Variable:Float
* Variable:Float32
* Variable:Real
* Variable:Real32
* - 64bit Real Variable
* Variable##
* Variable:Double
* Variable:Float64
* Variable:Real64
* - UTF-8 String Variable
* Variable$
* Variable:String
* - Struct Variable
* Variable.StructName
* Variable:StructName
*
* Access:
* - Struct Access:
* Variable\Key
* - Array Access:
* Variable[IntegerIndex]
* - Dynamic Array Access:
* Variable(IntegerIndex)
* - Direct Access:
* Variable
*/
}
// Reports whether a variable could start at the lexer's current token.
//
// Only the type of the token returned by lexer->current() is inspected: a variable always
// begins with a TEXT token (its name).
bool blitz::ast::variable::can_parse(std::shared_ptr<blitz::lexer> lexer)
{
	auto       candidate = lexer->current();
	const bool is_text   = (candidate.type == blitz::token::variant::TEXT);
	return is_text;
}
// Parses a variable starting at the lexer's current token.
//
// Recognised forms:
//   Name              -> type stays UNKNOWN
//   Name:TypeName     -> built-in type resolved through blitz::types::from_string
//   Name.StructName   -> STRUCT, with struct_name set to StructName
//   Name% Name# Name$ -> INT32, FLOAT32, STRING
// Every token that is consumed, plus the name token itself, is appended to the node's token list.
//
// Throws blitz::error if the current token is not TEXT, if ':' or '.' is not followed by a
// TEXT token, or if the name following ':' is not a known built-in type.
std::shared_ptr<blitz::ast::node> blitz::ast::variable::try_parse(std::shared_ptr<blitz::lexer> lexer)
{
	using token_kind = blitz::token::variant;
	using value_type = blitz::types::type;

	auto source     = lexer->file();
	auto identifier = lexer->current();
	if (identifier.type != token_kind::TEXT) {
		throw blitz::error(source, identifier.location, identifier.location, blitz::format("Unexpected %s, expected text.", identifier.to_string().c_str()));
	}

	auto result = std::make_shared<blitz::ast::variable>();
	result->tokens.push_back(identifier);
	result->name = identifier.text;
	result->type = value_type::UNKNOWN;

	// Anything other than a symbol directly after the name means there is no type annotation.
	auto suffix = lexer->peek();
	if (suffix.type != token_kind::SYMBOL) {
		return result;
	}

	// Shared by ':' and '.': consume the symbol, then require a TEXT token after it.
	auto read_type_name = [&]() {
		result->tokens.push_back(lexer->next()); // The ':' or '.' itself.
		auto type_name = lexer->next();
		if (type_name != token_kind::TEXT) {
			throw blitz::error(source, identifier.location, type_name.location, blitz::format("Unexpected %s, expected text.", type_name.to_string().c_str()));
		}
		return type_name;
	};

	if (suffix.text == ":") {
		// Name:Type - must name a built-in type.
		auto type_name = read_type_name();
		auto resolved  = blitz::types::from_string(type_name.text);
		if (resolved == value_type::UNKNOWN) {
			throw blitz::error(source, identifier.location, type_name.location, blitz::format("Unexpected %s, expected built-in type name.", type_name.text.c_str()));
		}
		result->tokens.push_back(type_name);
		result->type = resolved;
		return result;
	}

	if (suffix.text == ".") {
		// Name.Struct - the struct name is stored as-is.
		auto type_name = read_type_name();
		result->tokens.push_back(type_name);
		result->type        = value_type::STRUCT;
		result->struct_name = type_name.text;
		return result;
	}

	// Single-character type sigils.
	bool       has_sigil  = true;
	value_type sigil_type = value_type::UNKNOWN;
	if (suffix.text == "%") {
		sigil_type = value_type::INT32;
	} else if (suffix.text == "#") {
		sigil_type = value_type::FLOAT32;
	} else if (suffix.text == "$") {
		sigil_type = value_type::STRING;
	} else {
		has_sigil = false; // Some other symbol; leave it for the caller.
	}

	if (has_sigil) {
		result->tokens.push_back(lexer->next()); // Consume the sigil.
		result->type = sigil_type;
	}
	return result;
}
// Out-of-line destructor with an empty body.
blitz::ast::value::~value() {}
// Reports whether the lexer's current token can be parsed as a literal value.
//
// STRING, REAL and INTEGER tokens are always accepted. A TEXT token is accepted only when it
// spells one of the keyword literals True, False or Null, compared ASCII case-insensitively.
// Every other token type is rejected.
bool blitz::ast::value::can_parse(std::shared_ptr<blitz::lexer> lexer)
{
	auto tk = lexer->current();
	switch (tk.type) {
	case blitz::token::variant::STRING:
	case blitz::token::variant::REAL:
	case blitz::token::variant::INTEGER:
		return true;
	case blitz::token::variant::TEXT: {
		// We can only parse True, False, Null.
		std::string text = tk.text;
		std::transform(text.cbegin(), text.cend(), text.begin(), [](char from) {
			if (from & 0b10000000) { // Exclude Unicode: bytes with the high bit set are left alone.
				return from;
			}
			// std::tolower requires a value representable as unsigned char.
			return static_cast<char>(std::tolower(static_cast<unsigned char>(from)));
		});

		// Compare the lower-cased copy, not the original token text.
		return (text == "false") || (text == "true") || (text == "null");
	}
	default:
		break;
	}

	return false;
}
// Parses the lexer's current token into a literal value node.
//
// - STRING tokens yield a STRING node carrying the token text.
// - INTEGER tokens are converted with strtoll/strtoull. A "0x" prefix selects base 16, "0b"
//   base 2 and "00" base 8; everything else is base 10. If the following token is the text
//   "u", or the value does not fit into a signed integer, the node becomes UNSIGNED_INTEGER.
// - Any other token type is not converted here and the node is returned with type UNKNOWN.
//
// Throws blitz::error if an integer is not representable even as an unsigned integer.
//
// NOTE(review): the "u" suffix token is only peeked at, never consumed - confirm whether the
// caller is expected to skip it.
std::shared_ptr<blitz::ast::node> blitz::ast::value::try_parse(std::shared_ptr<blitz::lexer> lexer)
{
	auto file = lexer->file();
	auto tk   = lexer->current();
	auto utk  = lexer->peek();

	auto node  = std::make_shared<blitz::ast::value>();
	node->type = variant::UNKNOWN;

	if (tk.type == blitz::token::variant::STRING) {
		node->type = variant::STRING;
		node->text = tk.text;
		return node;
	} else if (tk.type == blitz::token::variant::INTEGER) {
		// Figure out which base this integer is in (and where it starts).
		int         base = 10;
		const char* text = tk.text.c_str();
		if ((tk.text.length() > 1) && (text[0] == '0')) {
			if (text[1] == 'x') { // Base 16
				base = 16;
				text += 2;
			} else if (text[1] == 'b') { // Base 2
				base = 2;
				text += 2;
			} else if (text[1] == '0') { // Base 8
				base = 8;
				text += 1;
			}
		}

		if (utk.type == blitz::token::variant::TEXT && utk.text == "u") {
			// The user specified that this is unsigned, so treat it as such.
			node->type = variant::UNSIGNED_INTEGER;
			errno      = 0; // strto* only ever sets errno, so clear any stale value first.
			node->number.ui = std::strtoull(text, nullptr, base);
			if (errno == ERANGE) {
				throw blitz::error(file, tk.location, tk.location, blitz::format("Value '%s' is not representable on this system.", tk.text.c_str()));
			}
		} else {
			// Try and figure out if it is unsigned.
			errno          = 0;
			node->number.i = std::strtoll(text, nullptr, base);
			if (errno == ERANGE) {
				// Too large for a signed integer, retry as unsigned. errno must be cleared
				// again, otherwise the ERANGE from strtoll would still be visible below.
				node->type      = variant::UNSIGNED_INTEGER;
				errno           = 0;
				node->number.ui = std::strtoull(text, nullptr, base);
				if (errno == ERANGE) {
					throw blitz::error(file, tk.location, tk.location, blitz::format("Value '%s' is not representable on this system.", tk.text.c_str()));
				}
			} else {
				node->type = variant::INTEGER;
			}
		}
	}

	// Always return the node; flowing off the end of a non-void function is undefined behavior.
	return node;
}
+33 -18
View File
@@ -1,5 +1,5 @@
/// AUTOGENERATED COPYRIGHT HEADER START
// Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// Copyright (C) 2017-2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#pragma once
#include <list>
@@ -7,6 +7,7 @@
#include <optional>
#include <string>
#include "../lexer.hpp"
#include "../types.hpp"
// BlitzBasic Built-Ins
// - Include: Followed by a String, which is the file to include at this location.
@@ -30,34 +31,48 @@
namespace blitz {
namespace ast {
class node {
public:
struct node {
std::vector<blitz::token> tokens;
virtual ~node() = default;
};
class expression : public node {};
struct variable : public node {
std::string name;
blitz::types::type type;
std::string struct_name;
class variable : public node {
blitz::token _token;
std::shared_ptr<blitz::ast::expression> _value;
public:
virtual ~variable();
variable(blitz::token token);
void set_value(std::shared_ptr<blitz::ast::expression> value);
static bool can_parse(std::shared_ptr<blitz::lexer> lexer);
static std::shared_ptr<blitz::ast::node> try_parse(std::shared_ptr<blitz::lexer> lexer);
};
class call : public node {};
struct value : public node {
enum class variant {
UNKNOWN,
NULL,
BOOL,
INTEGER,
UNSIGNED_INTEGER,
REAL,
STRING,
} type;
union {
bool b;
intmax_t i;
uintmax_t ui;
double f;
} number;
std::string text;
class local : public node {
public:
~local();
local();
virtual ~value();
static bool can_parse(std::shared_ptr<blitz::lexer> lexer);
static std::shared_ptr<blitz::ast::node> try_parse(std::shared_ptr<blitz::lexer> lexer);
};
class global : public node {};
struct expression : public node {};
} // namespace ast
} // namespace blitz
+1 -1
View File
@@ -1,5 +1,5 @@
// AUTOGENERATED COPYRIGHT HEADER START
// Copyright (C) 2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// Copyright (C) 2024-2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#include "error.hpp"
#include <cstdarg>
+1 -1
View File
@@ -1,5 +1,5 @@
// AUTOGENERATED COPYRIGHT HEADER START
// Copyright (C) 2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// Copyright (C) 2024-2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#pragma once
#include <cinttypes>
+227 -251
View File
@@ -1,10 +1,11 @@
/// AUTOGENERATED COPYRIGHT HEADER START
// Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// Copyright (C) 2017-2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#include "lexer.hpp"
#include <codecvt>
#include <cstdarg>
#include <sstream>
#include "util.hpp"
std::string blitz::token::to_string()
{
@@ -67,7 +68,7 @@ blitz::lexer::~lexer() {}
blitz::lexer::lexer(std::filesystem::path file)
{
// Usually files start at line 1 and character 1, so we should start there too.
_location = {1, 1};
_location = { 1, 1 };
// Try and open the file for reading.
_file = file;
@@ -77,10 +78,10 @@ blitz::lexer::lexer(std::filesystem::path file)
}
// Initialize token storage to a default token.
_override = _current = blitz::token{
.location = {0, 0},
_next = _current = blitz::token{
.location = { 0, 0 },
.text = "",
.type = token::variant::UNKNOWN,
.type = token::variant::NONE,
};
}
@@ -92,121 +93,89 @@ blitz::token blitz::lexer::current()
// Advances the lexer by one token and returns the new current token.
//
// The token produced by peek() becomes the current token. The stored look-ahead token is then
// reset to a NONE placeholder, which is the state peek() checks for before lexing a new token.
blitz::token blitz::lexer::next()
{
	// Promote the look-ahead token to be the current one.
	_current = peek();

	// Reset the stored look-ahead token.
	blitz::token placeholder{};
	placeholder.location = { 0, 0 };
	placeholder.text     = "";
	placeholder.type     = token::variant::NONE;
	_next                = placeholder;

	return _current;
}
blitz::token blitz::lexer::peek()
{
enum class stage {
DEFAULT,
TEXT,
NUMBER,
STRING,
COMMENT,
} state = stage::DEFAULT;
if (_next.type == blitz::token::variant::NONE) {
// ToDo: Optimize
enum class stage {
DEFAULT,
TEXT,
NUMBER,
STRING,
COMMENT,
} state = stage::DEFAULT;
std::stringstream buffer;
blitz::token token{
.location = _location,
.text = "",
.type = blitz::token::variant::UNKNOWN,
};
std::stringstream buffer;
blitz::token token{
.location = _location,
.text = "",
.type = blitz::token::variant::UNKNOWN,
};
auto issymbol = [](int chr) {
switch (chr) {
case ';': // Comment
case ':': // Command Separator
case '=': // Equal
case '<': // Less Than
case '>': // Greater Than
case '~': // Bitwise Not
case '^': // Exponential (X ^ Y = pow(X, Y))
case '+': // Plus
case '-': // Minus
case '*': // Multiply
case '/': // Divide
case ',': // Parameter Separation
case '%': // Integer Type
case '#': // Real Type
case '$': // String Type
case '.': // Structured Type
case '\\': // Structured Type Access
// Blitz Arrays
case '[':
case ']':
// Call, Grouping, Dim
case '(':
case ')':
return true;
default:
return false;
auto issymbol = [](int chr) { return blitz::utility::is_symbol(chr); };
auto iswhitespace = [](int chr) { return blitz::utility::is_white_space(chr); };
// ToDo: Figure out why we don't ever hit chr == EOF.
if (_stream.eof()) {
token.location = _location;
token.type = blitz::token::variant::ENDOFFILE;
return token;
}
return false;
};
auto iswhitespace = [](int chr) {
switch (chr) {
case ' ':
case '\t':
return true;
default:
return false;
}
return false;
};
// ToDo: Figure out why we don't ever hit chr == EOF.
if (_stream.eof()) {
token.location = _location;
token.type = blitz::token::variant::ENDOFFILE;
return token;
}
bool complete = false;
while (!complete && _stream.good()) {
// Peek at the current byte, without advancing the read pointer until we need to.
auto chr = _stream.peek();
bool is_newline = (chr == '\r') || (chr == '\n');
bool is_returnfeed = (chr == '\r');
bool complete = false;
while (!complete && _stream.good()) {
// Peek at the current byte, without advancing the read pointer until we need to.
auto chr = _stream.peek();
bool is_newline = (chr == '\r') || (chr == '\n');
bool is_returnfeed = (chr == '\r');
if (state == stage::DEFAULT) {
if (chr == EOF) {
token.type = blitz::token::variant::ENDOFFILE;
token.text = "";
token.location = _location;
complete = true;
_location.second++;
} else if (is_newline) {
// New Line, should be handled like a control character, but with some special things.
token.type = blitz::token::variant::NEWLINE;
token.text = "\n";
token.location = _location;
complete = true;
if (state == stage::DEFAULT) {
if (chr == EOF) {
token.type = blitz::token::variant::ENDOFFILE;
token.text = "";
token.location = _location;
complete = true;
_location.second++;
} else if (is_newline) {
// New Line, should be handled like a control character, but with some special things.
token.type = blitz::token::variant::NEWLINE;
token.text = "\n";
token.location = _location;
complete = true;
// Advance the read pointer.
_stream.get();
// Is this a Windows-style \r\n?
if (is_returnfeed && (_stream.peek() == '\n')) {
// If so, advance the read pointer again.
// Advance the read pointer.
_stream.get();
}
// Then update the location.
_location.first++;
_location.second = 1;
} else if (iswhitespace(chr)) {
// This is white space, which we'll happily ignore.
_stream.get();
_location.second++;
} else if (chr < 32) {
// Likely to be a control character.
token.location = _location;
token.type = blitz::token::variant::CONTROL;
token.text = {1, char(chr)};
complete = true;
_stream.get();
_location.second++;
/*} else if (chr == ':') {
// Is this a Windows-style \r\n?
if (is_returnfeed && (_stream.peek() == '\n')) {
// If so, advance the read pointer again.
_stream.get();
}
// Then update the location.
_location.first++;
_location.second = 1;
} else if (iswhitespace(chr)) {
// This is white space, which we'll happily ignore.
_stream.get();
_location.second++;
} else if (chr < 32) {
// Likely to be a control character.
token.location = _location;
token.type = blitz::token::variant::CONTROL;
token.text = { 1, char(chr) };
complete = true;
_stream.get();
_location.second++;
/*} else if (chr == ':') {
// Allows code writers to pretend it's all one line.
token.location = _location;
token.type = blitz::token::variant::SEPARATOR;
@@ -214,165 +183,172 @@ blitz::token blitz::lexer::peek()
complete = true;
_stream.get();
_location.second++;*/
} else if (chr == ';') {
// A comment, which ends at the next new line.
state = stage::COMMENT;
token.location = _location;
token.type = blitz::token::variant::COMMENT;
} else if (isdigit(chr)) {
// Probably an Integer, or if the latter, it's a Real.
state = stage::NUMBER;
token.location = _location;
token.type = blitz::token::variant::INTEGER;
} else if (isalpha(chr)) {
// Text of some kind.
state = stage::TEXT;
token.location = _location;
token.type = blitz::token::variant::TEXT;
} else if (chr == '"') {
// A quoted string.
state = stage::STRING;
token.location = _location;
token.type = blitz::token::variant::STRING;
// Advance so we actually get anywhere.
_stream.get();
_location.second++;
} else if (issymbol(chr)) {
// Special Handling for a few symbols that could mean multiple things.
if (chr == '.') { // '.' can start a Real, Label or Structured Type Access. We don't want to decide on the latter here, that's a parser thing.
buffer << (char)chr;
// We advance the read pointer here to look at what's coming next.
_stream.get();
chr = _stream.peek();
_location.second++;
// Peek at what's coming next.
if (isdigit(chr)) {
// This is a Real number.
token.location = _location;
token.type = blitz::token::variant::REAL;
state = stage::NUMBER;
} else {
// Assume this is a symbol and return to normal behavior.
token.location = _location;
token.text = buffer.str();
token.type = blitz::token::variant::SYMBOL;
complete = true;
}
} else if ((chr == '+') || (chr == '-')) { // '+' & '-' could be prefixes to an Integer or Real.
buffer << (char)chr;
// Advance the read pointer to peek at the future.
_stream.get();
chr = _stream.peek();
_location.second++;
// Peek at what's coming up.
if (isdigit(chr) || (chr == '.')) { // Likely to be a Real or Integer.
token.location = _location;
if (chr == '.') {
token.type = blitz::token::variant::REAL;
} else {
token.type = blitz::token::variant::INTEGER;
}
state = stage::NUMBER;
} else {
token.location = _location;
token.text = buffer.str();
token.type = blitz::token::variant::SYMBOL;
complete = true;
}
} else {
} else if (chr == ';') {
// A comment, which ends at the next new line.
state = stage::COMMENT;
token.location = _location;
token.text = {1, char(chr)};
token.type = blitz::token::variant::SYMBOL;
complete = true;
token.type = blitz::token::variant::COMMENT;
} else if (isdigit(chr)) {
// Probably an Integer, or if the latter, it's a Real.
state = stage::NUMBER;
token.location = _location;
token.type = blitz::token::variant::INTEGER;
} else if (isalpha(chr)) {
// Text of some kind.
state = stage::TEXT;
token.location = _location;
token.type = blitz::token::variant::TEXT;
} else if (chr == '"') {
// A quoted string.
state = stage::STRING;
token.location = _location;
token.type = blitz::token::variant::STRING;
// Advance so we actually get anywhere.
_stream.get();
_location.second++;
}
} else {
// Everything else is an error
throw blitz::error(_file, _location, _location, "You've encountered a bug. Please report this with the file that caused it.");
}
} else if (state == stage::NUMBER) {
if ((chr == EOF) || (chr < 32) || is_newline || iswhitespace(chr) || (chr == ';')) {
// EOF, Control, NL, Whitespace, and Comments should return to default parsing.
complete = true;
} else if (isdigit(chr) || (chr == '.')) {
_stream.get();
buffer << (char)chr;
if (chr == '.') {
if (token.type != token::variant::REAL) {
token.type = blitz::token::variant::REAL;
} else if (issymbol(chr)) {
// Special Handling for a few symbols that could mean multiple things.
if (chr == '.') { // '.' can start a Real, Label or Structured Type Access. We don't want to decide on the latter here, that's a parser thing.
buffer << (char)chr;
// We advance the read pointer here to look at what's coming next.
_stream.get();
chr = _stream.peek();
_location.second++;
// Peek at what's coming next.
if (isdigit(chr)) {
// This is a Real number.
token.location = _location;
token.type = blitz::token::variant::REAL;
state = stage::NUMBER;
} else {
// Assume this is a symbol and return to normal behavior.
token.location = _location;
token.text = buffer.str();
token.type = blitz::token::variant::SYMBOL;
complete = true;
}
} else if ((chr == '+') || (chr == '-')) { // '+' & '-' could be prefixes to an Integer or Real.
buffer << (char)chr;
// Advance the read pointer to peek at the future.
_stream.get();
chr = _stream.peek();
_location.second++;
// Peek at what's coming up.
if (isdigit(chr) || (chr == '.')) { // Likely to be a Real or Integer.
token.location = _location;
if (chr == '.') {
token.type = blitz::token::variant::REAL;
} else {
token.type = blitz::token::variant::INTEGER;
}
state = stage::NUMBER;
} else {
token.location = _location;
token.text = buffer.str();
token.type = blitz::token::variant::SYMBOL;
complete = true;
}
} else {
token.text = buffer.str();
throw blitz::error(_file, token.location, _location, blitz::format("In token %s: Expected [0-9], got '%s' instead.", token.to_string().c_str(), std::string{1, (char)chr}.c_str()));
token.location = _location;
token.text = { char(chr) };
token.type = blitz::token::variant::SYMBOL;
complete = true;
// Advance so we actually get anywhere.
_stream.get();
_location.second++;
}
} else {
// Everything else is an error
throw blitz::error(_file, _location, _location, "You've encountered a bug. Please report this with the file that caused it.");
}
} else if (state == stage::NUMBER) {
if ((chr == EOF) || (chr < 32) || is_newline || iswhitespace(chr) || (chr == ';')) {
// EOF, Control, NL, Whitespace, and Comments should return to default parsing.
complete = true;
} else if (isdigit(chr) || (chr == '.') || (chr == 'b') || (chr == 'x')) {
_stream.get();
buffer << (char)chr;
if (chr == '.') {
if (token.type != token::variant::REAL) {
token.type = blitz::token::variant::REAL;
} else {
token.text = buffer.str();
throw blitz::error(_file, token.location, _location, blitz::format("In token %s: Expected [0-9], got '%s' instead.", token.to_string().c_str(), std::string{ 1, (char)chr }.c_str()));
}
}
} else if (issymbol(chr)) {
complete = true;
} else {
token.text = buffer.str();
throw blitz::error(_file, token.location, _location, blitz::format("In token %s: Expected ([0](b|x|))[0-9.], got '%s' instead.", token.to_string().c_str(), std::string{ 1, (char)chr }.c_str()));
}
} else if (issymbol(chr)) {
complete = true;
} else {
token.text = buffer.str();
throw blitz::error(_file, token.location, _location, blitz::format("In token %s: Expected [0-9.], got '%s' instead.", token.to_string().c_str(), std::string{1, (char)chr}.c_str()));
}
if (complete) {
token.text = buffer.str();
}
} else if (state == stage::TEXT) {
if ((chr == EOF) || (chr < 32) || is_newline || iswhitespace(chr) || issymbol(chr)) {
// Return to default parsing.
complete = true;
} else if (isalpha(chr) || isdigit(chr) || (chr == '_')) {
buffer << (char)chr;
_stream.get();
_location.second++;
} else {
token.text = buffer.str();
throw blitz::error(_file, token.location, _location, blitz::format("In token %s: Expected [a-zA-Z0-9_], got '%s' instead.", token.to_string().c_str(), std::string{1, (char)chr}.c_str()));
}
if (complete) {
token.text = buffer.str();
}
} else if (state == stage::TEXT) {
if ((chr == EOF) || (chr < 32) || is_newline || iswhitespace(chr) || issymbol(chr)) {
// Return to default parsing.
complete = true;
} else if (isalpha(chr) || isdigit(chr) || (chr == '_')) {
buffer << (char)chr;
_stream.get();
_location.second++;
} else {
token.text = buffer.str();
throw blitz::error(_file, token.location, _location, blitz::format("In token %s: Expected [a-zA-Z0-9_], got '%s' instead.", token.to_string().c_str(), std::string{ 1, (char)chr }.c_str()));
}
if (complete) {
token.text = buffer.str();
}
} else if (state == stage::STRING) {
if ((chr == EOF) || (chr < 32) || is_newline) {
// Return to default parsing.
complete = true;
} else if (chr == '"') { // The only true way to end a string.
complete = true;
if (complete) {
token.text = buffer.str();
}
} else if (state == stage::STRING) {
if ((chr == EOF) || (chr < 32) || is_newline) {
// Return to default parsing.
complete = true;
} else if (chr == '"') { // The only true way to end a string.
complete = true;
// Skip over the " so we don't confuse the parser.
_stream.get();
_location.second++;
} else {
buffer << (char)chr;
_stream.get();
_location.second++;
}
// Skip over the " so we don't confuse the parser.
_stream.get();
_location.second++;
} else {
buffer << (char)chr;
_stream.get();
_location.second++;
}
if (complete) {
token.text = buffer.str();
}
} else if (state == stage::COMMENT) {
if ((chr == EOF) || (chr < 32) || is_newline) {
// Return to default parsing at this point.
complete = true;
} else {
buffer << (char)chr;
_stream.get();
_location.second++;
}
if (complete) {
token.text = buffer.str();
}
} else if (state == stage::COMMENT) {
if ((chr == EOF) || (chr < 32) || is_newline) {
// Return to default parsing at this point.
complete = true;
} else {
buffer << (char)chr;
_stream.get();
_location.second++;
}
if (complete) {
token.text = buffer.str();
if (complete) {
token.text = buffer.str();
}
}
}
_next = token;
}
return token;
return _next;
}
// Returns the value of _file as a std::filesystem::path (by value).
std::filesystem::path blitz::lexer::file()
{
	std::filesystem::path location(_file);
	return location;
}
+6 -2
View File
@@ -1,5 +1,5 @@
/// AUTOGENERATED COPYRIGHT HEADER START
// Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// Copyright (C) 2017-2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#pragma once
#include <cinttypes>
@@ -21,6 +21,7 @@ namespace blitz {
std::pair<uint64_t, uint64_t> location;
std::string text;
enum class variant : uint64_t {
NONE, // There is no token here.
UNKNOWN, // We have absolutely no fucking clue.
ENDOFFILE, // End of the file.
NEWLINE, // New Line.
@@ -48,7 +49,7 @@ namespace blitz {
std::pair<uint64_t, uint64_t> _location;
blitz::token _current;
blitz::token _override;
blitz::token _next;
public:
~lexer();
@@ -69,5 +70,8 @@ namespace blitz {
* The current token will remain in-tact.
*/
blitz::token peek();
public:
std::filesystem::path file();
};
} // namespace blitz
+15 -9
View File
@@ -1,20 +1,21 @@
// AUTOGENERATED COPYRIGHT HEADER START
// Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// Copyright (C) 2017-2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#include <clocale>
#include <iostream>
#include "compiler.hpp"
#include "error.hpp"
#include "lexer.hpp"
#include "parser.hpp"
int main(int argc, char** argv)
{
std::setlocale(LC_ALL, "en_US.UTF-8");
std::cout << argv[1] << std::endl;
blitz::lexer lex(argv[1]);
try {
std::setlocale(LC_ALL, "en_US.UTF-8");
std::cout << argv[1] << std::endl;
blitz::lexer lex(argv[1]);
for (blitz::token token = lex.next(); (token.type != blitz::token::variant::ENDOFFILE); token = lex.next()) {
switch (token.type) {
case blitz::token::variant::COMMENT:
@@ -44,15 +45,19 @@ int main(int argc, char** argv)
std::cin.get();
}
}
blitz::parser pars(argv[1]);
//std::cin.get();
return 0;
} catch (blitz::error const& ex) {
std::cout << ex.file() << std::endl;
std::cout << "Line " << ex.at().first << ", Char " << ex.at().second << ": " << ex.what() << std::endl;
return 1;
} catch (std::runtime_error const& ex) {
std::cout << ex.what() << std::endl;
return 1;
}
//std::cin.get();
return 0;
}
// BlitzBasic is a strange but powerful language in the right hands. While it has
@@ -79,6 +84,7 @@ int main(int argc, char** argv)
//
// 3. Function calls don't always need Parenthesis:
// ```
// Local myName
// Function myName() : End Function
// If myName() Then : EndIf ; <- Calls myName
// myName ; <- Calls myName, because there is no = after it.
+143 -94
View File
@@ -1,5 +1,5 @@
/// AUTOGENERATED COPYRIGHT HEADER START
// Copyright (C) 2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// Copyright (C) 2024-2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#include "parser.hpp"
#include <algorithm>
@@ -12,101 +12,150 @@ blitz::parser::~parser() {}
// Creates a parser for the given file.
//
// A lexer is created for the file and advanced by one token via next(). A single variable
// parse is then attempted on it; the resulting node is discarded.
blitz::parser::parser(std::filesystem::path file) : _file(file), _lexer(std::make_shared<blitz::lexer>(file)), _expr()
{
	_lexer->next();
	blitz::ast::variable::try_parse(_lexer);
}
std::shared_ptr<blitz::ast::node> blitz::parser::current()
{
return _expr;
}
std::shared_ptr<blitz::ast::node> blitz::parser::next()
{
// This should return an entire "line" of expressions in one go, i.e.:
// 1. Local a = 1, b = a, c = b+a
// -> Local(Variable(a, Expression(Integer(1))), Variable(b, Expression(Variable(a))), Variable(c, Expression(Add(Variable(b), Variable(a)))))
// 2. Include "HelloWorld.bb"
// -> Include(String("HelloWorld.bb"))
// 3. Function HelloWorld()
// -> Function(HelloWorld, ...)(
// Not quite sure if the above makes sense, we'd be returning many expressions outside of functions, but only one inside a function? Why even bother with the current/next crap then?
// Handling Include becomes a problem too. I guess we should actually return expressions on a line by line basis, and let the "compiler" figure out scope and stuff.
//
//std::shared_ptr<blitz::ast::node> blitz::parser::current()
//{
// return _expr;
//}
//
//std::shared_ptr<blitz::ast::node> blitz::parser::next()
//{
// // This should return an entire "line" of expressions in one go, i.e.:
// // 1. Local a = 1, b = a, c = b+a
// // -> Local(Variable(a, Expression(Integer(1))), Variable(b, Expression(Variable(a))), Variable(c, Expression(Add(Variable(b), Variable(a)))))
// // 2. Include "HelloWorld.bb"
// // -> Include(String("HelloWorld.bb"))
// // 3. Function HelloWorld()
// // -> Function(HelloWorld, ...)(
// // Not quite sure if the above makes sense, we'd be returning many expressions outside of functions, but only one inside a function? Why even bother with the current/next crap then?
// // Handling Include becomes a problem too. I guess we should actually return expressions on a line by line basis, and let the "compiler" figure out scope and stuff.
//
// // Grab the next token to figure out what behavior we should have.
// while (true) {
// auto token = _lexer->next();
// try {
// switch (token.type) {
// case blitz::token::variant::ENDOFFILE:
// // End of file means there's nothing left to parse.
// _expr.reset();
// return nullptr;
// case blitz::token::variant::COMMENT:
// case blitz::token::variant::NEWLINE:
// case blitz::token::variant::SEPARATOR:
// // Ignore some things that aren't very useful right now.
// continue;
// case blitz::token::variant::TEXT:
// return try_parse(token);
// default:
// throw nullptr;
// }
// } catch (blitz::error const& ex) {
// throw ex;
// } catch (std::exception const& ex) {
// throw new blitz::error(_file, token.location, token.location, ex.what());
// } catch (...) {
// throw new blitz::error(_file, token.location, token.location, blitz::format("Token %s unexpected at this point.", token.to_string().c_str()));
// }
// }
//}
//
//std::shared_ptr<blitz::ast::node> blitz::parser::try_parse(blitz::token token)
//{
// // ToDo: Switch to a proper Unicode library. Maybe Boost?
// std::string ltext;
// std::transform(token.text.begin(), token.text.end(), ltext.begin(), [](std::string::value_type c) { return std::tolower(c); });
//
// if ((ltext == "local") || (ltext == "global")) {
// // Local/Global have the same parsing, but different functionality.
// // Should be:
// // Text Text [Symbol(=) Expression] [Symbol(,) Text [Symbol(=) Expression] [Symbol(,) ...]]
//
// } else if (ltext == "function") {
// //Example:
// // Function FunctionName[$,%,#,:TypeName,.StructName]([Variable[, Variable=Value[, ...]])
// // [Function Content ...]
// // EndFunction
//
//
// } else if (ltext == "select") {
// } else if (ltext == "case") {
// } else if (ltext == "endselect") {
// } else if (ltext == "if") {
// } else if (ltext == "elif") {
// } else if (ltext == "endif") {
//
// } else if (ltext == "end") {
// }
//
// return nullptr;
//}
//
//std::shared_ptr<blitz::ast::node> blitz::parser::try_parse_expression() {
// // () + - / * = <> > < String Integer Float Variable
//}
//
//std::shared_ptr<blitz::ast::node> blitz::parser::try_parse_variable_expression()
//{
// // Text [Symbol(=) Expression(...)] [Symbol(,) [Text [Symbol(=) Expression(...)]]]
//
// auto label = _lexer->next();
// if (label != blitz::token::variant::TEXT) {
// throw new blitz::error(_file, label.location, label.location, blitz::format("Unexpected %s, expected Text.", label.to_string().c_str()));
// }
//
// auto node = std::make_shared<blitz::ast::variable>(label);
//
// auto operand = _lexer->next();
// if (operand == "=") {
// //node->set_value(try_parse_expression());
// } else if (operand == blitz::token::variant::NEWLINE || operand == blitz::token::variant::SEPARATOR || (operand == blitz::token::variant::SYMBOL && operand == ",")) {
// return node;
// } else {
// throw new blitz::error(_file, label.location, operand.location, blitz::format("Unexpected %s, expected Symbol(=), NewLine, Separator, or Symbol(,).", operand.to_string().c_str()));
// }
//
// return node;
//}
// Grab the next token to figure out what behavior we should have.
while (true) {
auto token = _lexer->next();
try {
switch (token.type) {
case blitz::token::variant::ENDOFFILE:
// End of file means there's nothing left to parse.
_expr.reset();
return nullptr;
case blitz::token::variant::COMMENT:
case blitz::token::variant::NEWLINE:
case blitz::token::variant::SEPARATOR:
// Ignore some things that aren't very useful right now.
continue;
case blitz::token::variant::TEXT:
return try_parse(token);
default:
throw nullptr;
}
} catch (blitz::error const& ex) {
throw ex;
} catch (std::exception const& ex) {
throw new blitz::error(_file, token.location, token.location, ex.what());
} catch (...) {
throw new blitz::error(_file, token.location, token.location, blitz::format("Token %s unexpected at this point.", token.to_string().c_str()));
}
}
}
/** Dispatch a TEXT token to the handler for the keyword it spells.
 *
 * Keywords are matched case-insensitively. None of the keyword branches are
 * implemented yet, so every call currently returns nullptr.
 *
 * @param token Token to inspect; only its `text` is read here.
 * @return nullptr until the individual keyword parsers are written.
 */
std::shared_ptr<blitz::ast::node> blitz::parser::try_parse(blitz::token token)
{
	// ToDo: Switch to a proper Unicode library. Maybe Boost?
	// std::transform writes through the output iterator and never grows the
	// destination, so the buffer must be sized to the input up front.
	std::string ltext(token.text.size(), '\0');
	std::transform(token.text.begin(), token.text.end(), ltext.begin(), [](std::string::value_type c) {
		// std::tolower is only defined for values representable as unsigned char (or EOF).
		return static_cast<std::string::value_type>(std::tolower(static_cast<unsigned char>(c)));
	});

	if ((ltext == "local") || (ltext == "global")) {
		// Local/Global have the same parsing, but different functionality.
		// Should be:
		// Text Text [Symbol(=) Expression] [Symbol(,) Text [Symbol(=) Expression] [Symbol(,) ...]]
	} else if (ltext == "function") {
		// Not implemented yet.
	} else if (ltext == "select") {
		// Not implemented yet.
	} else if (ltext == "case") {
		// Not implemented yet.
	} else if (ltext == "endselect") {
		// Not implemented yet.
	} else if (ltext == "if") {
		// Not implemented yet.
	} else if (ltext == "elif") {
		// Not implemented yet.
	} else if (ltext == "endif") {
		// Not implemented yet.
	} else if (ltext == "end") {
		// Not implemented yet.
	}

	return nullptr;
}
std::shared_ptr<blitz::ast::node> blitz::parser::try_parse_variable()
{
// Text [Symbol(=) Expression(...)] [Symbol(,) [Text [Symbol(=) Expression(...)]]]
auto label = _lexer->next();
if (label != blitz::token::variant::TEXT) {
throw new blitz::error(_file, label.location, label.location, blitz::format("Unexpected %s, expected Text.", label.to_string().c_str()));
}
auto node = std::make_shared<blitz::ast::variable>(label);
auto operand = _lexer->next();
if (operand == "=") {
//node->set_value(try_parse_expression());
} else if (operand == blitz::token::variant::NEWLINE || operand == blitz::token::variant::SEPARATOR || (operand == blitz::token::variant::SYMBOL && operand == ",")) {
return node;
} else {
throw new blitz::error(_file, label.location, operand.location, blitz::format("Unexpected %s, expected Symbol(=), NewLine, Separator, or Symbol(,).", operand.to_string().c_str()));
}
return node;
}
/* Expressions
*
* Example Locations:
* - Local Var = Expression
* - Var = Expression
* - myFunction(Expression, ...)
* - If Expression Then
*
* Example Expressions:
* - 0 + 0, 0 - 0, 0 * 0, 0 / 0, 0 Shr 0, 0 Shl 0, 0 And 0, 0 Or 0, Not 0,
* -
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*/
+1 -1
View File
@@ -1,5 +1,5 @@
/// AUTOGENERATED COPYRIGHT HEADER START
// Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// Copyright (C) 2017-2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#pragma once
#include <filesystem>
+70
View File
@@ -0,0 +1,70 @@
// AUTOGENERATED COPYRIGHT HEADER START
// Copyright (C) 2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#include "types.hpp"
#include <algorithm>
#include <cctype>
// Table pairing every lower-case type keyword with its enumerator.
// Several keywords may map to the same enumerator; to_string() walks the table
// front to back, so the first spelling listed for a type is the one it returns.
// Keys must be lower-case with no surrounding whitespace, because from_string()
// compares the lower-cased input against them verbatim.
// The trailing `//` on each row keeps clang-format from joining the rows.
const std::pair<const char*, blitz::types::type> _map_to[] = {
	{ "byte", blitz::types::type::INT8 }, //
	{ "int8", blitz::types::type::INT8 }, //
	{ "ubyte", blitz::types::type::UINT8 }, //
	{ "uint8", blitz::types::type::UINT8 }, //
	{ "short", blitz::types::type::INT16 }, //
	{ "int16", blitz::types::type::INT16 }, //
	{ "ushort", blitz::types::type::UINT16 }, //
	{ "uint16", blitz::types::type::UINT16 }, //
	{ "int", blitz::types::type::INT32 }, //
	{ "int32", blitz::types::type::INT32 }, //
	{ "uint", blitz::types::type::UINT32 }, //
	{ "uint32", blitz::types::type::UINT32 }, //
	{ "long", blitz::types::type::INT64 }, //
	{ "int64", blitz::types::type::INT64 }, //
	{ "ulong", blitz::types::type::UINT64 }, //
	{ "uint64", blitz::types::type::UINT64 }, //
	{ "half", blitz::types::type::FLOAT16 }, //
	{ "float16", blitz::types::type::FLOAT16 }, //
	{ "real16", blitz::types::type::FLOAT16 }, //
	{ "single", blitz::types::type::FLOAT32 }, //
	{ "float", blitz::types::type::FLOAT32 }, //
	{ "float32", blitz::types::type::FLOAT32 }, //
	{ "real", blitz::types::type::FLOAT32 }, //
	{ "real32", blitz::types::type::FLOAT32 }, //
	{ "double", blitz::types::type::DOUBLE }, //
	{ "float64", blitz::types::type::DOUBLE }, //
	{ "real64", blitz::types::type::DOUBLE }, //
	{ "string", blitz::types::type::STRING }, //
};
/** Convert a type enumerator to its keyword.
 *
 * @param type Enumerator to convert.
 * @return "struct" for STRUCT, otherwise the first keyword in _map_to that maps
 *         to `type`, or "Unknown" if the table has no entry for it.
 */
std::string blitz::types::to_string(blitz::types::type type)
{
	if (type != type::STRUCT) {
		// Walk the table in order; the first matching row wins.
		for (const auto& [keyword, mapped] : _map_to) {
			if (mapped == type) {
				return keyword;
			}
		}
		return "Unknown";
	}

	// STRUCT is handled here rather than through the keyword table.
	return "struct";
}
/** Look up the type enumerator for a keyword, ignoring ASCII case.
 *
 * @param text Keyword to resolve; taken by value because it is lower-cased in place.
 * @return The enumerator of the matching _map_to row, or UNKNOWN if none matches.
 */
blitz::types::type blitz::types::from_string(std::string text)
{
	// Lower-case plain ASCII only; bytes with the high bit set are left
	// untouched so multi-byte (Unicode) sequences are not altered.
	for (char& ch : text) {
		if ((ch & 0b10000000) == 0) {
			ch = static_cast<char>(std::tolower(ch));
		}
	}

	for (const auto& [keyword, mapped] : _map_to) {
		if (text == keyword) {
			return mapped;
		}
	}

	return blitz::types::type::UNKNOWN;
}
+52
View File
@@ -0,0 +1,52 @@
// AUTOGENERATED COPYRIGHT HEADER START
// Copyright (C) 2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#pragma once
#include <cinttypes>
#include <string>
namespace blitz::types {
	/** Types known to the language.
	 *
	 * Each canonical enumerator is followed by its aliases, which share its
	 * value. The underlying values follow declaration order, so new entries
	 * must not be inserted between existing ones without checking users.
	 */
	enum class type : uint8_t {
		UNKNOWN,

		// Integers, 8-bit
		INT8,
		BYTE = INT8,
		UINT8,
		UBYTE = UINT8,

		// Integers, 16-bit
		INT16,
		SHORT = INT16,
		UINT16,
		USHORT = UINT16,

		// Integers, 32-bit
		INT32,
		INT = INT32,
		UINT32,
		UINT = UINT32,

		// Integers, 64-bit
		INT64,
		LONG = INT64,
		UINT64,
		ULONG = UINT64,

		// Floating point, 16-bit
		FLOAT16,
		HALF = FLOAT16,

		// Floating point, 32-bit
		FLOAT32,
		FLOAT  = FLOAT32,
		SINGLE = FLOAT32,

		// Floating point, 64-bit
		FLOAT64,
		DOUBLE = FLOAT64,

		// UTF-8 string
		STRING,

		// User-defined struct
		STRUCT,
	};

	/// Returns the keyword for `type`.
	std::string to_string(blitz::types::type type);

	/// Returns the enumerator for the keyword `text`.
	blitz::types::type from_string(std::string text);
} // namespace blitz::types
+59
View File
@@ -0,0 +1,59 @@
// AUTOGENERATED COPYRIGHT HEADER START
// Copyright (C) 2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#include "util.hpp"
#include <cctype>
/** Tell whether a character is one of the language's symbol characters.
 *
 * @param code Character code to classify.
 * @return true if `code` is one of the symbols listed below, false otherwise.
 */
bool blitz::utility::is_symbol(int code)
{
	switch (code) {
	case ';': // Comment
	case ':': // Command Separator
	case '=': // Equal
	case '<': // Less Than
	case '>': // Greater Than
	case '~': // Bitwise Not
	case '^': // Exponential (X ^ Y = pow(X, Y))
	case '+': // Plus
	case '-': // Minus
	case '*': // Multiply
	case '/': // Divide
	case ',': // Parameter Separation
	case '%': // Integer Type
	case '#': // Real Type
	case '$': // String Type
	case '.': // Structured Type
	case '\\': // Structured Type Access
	case '[': // Blitz Arrays
	case ']':
	case '(': // Call, Grouping, Dim
	case ')':
		return true;
	default:
		return false;
	}
}
/** Tell whether a character is horizontal white space.
 *
 * Only space and tab qualify; line breaks are deliberately not matched here.
 *
 * @param code Character code to classify.
 * @return true for ' ' and '\t', false for everything else.
 */
bool blitz::utility::is_white_space(int code)
{
	switch (code) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
/** Tell whether a character is a decimal digit.
 *
 * @param code Character code to classify.
 * @return true if std::isdigit accepts `code`; false otherwise, including for
 *         any value outside the unsigned char range.
 */
bool blitz::utility::is_digit(int code)
{
	// std::isdigit is undefined for arguments that are neither EOF nor
	// representable as unsigned char (for example a code above 255), so such
	// values are rejected before the call.
	if (code < 0 || code > 0xFF) {
		return false;
	}
	return std::isdigit(code) != 0;
}
/** Tell whether a character is alphabetic.
 *
 * @param code Character code to classify.
 * @return true if std::isalpha accepts `code`; false otherwise, including for
 *         any value outside the unsigned char range.
 */
bool blitz::utility::is_alpha(int code)
{
	// std::isalpha is undefined for arguments that are neither EOF nor
	// representable as unsigned char (for example a code above 255), so such
	// values are rejected before the call.
	if (code < 0 || code > 0xFF) {
		return false;
	}
	return std::isalpha(code) != 0;
}
+13
View File
@@ -0,0 +1,13 @@
// AUTOGENERATED COPYRIGHT HEADER START
// Copyright (C) 2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
// AUTOGENERATED COPYRIGHT HEADER END
#pragma once

// Character classification helpers.
namespace blitz::utility {
	/// Returns true if `code` is one of the language's symbol characters.
	bool is_symbol(int code);

	/// Returns true if `code` is a space or a tab.
	bool is_white_space(int code);

	/// Returns true if `code` is a decimal digit.
	bool is_digit(int code);

	/// Returns true if `code` is an alphabetic character.
	bool is_alpha(int code);
} // namespace blitz::utility
+4
View File
@@ -0,0 +1,4 @@
; AUTOGENERATED COPYRIGHT HEADER START
; Copyright (C) 2025 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
; AUTOGENERATED COPYRIGHT HEADER END
Variable:String