Lexer done for now, moving on to ast
This commit is contained in:
+3
-2
@@ -208,11 +208,12 @@ function(init_project TARGET)
|
||||
endif()
|
||||
|
||||
if(MSVC) # Microsoft Visual C/C++
|
||||
target_compile_options(${TARGET} PRIVATE
|
||||
target_compile_definitions(${TARGET} PRIVATE
|
||||
# Disable useless/terrible behavior from MSVC
|
||||
_CRT_SECURE_NO_WARNINGS
|
||||
_ENABLE_EXTENDED_ALIGNED_STORAGE
|
||||
|
||||
)
|
||||
target_compile_options(${TARGET} PRIVATE
|
||||
# Dynamically link Microsoft C/C++ Redistributable.
|
||||
$<$<CONFIG:>:/MD>
|
||||
$<$<CONFIG:Debug>:/MDd>
|
||||
|
||||
@@ -1 +1,29 @@
|
||||
// AUTOGENERATED COPYRIGHT HEADER START
|
||||
// Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
|
||||
// AUTOGENERATED COPYRIGHT HEADER END
|
||||
#include "ast.hpp"
|
||||
#include <cstdlib>
|
||||
|
||||
|
||||
// Stores the lexer token this value expression was created from.
//
// The token is kept in this class's own `_token` member: the constructors of the derived
// value expressions read `_token.text` to compute their typed value, and the `expression`
// base class declares no constructor that accepts a token.
blitz::ast::value_expression::value_expression(blitz::token token) : _token(token) {}
|
||||
|
||||
// Builds an integer value expression by parsing the token text as a base-10 number.
//
// An empty token text leaves the value at 0. Text that does not start with a number also
// yields 0, which is what strtol reports when no conversion could be performed.
blitz::ast::integer_expression::integer_expression(blitz::token token) : value_expression(token), _value(0)
{
    if (_token.text.empty()) {
        return;
    }

    // strtol saturates at LONG_MIN/LONG_MAX for out-of-range input, whereas atol has
    // undefined behavior in that case.
    const long parsed = std::strtol(_token.text.c_str(), nullptr, 10);

    // `long` may be wider than 32 bits, so clamp explicitly instead of relying on an
    // implicit narrowing conversion into the int32_t member.
    if (parsed > INT32_MAX) {
        _value = INT32_MAX;
    } else if (parsed < INT32_MIN) {
        _value = INT32_MIN;
    } else {
        _value = static_cast<int32_t>(parsed);
    }
}
|
||||
|
||||
// Builds a real (floating point) value expression by parsing the token text.
//
// An empty token text leaves the value at 0.0f. Text that does not start with a number also
// yields 0.0f, which is what strtof reports when no conversion could be performed.
blitz::ast::real_expression::real_expression(blitz::token token) : value_expression(token), _value(0.0f)
{
    if (_token.text.empty()) {
        return;
    }

    // Parse directly as float: the member is a float, and atof would produce a double that
    // is then silently narrowed.
    _value = std::strtof(_token.text.c_str(), nullptr);
}
|
||||
|
||||
// Builds a string value expression whose value is a copy of the token text.
// The base class is initialised first, so `_token` is already populated when `_value` is
// initialised from it.
blitz::ast::string_expression::string_expression(blitz::token token) : value_expression(token), _value(_token.text) {}
|
||||
|
||||
// Constructs a variable definition from `token`, forwarding the token to the `expression`
// base. None of `_assign`, `_name` or `_value` is initialised explicitly here.
// NOTE(review): the `expression` class visible in ast.hpp declares no constructor taking a
// token - confirm that this initialiser matches the base class.
blitz::ast::variable_expression::variable_expression(blitz::token token) : expression(token) {}
|
||||
|
||||
@@ -2,12 +2,113 @@
|
||||
// Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
|
||||
// AUTOGENERATED COPYRIGHT HEADER END
|
||||
#pragma once
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include "../lexer.hpp"
|
||||
|
||||
// BlitzBasic Built-Ins
|
||||
// - Include: Followed by a String, which is the file to include at this location.
|
||||
// - Const: Defines one or more constants, which can not be modified.
|
||||
// - Local: Defines one or more local variables, which can be modified.
|
||||
// - Global: Defines one or more global variables, which can be modified from anywhere.
|
||||
// - Dim: Defines a dynamically resizable array, globally accessible and modifiable.
|
||||
// - If, Then, ElseIf, EndIf: Your classic If.
|
||||
// - Select, Case, Default, End Select: Your classic Select.
|
||||
// - For, To, Step, Next: Your classic For loop.
|
||||
// - While, Wend: Your classic While loop.
|
||||
// - Repeat, Until, Forever: Similar to While/Wend, but the condition is at the end and will only loop if the condition is false.
|
||||
// - Exit: Exit from any currently active loops. I guess this allows leaving Repeat/Forever combinations.
|
||||
// - True, False: True and False. What, did you expect something else?
|
||||
// - Stop: Pause the program for the debugger, if there happens to be one.
|
||||
// - End: Terminate the program.
|
||||
// - Goto: Jump to a specific label, without setting the return address.
|
||||
// - Gosub, Return: Jump to a specific label (subroutine), setting the return address appropriately, and Return from it eventually.
|
||||
// - Function, Return, End Function: Defines a function, and allows returning values. Yes, I know, End itself terminates the program, this is a special case. Thanks younger Sibly.
|
||||
// - And, Or, Not: Logical operator, self-explanatory really.
|
||||
|
||||
namespace blitz {
|
||||
namespace ast {
|
||||
class expression {
|
||||
public:
|
||||
virtual ~expression() {};
|
||||
virtual ~expression() = default;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Values
|
||||
// Base class for literal values. Holds the lexer token the value was created from.
class value_expression : public expression {
|
||||
protected:
|
||||
// Token supplied at construction; derived classes read `_token.text` to compute their value.
blitz::token _token;
|
||||
|
||||
public:
|
||||
virtual ~value_expression() = default;
|
||||
// Creates the value expression from the given lexer token.
value_expression(blitz::token token);
|
||||
};
|
||||
|
||||
// A literal integer value.
class integer_expression : public value_expression {
|
||||
protected:
|
||||
// Numeric value held by this expression, stored as a 32-bit signed integer.
int32_t _value;
|
||||
|
||||
public:
|
||||
virtual ~integer_expression() = default;
|
||||
// Creates the expression from the given lexer token.
integer_expression(blitz::token token);
|
||||
};
|
||||
|
||||
// A literal real (floating point) value.
class real_expression : public value_expression {
|
||||
protected:
|
||||
// Numeric value held by this expression, stored as a single-precision float.
float _value;
|
||||
|
||||
public:
|
||||
virtual ~real_expression() = default;
|
||||
// Creates the expression from the given lexer token.
real_expression(blitz::token token);
|
||||
};
|
||||
|
||||
// A literal string value.
class string_expression : public value_expression {
|
||||
// String value held by this expression. Private (class default), unlike the `_value`
// members of integer_expression and real_expression, which are protected.
std::string _value;
|
||||
|
||||
public:
|
||||
virtual ~string_expression() = default;
|
||||
// Creates the expression from the given lexer token.
string_expression(blitz::token token);
|
||||
};
|
||||
|
||||
/** One or more constant values
|
||||
*
|
||||
* Const var = Value, var2 = value
|
||||
*/
|
||||
class const_expression : public expression {
|
||||
std::list<std::shared_ptr<variable_expression>> _values;
|
||||
|
||||
};
|
||||
|
||||
/** One or more local variables
|
||||
*
|
||||
* Local var, var2 = value, var3
|
||||
*/
|
||||
class local_expression : public expression {
|
||||
// The variables introduced by this statement.
// NOTE(review): presumably one entry per declared name - confirm against the parser.
std::list<std::shared_ptr<variable_expression>> _values;
|
||||
};
|
||||
|
||||
/** One or more global variables
|
||||
*
|
||||
* Global var, var2 = value, var3
|
||||
*/
|
||||
class global_expression : public expression {
|
||||
// The variables introduced by this statement.
// NOTE(review): presumably one entry per declared name - confirm against the parser.
std::list<std::shared_ptr<variable_expression>> _values;
|
||||
};
|
||||
|
||||
/** A variable definition
|
||||
*
|
||||
*
|
||||
*/
|
||||
// A single variable definition, as referenced by the Const/Local/Global declaration lists.
class variable_expression : public expression {
|
||||
// NOTE(review): presumably the token of the assignment that introduces the value - confirm.
blitz::token _assign;
|
||||
// NOTE(review): presumably the variable's name - confirm; the constructor does not set it.
std::string _name;
|
||||
// NOTE(review): presumably the assigned value, if there is one - confirm.
std::shared_ptr<value_expression> _value;
|
||||
|
||||
public:
|
||||
virtual ~variable_expression() = default;
|
||||
// Creates the variable definition from the given lexer token.
variable_expression(blitz::token token);
|
||||
};
|
||||
|
||||
} // namespace ast
|
||||
} // namespace blitz
|
||||
|
||||
@@ -17,7 +17,7 @@ blitz::ast::NumberExpression::NumberExpression(int32_t value) : value(value) {}
|
||||
blitz::ast::NumberExpression::~NumberExpression() {}
|
||||
|
||||
blitz::ast::ValueType blitz::ast::NumberExpression::GetType() {
|
||||
return ValueType::Number;
|
||||
return ValueType::INTEGER;
|
||||
}
|
||||
|
||||
blitz::ast::DecimalExpression::DecimalExpression(float_t value) : value(value) {}
|
||||
@@ -25,7 +25,7 @@ blitz::ast::DecimalExpression::DecimalExpression(float_t value) : value(value) {
|
||||
blitz::ast::DecimalExpression::~DecimalExpression() {}
|
||||
|
||||
blitz::ast::ValueType blitz::ast::DecimalExpression::GetType() {
|
||||
return ValueType::Decimal;
|
||||
return ValueType::REAL;
|
||||
}
|
||||
|
||||
blitz::ast::StringExpression::StringExpression(std::string value) : value(value) {}
|
||||
@@ -33,7 +33,7 @@ blitz::ast::StringExpression::StringExpression(std::string value) : value(value)
|
||||
blitz::ast::StringExpression::~StringExpression() {}
|
||||
|
||||
blitz::ast::ValueType blitz::ast::StringExpression::GetType() {
|
||||
return ValueType::String;
|
||||
return ValueType::STRING;
|
||||
}
|
||||
|
||||
blitz::ast::ConstExpression::ConstExpression(std::string& name, std::unique_ptr<ValueExpression> value)
|
||||
|
||||
@@ -13,11 +13,11 @@
|
||||
namespace blitz {
|
||||
namespace ast {
|
||||
enum class ValueType : int8_t {
|
||||
Unknown,
|
||||
Number,
|
||||
Decimal,
|
||||
String,
|
||||
Type,
|
||||
UNKNOWN,
|
||||
INTEGER,
|
||||
REAL,
|
||||
STRING,
|
||||
TYPE,
|
||||
};
|
||||
|
||||
class ValueExpression : public expression {
|
||||
@@ -39,7 +39,7 @@ namespace blitz {
|
||||
|
||||
class VariableExpression : public ValueExpression {
|
||||
public:
|
||||
VariableExpression(std::string& name, ValueType type = ValueType::Number);
|
||||
VariableExpression(std::string& name, ValueType type = ValueType::INTEGER);
|
||||
virtual ~VariableExpression();
|
||||
|
||||
virtual ValueType GetType() override;
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
// Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
|
||||
// AUTOGENERATED COPYRIGHT HEADER END
|
||||
#include "compiler.hpp"
|
||||
/*
|
||||
#include "parser.hpp"
|
||||
#include "lexer.hpp"
|
||||
#include <fstream>
|
||||
@@ -10,3 +11,4 @@
|
||||
blitz::compiler::compiler() {}
|
||||
|
||||
blitz::compiler::~compiler() {}
|
||||
*/
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
// Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
|
||||
// AUTOGENERATED COPYRIGHT HEADER END
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <filesystem>
|
||||
|
||||
// Compiling requires several steps
|
||||
// 1. Lexing to known tokens
|
||||
@@ -13,10 +11,16 @@
|
||||
// 3. Compiling from the AST to some kind of runnable binary.
|
||||
// - In our case, we convert the AST to LLVM IR, and then compile it with LLVM.
|
||||
|
||||
/*
|
||||
#include <filesystem>
|
||||
#include <string>
|
||||
|
||||
|
||||
namespace blitz {
|
||||
class compiler {
|
||||
public:
|
||||
compiler();
|
||||
~compiler();
|
||||
};
|
||||
}
|
||||
} // namespace blitz
|
||||
*/
|
||||
|
||||
@@ -1,4 +1,28 @@
|
||||
// AUTOGENERATED COPYRIGHT HEADER START
|
||||
// Copyright (C) NaN-NaN undefined
|
||||
// Copyright (C) 2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
|
||||
// AUTOGENERATED COPYRIGHT HEADER END
|
||||
#include "error.hpp"
|
||||
|
||||
blitz::error::~error() {}
|
||||
|
||||
blitz::error::error(std::filesystem::path file, std::pair<uint64_t, uint64_t> base, std::pair<uint64_t, uint64_t> at, std::string reason) : std::runtime_error(reason)
|
||||
{
|
||||
_file = file;
|
||||
_base = base;
|
||||
_at = at;
|
||||
}
|
||||
|
||||
std::filesystem::path const& blitz::error::file() const
|
||||
{
|
||||
return _file;
|
||||
}
|
||||
|
||||
std::pair<uint64_t, uint64_t> const& blitz::error::base() const
|
||||
{
|
||||
return _base;
|
||||
}
|
||||
|
||||
std::pair<uint64_t, uint64_t> const& blitz::error::at() const
|
||||
{
|
||||
return _at;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,27 @@
|
||||
// AUTOGENERATED COPYRIGHT HEADER START
|
||||
// Copyright (C) NaN-NaN undefined
|
||||
// Copyright (C) 2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
|
||||
// AUTOGENERATED COPYRIGHT HEADER END
|
||||
#pragma once
|
||||
#include <cinttypes>
|
||||
#include <filesystem>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
namespace blitz {
    /** A runtime error that additionally records where in a source file it happened.
     *
     * The message is stored by std::runtime_error and is available through what().
     */
    class error : public std::runtime_error {
        // Path of the file the error refers to.
        std::filesystem::path _file;
        // First of the two locations supplied at construction.
        std::pair<uint64_t, uint64_t> _base;
        // Second of the two locations supplied at construction.
        std::pair<uint64_t, uint64_t> _at;

    public:
        // std::runtime_error has a virtual destructor; `override` makes that relationship explicit.
        ~error() override;

        /** Creates the error.
         *
         * @param file   Path returned by file().
         * @param base   Location pair returned by base().
         * @param at     Location pair returned by at().
         * @param reason Message returned by what().
         */
        error(std::filesystem::path file, std::pair<uint64_t, uint64_t> base, std::pair<uint64_t, uint64_t> at, std::string reason);

        /// @return The file path given at construction.
        std::filesystem::path const& file() const;

        /// @return The `base` location pair given at construction.
        std::pair<uint64_t, uint64_t> const& base() const;

        /// @return The `at` location pair given at construction.
        std::pair<uint64_t, uint64_t> const& at() const;
    };
} // namespace blitz
|
||||
|
||||
+314
-63
@@ -3,55 +3,88 @@
|
||||
// AUTOGENERATED COPYRIGHT HEADER END
|
||||
#include "lexer.hpp"
|
||||
#include <codecvt>
|
||||
#include <cstdarg>
|
||||
#include <sstream>
|
||||
|
||||
/*std::pair<char, blitz::tokentype> g_symbolCharacters[] = {
|
||||
//{ '\"', BlitzLLVM::Lexer::Token::TokenDoubleQuote }, // Has special meaning.
|
||||
{ '+', blitz::tokentype::TokenPlus },
|
||||
{ '-', blitz::tokentype::TokenMinus },
|
||||
{ '/', blitz:::tokentype::TokenSlashForward },
|
||||
{ '\\', blitz::tokentype::TokenSlashBackward },
|
||||
{ '*', blitz::tokentype::TokenMultiply },
|
||||
{ '=', blitz::tokentype::TokenEqual },
|
||||
{ '#', blitz::tokentype::TokenOctothorp },
|
||||
{ '%', blitz::tokentype::TokenPercent },
|
||||
{ '$', blitz::tokentype::TokenDollar },
|
||||
{ '(', blitz::tokentype::TokenRoundBracketOpen },
|
||||
{ ')', blitz::tokentype::TokenRoundBracketClose },
|
||||
{ '[', blitz::tokentype::TokenSquareBracketOpen },
|
||||
{ ']', blitz::tokentype::TokenSquareBracketClose },
|
||||
{ '<', blitz::tokentype::TokenAngleBracketOpen },
|
||||
{ '>', blitz::tokentype::TokenAngleBracketClose },
|
||||
//{ '.', BlitzLLVM::Token::TokenDot }, // Special meaning.
|
||||
{ ':', blitz::tokentype::TokenColon },
|
||||
{ ',', blitz::tokentype::TokenComma },
|
||||
//{ ';', BlitzLLVM::Token::TokenSemicolon },
|
||||
{ '^', blitz::tokentype::TokenCaret },
|
||||
{ '~', blitz::tokentype::TokenBitNot },
|
||||
};*/
|
||||
// printf-style formatting into a std::string.
//
// format: printf format string; the remaining arguments are the values it refers to.
// Returns the formatted text, or an empty string if vsnprintf reports an encoding error.
std::string format(const char* format, ...)
{
    va_list measure_args;
    va_list write_args;
    va_start(measure_args, format);
    // A va_list can only be walked once, so keep a copy for the second vsnprintf call.
    va_copy(write_args, measure_args);

    // First pass: ask how many characters are needed (terminator not included).
    const int length = vsnprintf(nullptr, 0, format, measure_args);
    va_end(measure_args);

    std::string result;
    // vsnprintf returns a negative value on error; the result then stays empty instead of
    // the length being used as a size.
    if (length > 0) {
        result.resize(static_cast<std::string::size_type>(length));
        // Second pass: the string owns size() + 1 characters including its terminator, so
        // vsnprintf may write its trailing '\0' there.
        vsnprintf(&result[0], result.size() + 1, format, write_args);
    }
    va_end(write_args);

    return result;
}
|
||||
|
||||
std::string blitz::token::to_string()
|
||||
{
|
||||
std::string name;
|
||||
switch (type) {
|
||||
case variant::UNKNOWN:
|
||||
name = "Unknown";
|
||||
break;
|
||||
case variant::ENDOFFILE:
|
||||
name = "EndOfFile";
|
||||
break;
|
||||
case variant::NEWLINE:
|
||||
name = "NewLine";
|
||||
break;
|
||||
case variant::CONTROL:
|
||||
name = "Control";
|
||||
break;
|
||||
case variant::COMMENT:
|
||||
name = "Comment";
|
||||
break;
|
||||
case variant::TEXT:
|
||||
name = "Text";
|
||||
break;
|
||||
case variant::STRING:
|
||||
name = "String";
|
||||
break;
|
||||
case variant::INTEGER:
|
||||
name = "Integer";
|
||||
break;
|
||||
case variant::REAL:
|
||||
name = "Real";
|
||||
break;
|
||||
case variant::SYMBOL:
|
||||
name = "Symbol";
|
||||
break;
|
||||
default:
|
||||
name = "How the fuck?!";
|
||||
break;
|
||||
}
|
||||
|
||||
if (type == variant::NEWLINE || type == variant::CONTROL) {
|
||||
return format("%s(%llu@%llu, %d)", name.c_str(), location.first, location.second, text[0]);
|
||||
} else {
|
||||
return format("%s(%llu@%llu, %s)", name.c_str(), location.first, location.second, text.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
blitz::lexer::~lexer() {}
|
||||
|
||||
blitz::lexer::lexer(std::filesystem::path file)
|
||||
{
|
||||
// Usually files start at line and character 0, so we should start there too.
|
||||
_line = _character = 0;
|
||||
// Usually files start at line 1 and character 1, so we should start there too.
|
||||
_location = {1, 1};
|
||||
|
||||
// Try and open the file for reading.
|
||||
_file = file;
|
||||
_stream = std::ifstream(_file, std::ios_base::binary); // We use binary so we can eventually support UTF-8.
|
||||
if (!_stream.good() || _stream.eof() || _stream.bad() || _stream.fail()) {
|
||||
char buffer[16384];
|
||||
int len = snprintf(buffer, sizeof(buffer), "Reading file '%s' failed.\0", file.generic_string().c_str());
|
||||
throw std::runtime_error(std::string(buffer, buffer + len));
|
||||
throw std::runtime_error(format("Reading file '%s' failed.", file.generic_string().c_str()));
|
||||
}
|
||||
|
||||
// Initialize token storage to a default token.
|
||||
_override = _current = blitz::token{
|
||||
.line = 0,
|
||||
.character = 0,
|
||||
.text = "",
|
||||
.type = token::variant::UNKNOWN,
|
||||
.location = {0, 0},
|
||||
.text = "",
|
||||
.type = token::variant::UNKNOWN,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -69,54 +102,272 @@ blitz::token blitz::lexer::next()
|
||||
STRING,
|
||||
COMMENT,
|
||||
} state = stage::DEFAULT;
|
||||
bool numberHasDot = false;
|
||||
|
||||
std::stringstream buffer;
|
||||
blitz::token token{
|
||||
.line = _line,
|
||||
.character = _character,
|
||||
.text = "",
|
||||
.type = blitz::token::variant::UNKNOWN,
|
||||
.location = _location,
|
||||
.text = "",
|
||||
.type = blitz::token::variant::UNKNOWN,
|
||||
};
|
||||
|
||||
// Helper function to advance text.
|
||||
auto advance = [this]() {
|
||||
_character++;
|
||||
return _stream.get();
|
||||
auto issymbol = [](int chr) {
|
||||
switch (chr) {
|
||||
case ';': // Comment
|
||||
case ':': // Command Separator
|
||||
case '=': // Equal
|
||||
case '<': // Less Than
|
||||
case '>': // Greater Than
|
||||
case '~': // Bitwise Not
|
||||
case '^': // Exponential (X ^ Y = pow(X, Y))
|
||||
case '+': // Plus
|
||||
case '-': // Minus
|
||||
case '*': // Multiply
|
||||
case '/': // Divide
|
||||
case ',': // Parameter Separation
|
||||
case '%': // Integer Type
|
||||
case '#': // Real Type
|
||||
case '$': // String Type
|
||||
case '.': // Structured Type
|
||||
case '\\': // Structured Type Access
|
||||
// Blitz Arrays
|
||||
case '[':
|
||||
case ']':
|
||||
// Call, Grouping, Dim
|
||||
case '(':
|
||||
case ')':
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
auto iswhitespace = [](int chr) {
|
||||
switch (chr) {
|
||||
case ' ':
|
||||
case '\t':
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
while ((token.type == blitz::token::variant::UNKNOWN) && _stream.good() && !_stream.eof()) {
|
||||
auto chr = advance();
|
||||
// ToDo: Figure out why we don't ever hit chr == EOF.
|
||||
if (_stream.eof()) {
|
||||
token.location = _location;
|
||||
token.type = blitz::token::variant::ENDOFFILE;
|
||||
return token;
|
||||
}
|
||||
|
||||
bool complete = false;
|
||||
while (!complete && _stream.good()) {
|
||||
// Peek at the current byte, without advancing the read pointer until we need to.
|
||||
auto chr = _stream.peek();
|
||||
bool is_newline = (chr == '\r') || (chr == '\n');
|
||||
bool is_returnfeed = (chr == '\r');
|
||||
|
||||
if (state == stage::DEFAULT) {
|
||||
if (chr == ';') { // We've encountered a comment, so we should change state and ignore this symbol.
|
||||
state = stage::COMMENT;
|
||||
token.line = _line;
|
||||
token.character = _character;
|
||||
token.type = blitz::token::variant::COMMENT;
|
||||
if (chr == EOF) {
|
||||
token.type = blitz::token::variant::ENDOFFILE;
|
||||
token.text = "";
|
||||
token.location = _location;
|
||||
complete = true;
|
||||
_location.second++;
|
||||
} else if (is_newline) {
|
||||
// New Line, should be handled like a control character, but with some special things.
|
||||
token.type = blitz::token::variant::NEWLINE;
|
||||
token.text = "\n";
|
||||
token.location = _location;
|
||||
complete = true;
|
||||
|
||||
// Advance the read pointer.
|
||||
_stream.get();
|
||||
|
||||
// Is this a Windows-style \r\n?
|
||||
if (is_returnfeed && (_stream.peek() == '\n')) {
|
||||
// If so, advance the read pointer again.
|
||||
_stream.get();
|
||||
}
|
||||
|
||||
// Then update the location.
|
||||
_location.first++;
|
||||
_location.second = 1;
|
||||
} else if (iswhitespace(chr)) {
|
||||
// This is white space, which we'll happily ignore.
|
||||
_stream.get();
|
||||
_location.second++;
|
||||
} else if (chr < 32) {
|
||||
// Likely to be a control character.
|
||||
token.location = _location;
|
||||
token.type = blitz::token::variant::CONTROL;
|
||||
token.text = {1, char(chr)};
|
||||
complete = true;
|
||||
_stream.get();
|
||||
_location.second++;
|
||||
} else if (chr == ':') {
|
||||
// Allows code writers to pretend it's all one line.
|
||||
token.location = _location;
|
||||
token.type = blitz::token::variant::SEPARATOR;
|
||||
token.text = {1, char(chr)};
|
||||
complete = true;
|
||||
_stream.get();
|
||||
_location.second++;
|
||||
} else if (chr == ';') {
|
||||
// A comment, which ends at the next new line.
|
||||
state = stage::COMMENT;
|
||||
token.location = _location;
|
||||
token.type = blitz::token::variant::COMMENT;
|
||||
} else if (isdigit(chr)) {
|
||||
// Probably an Integer, or if the latter, it's a Real.
|
||||
state = stage::NUMBER;
|
||||
token.location = _location;
|
||||
token.type = blitz::token::variant::INTEGER;
|
||||
} else if (isalpha(chr)) {
|
||||
// Text of some kind.
|
||||
state = stage::TEXT;
|
||||
token.location = _location;
|
||||
token.type = blitz::token::variant::TEXT;
|
||||
} else if (chr == '"') {
|
||||
// A quoted string.
|
||||
state = stage::STRING;
|
||||
token.location = _location;
|
||||
token.type = blitz::token::variant::STRING;
|
||||
|
||||
// Advance so we actually get anywhere.
|
||||
_stream.get();
|
||||
_location.second++;
|
||||
} else if (issymbol(chr)) {
|
||||
// Special Handling for a few symbols that could mean multiple things.
|
||||
if (chr == '.') { // '.' can start a Real, Label or Structured Type Access. We don't want to decide on the latter here, that's a parser thing.
|
||||
buffer << (char)chr;
|
||||
|
||||
// We advance the read pointer here to look at what's coming next.
|
||||
_stream.get();
|
||||
chr = _stream.peek();
|
||||
_location.second++;
|
||||
|
||||
// Peek at what's coming next.
|
||||
if (isdigit(chr)) {
|
||||
// This is a Real number.
|
||||
token.location = _location;
|
||||
token.type = blitz::token::variant::REAL;
|
||||
state = stage::NUMBER;
|
||||
} else {
|
||||
// Assume this is a symbol and return to normal behavior.
|
||||
token.location = _location;
|
||||
token.text = buffer.str();
|
||||
token.type = blitz::token::variant::SYMBOL;
|
||||
complete = true;
|
||||
}
|
||||
} else if ((chr == '+') || (chr == '-')) { // '+' & '-' could be prefixes to an Integer or Real.
|
||||
buffer << (char)chr;
|
||||
|
||||
// Advance the read pointer to peek at the future.
|
||||
_stream.get();
|
||||
chr = _stream.peek();
|
||||
_location.second++;
|
||||
|
||||
// Peek at what's coming up.
|
||||
if (isdigit(chr) || (chr == '.')) { // Likely to be a Real or Integer.
|
||||
token.location = _location;
|
||||
if (chr == '.') {
|
||||
token.type = blitz::token::variant::REAL;
|
||||
} else {
|
||||
token.type = blitz::token::variant::INTEGER;
|
||||
}
|
||||
state = stage::NUMBER;
|
||||
} else {
|
||||
token.location = _location;
|
||||
token.text = buffer.str();
|
||||
token.type = blitz::token::variant::SYMBOL;
|
||||
complete = true;
|
||||
}
|
||||
} else {
|
||||
token.location = _location;
|
||||
token.text = {1, char(chr)};
|
||||
token.type = blitz::token::variant::SYMBOL;
|
||||
complete = true;
|
||||
|
||||
// Advance so we actually get anywhere.
|
||||
_stream.get();
|
||||
_location.second++;
|
||||
}
|
||||
} else {
|
||||
buffer << chr;
|
||||
// Everything else is an error
|
||||
throw blitz::error(_file, _location, _location, "You've encountered a bug. Please report this with the file that caused it.");
|
||||
}
|
||||
} else if (state == stage::NUMBER) {
|
||||
if (isdigit(chr)) {
|
||||
buffer << chr;
|
||||
} else if (chr == '.')
|
||||
{
|
||||
if (numberHasDot) {
|
||||
throw std::runtime_error("")
|
||||
if ((chr == EOF) || (chr < 32) || is_newline || iswhitespace(chr) || (chr == ';')) {
|
||||
// EOF, Control, NL, Whitespace, and Comments should return to default parsing.
|
||||
complete = true;
|
||||
} else if (isdigit(chr) || (chr == '.')) {
|
||||
_stream.get();
|
||||
buffer << (char)chr;
|
||||
if (chr == '.') {
|
||||
if (token.type != token::variant::REAL) {
|
||||
token.type = blitz::token::variant::REAL;
|
||||
} else {
|
||||
token.text = buffer.str();
|
||||
throw blitz::error(_file, token.location, _location, format("In token %s: Expected [0-9], got '%s' instead.", token.to_string().c_str(), std::string{1, (char)chr}.c_str()));
|
||||
}
|
||||
}
|
||||
numberHasDot = true;
|
||||
} else if (issymbol(chr)) {
|
||||
complete = true;
|
||||
} else {
|
||||
|
||||
token.text = buffer.str();
|
||||
throw blitz::error(_file, token.location, _location, format("In token %s: Expected [0-9.], got '%s' instead.", token.to_string().c_str(), std::string{1, (char)chr}.c_str()));
|
||||
}
|
||||
|
||||
} else if (state == stage::TEXT) {
|
||||
} else if (state == stage::STRING) {
|
||||
} else if (state == stage::COMMENT) {
|
||||
if (chr == '\r' && _stream.peek() == '\n') {
|
||||
if (complete) {
|
||||
token.text = buffer.str();
|
||||
}
|
||||
} else if (state == stage::TEXT) {
|
||||
if ((chr == EOF) || (chr < 32) || is_newline || iswhitespace(chr) || issymbol(chr)) {
|
||||
// Return to default parsing.
|
||||
complete = true;
|
||||
} else if (isalpha(chr) || isdigit(chr) || (chr == '_')) {
|
||||
buffer << (char)chr;
|
||||
_stream.get();
|
||||
_location.second++;
|
||||
} else {
|
||||
buffer << chr;
|
||||
token.text = buffer.str();
|
||||
throw blitz::error(_file, token.location, _location, format("In token %s: Expected [a-zA-Z0-9_], got '%s' instead.", token.to_string().c_str(), std::string{1, (char)chr}.c_str()));
|
||||
}
|
||||
|
||||
if (complete) {
|
||||
token.text = buffer.str();
|
||||
}
|
||||
} else if (state == stage::STRING) {
|
||||
if ((chr == EOF) || (chr < 32) || is_newline) {
|
||||
// Return to default parsing.
|
||||
complete = true;
|
||||
} else if (chr == '"') { // The only true way to end a string.
|
||||
complete = true;
|
||||
|
||||
// Skip over the " so we don't confuse the parser.
|
||||
_stream.get();
|
||||
_location.second++;
|
||||
} else {
|
||||
buffer << (char)chr;
|
||||
_stream.get();
|
||||
_location.second++;
|
||||
}
|
||||
|
||||
if (complete) {
|
||||
token.text = buffer.str();
|
||||
}
|
||||
} else if (state == stage::COMMENT) {
|
||||
if ((chr == EOF) || (chr < 32) || is_newline) {
|
||||
// Return to default parsing at this point.
|
||||
complete = true;
|
||||
} else {
|
||||
buffer << (char)chr;
|
||||
_stream.get();
|
||||
_location.second++;
|
||||
}
|
||||
|
||||
if (complete) {
|
||||
token.text = buffer.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,34 +2,39 @@
|
||||
// Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
|
||||
// AUTOGENERATED COPYRIGHT HEADER END
|
||||
#pragma once
|
||||
#include <cinttypes>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <inttypes.h>
|
||||
#include <iostream>
|
||||
#include <istream>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "error.hpp"
|
||||
|
||||
// ToDo:
|
||||
// - Figure out a way to let the lexer output line and character information?
|
||||
|
||||
namespace blitz {
|
||||
struct token {
|
||||
uint64_t line;
|
||||
uint64_t character;
|
||||
|
||||
std::string text;
|
||||
std::pair<uint64_t, uint64_t> location;
|
||||
std::string text;
|
||||
enum class variant : uint64_t {
|
||||
UNKNOWN, // We have absolutely no fucking clue.
|
||||
CONTROL, // All kinds of control signals, like NewLine, EndOfFile, ...
|
||||
ENDOFFILE, // End of the file.
|
||||
NEWLINE, // New Line.
|
||||
SEPARATOR, // Command Separator.
|
||||
CONTROL, // All kinds of control signals
|
||||
SYMBOL, // All kinds of symbols.
|
||||
TEXT, // HelloWorld
|
||||
NUMBER, // 1, 1%
|
||||
DECIMAL, // 1.0, 1#
|
||||
STRING, // "HelloWorld"
|
||||
COMMENT, // ; Whatever
|
||||
TEXT, // HelloWorld
|
||||
STRING, // "HelloWorld"
|
||||
INTEGER, // 1, 1% (without the %)
|
||||
REAL, // 1.0, 1# (without the #)
|
||||
} type;
|
||||
|
||||
std::string to_string();
|
||||
};
|
||||
|
||||
class lexer {
|
||||
@@ -37,8 +42,7 @@ namespace blitz {
|
||||
std::ifstream _stream;
|
||||
|
||||
// Current location in the file.
|
||||
uint64_t _line;
|
||||
uint64_t _character;
|
||||
std::pair<uint64_t, uint64_t> _location;
|
||||
|
||||
blitz::token _current;
|
||||
blitz::token _override;
|
||||
|
||||
@@ -1,9 +1,92 @@
|
||||
// AUTOGENERATED COPYRIGHT HEADER START
|
||||
// Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
|
||||
// AUTOGENERATED COPYRIGHT HEADER END
|
||||
#include <iostream>
|
||||
#include "compiler.hpp"
|
||||
#include "error.hpp"
|
||||
#include "lexer.hpp"
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
std::cout << argv[1] << std::endl;
|
||||
blitz::lexer lex(argv[1]);
|
||||
|
||||
try {
|
||||
for (blitz::token token = lex.next(); (token.type != blitz::token::variant::ENDOFFILE); token = lex.next()) {
|
||||
switch (token.type) {
|
||||
case blitz::token::variant::COMMENT:
|
||||
std::cout << token.text;
|
||||
break;
|
||||
case blitz::token::variant::SYMBOL:
|
||||
std::cout << token.text << " ";
|
||||
break;
|
||||
case blitz::token::variant::TEXT:
|
||||
case blitz::token::variant::INTEGER:
|
||||
case blitz::token::variant::REAL:
|
||||
std::cout << token.text << " ";
|
||||
break;
|
||||
case blitz::token::variant::STRING:
|
||||
std::cout << "\"" << token.text << "\""
|
||||
<< " ";
|
||||
break;
|
||||
case blitz::token::variant::NEWLINE:
|
||||
std::cout << std::endl;
|
||||
break;
|
||||
default:
|
||||
std::cout << token.to_string() << " ";
|
||||
break;
|
||||
}
|
||||
if (token.type == blitz::token::variant::UNKNOWN) {
|
||||
std::cin.get();
|
||||
}
|
||||
}
|
||||
} catch (blitz::error const& ex) {
|
||||
std::cout << ex.file() << std::endl;
|
||||
std::cout << "Line " << ex.at().first << ", Char " << ex.at().second << ": " << ex.what() << std::endl;
|
||||
} catch (std::runtime_error const& ex) {
|
||||
std::cout << ex.what() << std::endl;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
blitz::compiler comp;
|
||||
comp.compile(argv[1], std::string(argv[1]) + ".exe");
|
||||
std::cin.get();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// BlitzBasic is a strange but powerful language in the right hands. While it has
|
||||
// somewhat unusual syntax and rules at times, it does not usually have ambiguous
|
||||
// syntax and rules like C and C++ do. Overall, the quirks can be easily explained
|
||||
// and shouldn't cause odd problems.
|
||||
//
|
||||
// 1. Variables can be automatically defined if you did not define them before.
|
||||
// ```
|
||||
// Local var1 ; Local Variable definition of var1
|
||||
// Global var2 ; Global Variable definition of var2
|
||||
// var1 = var3 ; Automatic definition of var3 as Local Variable
|
||||
// ```
|
||||
//
|
||||
// 2. Names are not unique, and case-insensitive
|
||||
// ```
|
||||
// Local myName ; Defines myName as Local
|
||||
// Local MyName ; Defines MyName as Local, should error because myName has already been defined.
|
||||
// Function myName() : End Function ; Defines myName as Function
|
||||
// Type myName ; Defines myName as Type
|
||||
// Field Bla
|
||||
// End Type
|
||||
// ```
|
||||
//
|
||||
// 3. Function calls don't always need Parenthesis:
|
||||
// ```
|
||||
// Function myName() : End Function
|
||||
// If myName() Then : EndIf ; <- Calls myName
|
||||
// myName ; <- Calls myName, because there is no = after it.
|
||||
// ```
|
||||
//
|
||||
// 4. Int(TypeVariable) returns the pointer to the TypeVariable:
|
||||
// ```
|
||||
// Type myName
|
||||
// Field Bla
|
||||
// End Type
|
||||
// Local myName.myName = New myName
|
||||
// Print Int(myName) ; <- Prints the address of the object contained in myName.
|
||||
// ```
|
||||
//
|
||||
// As this is a Basic language, there is no concept of undefined or uninitialized anything. Every behavior is well defined.
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
/// AUTOGENERATED COPYRIGHT HEADER START
|
||||
// Copyright (C) 2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
|
||||
// AUTOGENERATED COPYRIGHT HEADER END
|
||||
/*
|
||||
#include "parser.hpp"
|
||||
#include "ast/function.hpp"
|
||||
#include <iostream>
|
||||
@@ -111,3 +112,4 @@ std::unique_ptr<blitz::ast::DecimalExpression> blitz::parser::parse_decimal(blit
|
||||
|
||||
return std::make_unique<blitz::ast::DecimalExpression>(parsed);
|
||||
}
|
||||
*/
|
||||
|
||||
@@ -2,14 +2,28 @@
|
||||
// Copyright (C) 2017-2024 Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
|
||||
// AUTOGENERATED COPYRIGHT HEADER END
|
||||
#pragma once
|
||||
#include "lexer.hpp"
|
||||
#include <filesystem>
|
||||
#include "ast/ast.hpp"
|
||||
#include "ast/value.hpp"
|
||||
#include "lexer.hpp"
|
||||
|
||||
namespace blitz {
|
||||
// Parser front end. Only the construction interface and the file path member are declared
// so far.
class parser {
|
||||
// NOTE(review): presumably the path passed to the constructor - confirm against the
// definition, which is not part of this header.
std::filesystem::path _file;
|
||||
|
||||
public:
|
||||
~parser();
|
||||
// Creates a parser for the given file path.
parser(std::filesystem::path file);
|
||||
};
|
||||
} // namespace blitz
|
||||
|
||||
/*
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <stack>
|
||||
#include <string>
|
||||
#include "ast/value.hpp"
|
||||
#include "lexer.hpp"
|
||||
|
||||
namespace blitz {
|
||||
class parser {
|
||||
@@ -37,3 +51,4 @@ namespace blitz {
|
||||
|
||||
};
|
||||
}
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user