diff --git a/examples/math.funk b/examples/math.funk
index 7f8c3a086cd5475c60a2c2838af7b7fb0e33349d..f18cbf8bdf043618ab95486c96ec9f58dabd7805 100644
--- a/examples/math.funk
+++ b/examples/math.funk
@@ -1 +1 @@
-(1 + 1) / 2 * 100
+(1 + 1) / 2 * 100;
diff --git a/include/parser/Parser.h b/include/parser/Parser.h
new file mode 100644
index 0000000000000000000000000000000000000000..46e06eab937fbb2a43e8c2cb7d37bd20cea9d3e3
--- /dev/null
+++ b/include/parser/Parser.h
@@ -0,0 +1,71 @@
+#pragma once
+
+#include "lexer/Lexer.h"
+#include "token/Token.h"
+#include "utils/Common.h"
+
+#include "ast/BinaryOpNode.h"
+#include "ast/LiteralNode.h"
+#include "ast/Node.h"
+#include "ast/UnaryOpNode.h"
+
+namespace funk
+{
+/**
+ * @brief Recursive-descent parser that turns a token stream into an AST.
+ *
+ * Each parse_* method handles one grammar level and delegates to the next
+ * tighter-binding level, from statements down to literals.
+ */
+class Parser
+{
+public:
+    /// Creates a parser over a copy of `tokens`; `filename` is stored alongside.
+    Parser(const Vector<Token>& tokens, const String& filename);
+    ~Parser();
+
+    /// Parses a single statement and returns its root node.
+    Node* parse();
+
+    /// Lexes the file at `filename` and returns a parser over its tokens.
+    static Parser load(String filename);
+
+private:
+    Vector<Token> tokens;
+    String filename;
+    int index{0}; ///< Position of the current (not yet consumed) token.
+
+    // Token-stream navigation.
+    Token next();
+    Token peek() const;
+    Token peek_next() const;
+    Token peek_prev() const;
+
+    bool done() const;
+    bool check(TokenType type) const;
+    bool match(TokenType expected);
+
+    // Token-to-operator mapping; both throw SyntaxError for unmapped tokens.
+    BinaryOp to_binary_op(const Token& token) const;
+    UnaryOp to_unary_op(const Token& token) const;
+
+    /// Downcasts `node` to ExpressionNode; throws SyntaxError if the cast yields nullptr.
+    ExpressionNode* expect_expression(Node* node) const;
+
+    Node* parse_statement();
+    Node* parse_expression();
+    Node* parse_assignment();
+
+    // Expression levels, loosest-binding first.
+    Node* parse_logical_or();
+    Node* parse_logical_and();
+    Node* parse_equality();
+    Node* parse_comparison();
+    Node* parse_additive();
+    Node* parse_multiplicative();
+    Node* parse_unary();
+    Node* parse_factor();
+    Node* parse_literal();
+    Node* parse_identifier();
+};
+} // namespace funk
diff --git a/source/main.cc b/source/main.cc
index 6a036099d3037d9cabd069bc1dbc48d1c277e502..4eb5d77d3fa4c2198d6d51f312c175a6cd2f15f4 100644
--- a/source/main.cc
+++ b/source/main.cc
@@ -1,5 +1,5 @@
-#include "lexer/Lexer.h"
 #include "logging/LogMacros.h"
+#include "parser/Parser.h"
 #include "utils/ArgParser.h"
 #include "utils/Common.h"
 
@@ -43,10 +43,11 @@ int main(int argc, char* argv[])
     // Lex each file
     for (const auto& file : parser.get_files())
     {
-        LOG_INFO("Lexing file: " + file);
-        Lexer lexer(read_file(file), file);
-        for (const auto& token : lexer.tokenize()) { LOG_DEBUG(token); }
-        LOG_INFO("Finished lexing file: " + file);
+        // Named file_parser so it does not shadow the outer `parser` that supplies get_files().
+        Parser file_parser{Parser::load(file)};
+        Node* result{file_parser.parse()};
+
+        cout << result->to_s() << " = " << result->evaluate()->to_s() << '\n';
     }
 
     return 0;
diff --git a/source/parser/Parser.cc b/source/parser/Parser.cc
new file mode 100644
index 0000000000000000000000000000000000000000..1abb26d01128a22a16966dcccea173816f5783d7
--- /dev/null
+++ b/source/parser/Parser.cc
@@ -0,0 +1,291 @@
+#include "parser/Parser.h"
+#include "logging/LogMacros.h"
+
+namespace funk
+{
+Parser::Parser(const Vector<Token>& tokens, const String& filename) : tokens(tokens), filename(filename) {}
+
+Parser::~Parser() {}
+
+/// Entry point: parses one statement and returns its root node.
+Node* Parser::parse()
+{
+    LOG_DEBUG("Parse program");
+    return parse_statement();
+}
+
+/// Reads and lexes `filename`, then builds a parser over the resulting tokens.
+Parser Parser::load(String filename)
+{
+    Lexer lexer{read_file(filename), filename};
+    return Parser(lexer.tokenize(), filename);
+}
+
+/// Advances past the current token unless already done, then returns peek_prev().
+Token Parser::next()
+{
+    if (!done()) index++;
+    return peek_prev();
+}
+
+/// Returns the current token without consuming it.
+Token Parser::peek() const
+{
+    return tokens.at(index);
+}
+
+/// Returns the token before the current one (the first token while index is 0).
+Token Parser::peek_prev() const
+{
+    return tokens.at(index > 0 ? index - 1 : 0);
+}
+
+/// Returns the token after the current one, clamped to the last token.
+Token Parser::peek_next() const
+{
+    // Testing !done() is not enough: on the last token index + 1 is already out of range.
+    const int last{static_cast<int>(tokens.size()) - 1};
+    return tokens.at(index < last ? index + 1 : last);
+}
+
+/// True once index has moved past the last token.
+bool Parser::done() const
+{
+    return index >= static_cast<int>(tokens.size());
+}
+
+/// True if the current token has the given type; false when no tokens remain.
+bool Parser::check(TokenType type) const
+{
+    // Without this guard, running off the end surfaces as std::out_of_range from at().
+    if (done()) return false;
+    return tokens.at(index).get_type() == type;
+}
+
+/// Consumes the current token if it has the expected type; returns whether it did.
+bool Parser::match(TokenType expected)
+{
+    if (!check(expected)) return false;
+    next();
+    return true;
+}
+
+/// Maps an operator token to its BinaryOp; throws SyntaxError for any other token.
+BinaryOp Parser::to_binary_op(const Token& token) const
+{
+    switch (token.get_type())
+    {
+    case TokenType::PLUS: return BinaryOp::PLUS;
+    case TokenType::MINUS: return BinaryOp::MINUS;
+    case TokenType::MULTIPLY: return BinaryOp::MULTIPLY;
+    case TokenType::DIVIDE: return BinaryOp::DIVIDE;
+    case TokenType::MODULO: return BinaryOp::MODULO;
+    case TokenType::POWER: return BinaryOp::POWER;
+    case TokenType::EQUAL: return BinaryOp::EQUAL;
+    case TokenType::NOT_EQUAL: return BinaryOp::NOT_EQUAL;
+    case TokenType::LESS: return BinaryOp::LESS;
+    case TokenType::LESS_EQUAL: return BinaryOp::LESS_EQUAL;
+    case TokenType::GREATER: return BinaryOp::GREATER;
+    case TokenType::GREATER_EQUAL: return BinaryOp::GREATER_EQUAL;
+    case TokenType::AND: return BinaryOp::AND;
+    case TokenType::OR: return BinaryOp::OR;
+
+    default: throw SyntaxError(token.get_location(), "Invalid binary operator: " + token.get_lexeme());
+    }
+}
+
+/// Maps an operator token to its UnaryOp; throws SyntaxError for any other token.
+UnaryOp Parser::to_unary_op(const Token& token) const
+{
+    switch (token.get_type())
+    {
+    case TokenType::MINUS: return UnaryOp::NEGATE;
+    case TokenType::NOT: return UnaryOp::NOT;
+
+    default: throw SyntaxError(token.get_location(), "Invalid unary operator: " + token.get_lexeme());
+    }
+}
+
+/// Downcasts a freshly parsed node to an expression operand.
+/// The cast is nullptr for a null node (parse_identifier is still a stub) or a non-expression node;
+/// rejecting it here keeps null operands out of the operator nodes.
+ExpressionNode* Parser::expect_expression(Node* node) const
+{
+    ExpressionNode* expr{dynamic_cast<ExpressionNode*>(node)};
+    if (expr == nullptr) throw SyntaxError(peek_prev().get_location(), "Expected expression");
+    return expr;
+}
+
+/// Parses an expression that must be followed by ';'.
+Node* Parser::parse_statement()
+{
+    LOG_DEBUG("Parse statement");
+    Node* expr{parse_expression()};
+    if (!match(TokenType::SEMICOLON)) throw SyntaxError(peek_prev().get_location(), "Expected ';'");
+    return expr;
+}
+
+/// Expression level; currently forwards to the assignment level.
+Node* Parser::parse_expression()
+{
+    LOG_DEBUG("Parse expression");
+    return parse_assignment();
+}
+
+/// Assignment level; currently forwards to the logical-or level.
+Node* Parser::parse_assignment()
+{
+    LOG_DEBUG("Parse assignment");
+    return parse_logical_or();
+}
+
+/// Parses a left-associative chain of OR operators over logical-and operands.
+Node* Parser::parse_logical_or()
+{
+    LOG_DEBUG("Parse logical or");
+    ExpressionNode* left{expect_expression(parse_logical_and())};
+
+    while (match(TokenType::OR))
+    {
+        Token op{peek_prev()};
+        ExpressionNode* right{expect_expression(parse_logical_and())};
+        left = new BinaryOpNode(left, to_binary_op(op), right);
+    }
+
+    return left;
+}
+
+/// Parses a left-associative chain of AND operators over equality operands.
+Node* Parser::parse_logical_and()
+{
+    LOG_DEBUG("Parse logical and");
+    ExpressionNode* left{expect_expression(parse_equality())};
+
+    while (match(TokenType::AND))
+    {
+        Token op{peek_prev()};
+        ExpressionNode* right{expect_expression(parse_equality())};
+        left = new BinaryOpNode(left, to_binary_op(op), right);
+    }
+
+    return left;
+}
+
+/// Parses a left-associative chain of EQUAL / NOT_EQUAL operators over comparison operands.
+Node* Parser::parse_equality()
+{
+    LOG_DEBUG("Parse equality");
+    ExpressionNode* left{expect_expression(parse_comparison())};
+
+    while (match(TokenType::EQUAL) || match(TokenType::NOT_EQUAL))
+    {
+        Token op{peek_prev()};
+        ExpressionNode* right{expect_expression(parse_comparison())};
+        left = new BinaryOpNode(left, to_binary_op(op), right);
+    }
+
+    return left;
+}
+
+/// Parses a left-associative chain of LESS / LESS_EQUAL / GREATER / GREATER_EQUAL operators.
+Node* Parser::parse_comparison()
+{
+    LOG_DEBUG("Parse comparison");
+    ExpressionNode* left{expect_expression(parse_additive())};
+
+    while (match(TokenType::LESS) || match(TokenType::LESS_EQUAL) || match(TokenType::GREATER) ||
+           match(TokenType::GREATER_EQUAL))
+    {
+        Token op{peek_prev()};
+        ExpressionNode* right{expect_expression(parse_additive())};
+        left = new BinaryOpNode(left, to_binary_op(op), right);
+    }
+
+    return left;
+}
+
+/// Parses a left-associative chain of PLUS / MINUS operators over multiplicative operands.
+Node* Parser::parse_additive()
+{
+    LOG_DEBUG("Parse additive");
+    ExpressionNode* left{expect_expression(parse_multiplicative())};
+
+    while (match(TokenType::PLUS) || match(TokenType::MINUS))
+    {
+        Token op{peek_prev()};
+        ExpressionNode* right{expect_expression(parse_multiplicative())};
+        left = new BinaryOpNode(left, to_binary_op(op), right);
+    }
+
+    return left;
+}
+
+/// Parses a left-associative chain of MULTIPLY / DIVIDE / MODULO / POWER operators over unary operands.
+/// NOTE(review): POWER shares this level, so it binds like MULTIPLY and groups left-to-right — confirm intended.
+Node* Parser::parse_multiplicative()
+{
+    LOG_DEBUG("Parse multiplicative");
+    ExpressionNode* left{expect_expression(parse_unary())};
+
+    while (
+        match(TokenType::MULTIPLY) || match(TokenType::DIVIDE) || match(TokenType::MODULO) || match(TokenType::POWER))
+    {
+        Token op{peek_prev()};
+        ExpressionNode* right{expect_expression(parse_unary())};
+        left = new BinaryOpNode(left, to_binary_op(op), right);
+    }
+
+    return left;
+}
+
+/// Parses zero or more prefix MINUS / NOT operators followed by a factor.
+Node* Parser::parse_unary()
+{
+    LOG_DEBUG("Parse unary");
+    if (match(TokenType::MINUS) || match(TokenType::NOT))
+    {
+        Token op{peek_prev()};
+        // Recurse so stacked prefix operators parse; going straight to parse_factor() rejected them.
+        ExpressionNode* right{expect_expression(parse_unary())};
+        return new UnaryOpNode(to_unary_op(op), right);
+    }
+
+    return parse_factor();
+}
+
+/// Parses an identifier, a literal, or a parenthesised expression.
+Node* Parser::parse_factor()
+{
+    LOG_DEBUG("Parse factor");
+    if (match(TokenType::IDENTIFIER))
+        return parse_identifier();
+    else if (match(TokenType::NUMB) || match(TokenType::REAL) || match(TokenType::BOOL) || match(TokenType::CHAR) ||
+             match(TokenType::TEXT))
+    {
+        return parse_literal();
+    }
+    else if (match(TokenType::L_PAR))
+    {
+        Node* expr{parse_expression()};
+        if (!match(TokenType::R_PAR)) throw SyntaxError(peek_prev().get_location(), "Expected ')'");
+        return expr;
+    }
+
+    // peek() would throw std::out_of_range once every token is consumed, so fall back to the last one.
+    throw SyntaxError((done() ? peek_prev() : peek()).get_location(), "Expected expression");
+}
+
+/// Builds a LiteralNode from the literal token that was just consumed.
+Node* Parser::parse_literal()
+{
+    LOG_DEBUG("Parse literal");
+    return new LiteralNode(peek_prev().get_location(), NodeValue(peek_prev().get_value()));
+}
+
+/// Stub: identifiers are not parsed yet, so this yields nullptr.
+Node* Parser::parse_identifier()
+{
+    return nullptr;
+}
+
+} // namespace funk