// Copyright (c) 2020 Pantor. All rights reserved. #ifndef INCLUDE_INJA_PARSER_HPP_ #define INCLUDE_INJA_PARSER_HPP_ #include #include #include #include #include #include #include "config.hpp" #include "exceptions.hpp" #include "function_storage.hpp" #include "lexer.hpp" #include "node.hpp" #include "template.hpp" #include "token.hpp" #include "utils.hpp" #include namespace inja { /*! * \brief Class for parsing an inja Template. */ class Parser { const ParserConfig &config; Lexer lexer; TemplateStorage &template_storage; const FunctionStorage &function_storage; Token tok, peek_tok; bool have_peek_tok {false}; size_t current_paren_level {0}; size_t current_bracket_level {0}; size_t current_brace_level {0}; nonstd::string_view json_literal_start; BlockNode *current_block {nullptr}; ExpressionListNode *current_expression_list {nullptr}; std::stack> function_stack; std::stack> operator_stack; std::stack if_statement_stack; std::stack for_statement_stack; void throw_parser_error(const std::string &message) { INJA_THROW(ParserError(message, lexer.current_position())); } void get_next_token() { if (have_peek_tok) { tok = peek_tok; have_peek_tok = false; } else { tok = lexer.scan(); } } void get_peek_token() { if (!have_peek_tok) { peek_tok = lexer.scan(); have_peek_tok = true; } } void add_json_literal(const char* content_ptr) { nonstd::string_view json_text(json_literal_start.data(), tok.text.data() - json_literal_start.data() + tok.text.size()); current_expression_list->rpn_output.emplace_back(std::make_shared(json::parse(json_text), json_text.data() - content_ptr)); } bool parse_expression(Template &tmpl, Token::Kind closing) { while (tok.kind != closing && tok.kind != Token::Kind::Eof) { // Literals switch (tok.kind) { case Token::Kind::String: { if (current_brace_level == 0 && current_bracket_level == 0) { json_literal_start = tok.text; add_json_literal(tmpl.content.c_str()); } } break; case Token::Kind::Number: { if (current_brace_level == 0 && current_bracket_level == 0) { json_literal_start = tok.text; add_json_literal(tmpl.content.c_str()); } } break; case Token::Kind::LeftBracket: { if (current_brace_level == 0 && current_bracket_level == 0) { json_literal_start = tok.text; } current_bracket_level += 1; } break; case Token::Kind::LeftBrace: { if (current_brace_level == 0 && current_bracket_level == 0) { json_literal_start = tok.text; } current_brace_level += 1; } break; case Token::Kind::RightBracket: { if (current_bracket_level == 0) { throw_parser_error("unexpected ']'"); } current_bracket_level -= 1; if (current_brace_level == 0 && current_bracket_level == 0) { add_json_literal(tmpl.content.c_str()); } } break; case Token::Kind::RightBrace: { if (current_brace_level == 0) { throw_parser_error("unexpected '}'"); } current_brace_level -= 1; if (current_brace_level == 0 && current_bracket_level == 0) { add_json_literal(tmpl.content.c_str()); } } break; case Token::Kind::Id: { get_peek_token(); // Json Literal if (tok.text == static_cast("true") || tok.text == static_cast("false") || tok.text == static_cast("null")) { if (current_brace_level == 0 && current_bracket_level == 0) { json_literal_start = tok.text; add_json_literal(tmpl.content.c_str()); } // Operator } else if (tok.text == "and" || tok.text == "or" || tok.text == "in" || tok.text == "not") { goto parse_operator; // Functions } else if (peek_tok.kind == Token::Kind::LeftParen) { operator_stack.emplace(std::make_shared(static_cast(tok.text), tok.text.data() - tmpl.content.c_str())); function_stack.emplace(operator_stack.top().get(), current_paren_level); // Variables } else { current_expression_list->rpn_output.emplace_back(std::make_shared(static_cast(tok.text), tok.text.data() - tmpl.content.c_str())); } // Operators } break; case Token::Kind::Equal: case Token::Kind::NotEqual: case Token::Kind::GreaterThan: case Token::Kind::GreaterEqual: case Token::Kind::LessThan: case Token::Kind::LessEqual: case Token::Kind::Plus: case Token::Kind::Minus: case Token::Kind::Times: case Token::Kind::Slash: case Token::Kind::Power: case Token::Kind::Percent: case Token::Kind::Dot: { parse_operator: FunctionStorage::Operation operation; switch (tok.kind) { case Token::Kind::Id: { if (tok.text == "and") { operation = FunctionStorage::Operation::And; } else if (tok.text == "or") { operation = FunctionStorage::Operation::Or; } else if (tok.text == "in") { operation = FunctionStorage::Operation::In; } else if (tok.text == "not") { operation = FunctionStorage::Operation::Not; } else { throw_parser_error("unknown operator in parser."); } } break; case Token::Kind::Equal: { operation = FunctionStorage::Operation::Equal; } break; case Token::Kind::NotEqual: { operation = FunctionStorage::Operation::NotEqual; } break; case Token::Kind::GreaterThan: { operation = FunctionStorage::Operation::Greater; } break; case Token::Kind::GreaterEqual: { operation = FunctionStorage::Operation::GreaterEqual; } break; case Token::Kind::LessThan: { operation = FunctionStorage::Operation::Less; } break; case Token::Kind::LessEqual: { operation = FunctionStorage::Operation::LessEqual; } break; case Token::Kind::Plus: { operation = FunctionStorage::Operation::Add; } break; case Token::Kind::Minus: { operation = FunctionStorage::Operation::Subtract; } break; case Token::Kind::Times: { operation = FunctionStorage::Operation::Multiplication; } break; case Token::Kind::Slash: { operation = FunctionStorage::Operation::Division; } break; case Token::Kind::Power: { operation = FunctionStorage::Operation::Power; } break; case Token::Kind::Percent: { operation = FunctionStorage::Operation::Modulo; } break; case Token::Kind::Dot: { operation = FunctionStorage::Operation::AtId; } break; default: { throw_parser_error("unknown operator in parser."); } } auto function_node = std::make_shared(operation, tok.text.data() - tmpl.content.c_str()); while (!operator_stack.empty() && ((operator_stack.top()->precedence > function_node->precedence) || (operator_stack.top()->precedence == function_node->precedence && function_node->associativity == FunctionNode::Associativity::Left)) && (operator_stack.top()->operation != FunctionStorage::Operation::ParenLeft)) { current_expression_list->rpn_output.emplace_back(operator_stack.top()); operator_stack.pop(); } operator_stack.emplace(function_node); } break; case Token::Kind::Comma: { if (current_brace_level == 0 && current_bracket_level == 0) { if (function_stack.empty()) { throw_parser_error("unexpected ','"); } function_stack.top().first->number_args += 1; } } break; case Token::Kind::Colon: { if (current_brace_level == 0 && current_bracket_level == 0) { throw_parser_error("unexpected ':'"); } } break; case Token::Kind::LeftParen: { current_paren_level += 1; operator_stack.emplace(std::make_shared(FunctionStorage::Operation::ParenLeft, tok.text.data() - tmpl.content.c_str())); get_peek_token(); if (peek_tok.kind == Token::Kind::RightParen) { if (!function_stack.empty() && function_stack.top().second == current_paren_level - 1) { function_stack.top().first->number_args = 0; } } } break; case Token::Kind::RightParen: { current_paren_level -= 1; while (!operator_stack.empty() && operator_stack.top()->operation != FunctionStorage::Operation::ParenLeft) { current_expression_list->rpn_output.emplace_back(operator_stack.top()); operator_stack.pop(); } if (!operator_stack.empty() && operator_stack.top()->operation == FunctionStorage::Operation::ParenLeft) { operator_stack.pop(); } if (!function_stack.empty() && function_stack.top().second == current_paren_level) { auto func = function_stack.top().first; auto function_data = function_storage.find_function(func->name, func->number_args); if (function_data.operation == FunctionStorage::Operation::None) { throw_parser_error("unknown function " + func->name); } func->operation = function_data.operation; if (function_data.operation == FunctionStorage::Operation::Callback) { func->callback = function_data.callback; } if (operator_stack.empty()) { throw_parser_error("internal error at function " + func->name); } current_expression_list->rpn_output.emplace_back(operator_stack.top()); operator_stack.pop(); function_stack.pop(); } } default: break; } get_next_token(); } while (!operator_stack.empty()) { current_expression_list->rpn_output.emplace_back(operator_stack.top()); operator_stack.pop(); } return true; } bool parse_statement(Template &tmpl, Token::Kind closing, nonstd::string_view path) { if (tok.kind != Token::Kind::Id) { return false; } if (tok.text == static_cast("if")) { get_next_token(); auto if_statement_node = std::make_shared(current_block, tok.text.data() - tmpl.content.c_str()); current_block->nodes.emplace_back(if_statement_node); if_statement_stack.emplace(if_statement_node.get()); current_block = &if_statement_node->true_statement; current_expression_list = &if_statement_node->condition; if (!parse_expression(tmpl, closing)) { return false; } } else if (tok.text == static_cast("else")) { if (if_statement_stack.empty()) { throw_parser_error("else without matching if"); } auto &if_statement_data = if_statement_stack.top(); get_next_token(); if_statement_data->has_false_statement = true; current_block = &if_statement_data->false_statement; // Chained else if if (tok.kind == Token::Kind::Id && tok.text == static_cast("if")) { get_next_token(); auto if_statement_node = std::make_shared(true, current_block, tok.text.data() - tmpl.content.c_str()); current_block->nodes.emplace_back(if_statement_node); if_statement_stack.emplace(if_statement_node.get()); current_block = &if_statement_node->true_statement; current_expression_list = &if_statement_node->condition; if (!parse_expression(tmpl, closing)) { return false; } } } else if (tok.text == static_cast("endif")) { if (if_statement_stack.empty()) { throw_parser_error("endif without matching if"); } // Nested if statements while (if_statement_stack.top()->is_nested) { if_statement_stack.pop(); } auto &if_statement_data = if_statement_stack.top(); get_next_token(); current_block = if_statement_data->parent; if_statement_stack.pop(); } else if (tok.text == static_cast("for")) { get_next_token(); // options: for a in arr; for a, b in obj if (tok.kind != Token::Kind::Id) { throw_parser_error("expected id, got '" + tok.describe() + "'"); } Token value_token = tok; get_next_token(); // Object type std::shared_ptr for_statement_node; if (tok.kind == Token::Kind::Comma) { get_next_token(); if (tok.kind != Token::Kind::Id) { throw_parser_error("expected id, got '" + tok.describe() + "'"); } Token key_token = std::move(value_token); value_token = tok; get_next_token(); for_statement_node = std::make_shared(static_cast(key_token.text), static_cast(value_token.text), current_block, tok.text.data() - tmpl.content.c_str()); // Array type } else { for_statement_node = std::make_shared(static_cast(value_token.text), current_block, tok.text.data() - tmpl.content.c_str()); } current_block->nodes.emplace_back(for_statement_node); for_statement_stack.emplace(for_statement_node.get()); current_block = &for_statement_node->body; current_expression_list = &for_statement_node->condition; if (tok.kind != Token::Kind::Id || tok.text != static_cast("in")) { throw_parser_error("expected 'in', got '" + tok.describe() + "'"); } get_next_token(); if (!parse_expression(tmpl, closing)) { return false; } } else if (tok.text == static_cast("endfor")) { if (for_statement_stack.empty()) { throw_parser_error("endfor without matching for"); } auto &for_statement_data = for_statement_stack.top(); get_next_token(); current_block = for_statement_data->parent; for_statement_stack.pop(); } else if (tok.text == static_cast("include")) { get_next_token(); if (tok.kind != Token::Kind::String) { throw_parser_error("expected string, got '" + tok.describe() + "'"); } // Build the relative path json json_name = json::parse(tok.text); std::string pathname = static_cast(path); pathname += json_name.get_ref(); if (pathname.compare(0, 2, "./") == 0) { pathname.erase(0, 2); } // sys::path::remove_dots(pathname, true, sys::path::Style::posix); if (config.search_included_templates_in_files && template_storage.find(pathname) == template_storage.end()) { auto include_template = Template(load_file(pathname)); template_storage.emplace(pathname, include_template); parse_into_template(template_storage[pathname], pathname); } current_block->nodes.emplace_back(std::make_shared(pathname, tok.text.data() - tmpl.content.c_str())); get_next_token(); } else if (tok.text == static_cast("set")) { get_next_token(); if (tok.kind != Token::Kind::Id) { throw_parser_error("expected variable name, got '" + tok.describe() + "'"); } std::string key = static_cast(tok.text); get_next_token(); auto set_statement_node = std::make_shared(key, tok.text.data() - tmpl.content.c_str()); current_block->nodes.emplace_back(set_statement_node); current_expression_list = &set_statement_node->expression; if (tok.text != static_cast("=")) { throw_parser_error("expected '=', got '" + tok.describe() + "'"); } get_next_token(); if (!parse_expression(tmpl, closing)) { return false; } } else { return false; } return true; } void parse_into(Template &tmpl, nonstd::string_view path) { lexer.start(tmpl.content); current_block = &tmpl.root; for (;;) { get_next_token(); switch (tok.kind) { case Token::Kind::Eof: { if (!if_statement_stack.empty()) { throw_parser_error("unmatched if"); } if (!for_statement_stack.empty()) { throw_parser_error("unmatched for"); } } return; case Token::Kind::Text: { current_block->nodes.emplace_back(std::make_shared(tok.text.data() - tmpl.content.c_str(), tok.text.size())); } break; case Token::Kind::StatementOpen: { get_next_token(); if (!parse_statement(tmpl, Token::Kind::StatementClose, path)) { throw_parser_error("expected statement, got '" + tok.describe() + "'"); } if (tok.kind != Token::Kind::StatementClose) { throw_parser_error("expected statement close, got '" + tok.describe() + "'"); } } break; case Token::Kind::LineStatementOpen: { get_next_token(); if (!parse_statement(tmpl, Token::Kind::LineStatementClose, path)) { throw_parser_error("expected statement, got '" + tok.describe() + "'"); } if (tok.kind != Token::Kind::LineStatementClose && tok.kind != Token::Kind::Eof) { throw_parser_error("expected line statement close, got '" + tok.describe() + "'"); } } break; case Token::Kind::ExpressionOpen: { get_next_token(); auto expression_list_node = std::make_shared(tok.text.data() - tmpl.content.c_str()); current_block->nodes.emplace_back(expression_list_node); current_expression_list = expression_list_node.get(); if (!parse_expression(tmpl, Token::Kind::ExpressionClose)) { throw_parser_error("expected expression, got '" + tok.describe() + "'"); } if (tok.kind != Token::Kind::ExpressionClose) { throw_parser_error("expected expression close, got '" + tok.describe() + "'"); } } break; case Token::Kind::CommentOpen: { get_next_token(); if (tok.kind != Token::Kind::CommentClose) { throw_parser_error("expected comment close, got '" + tok.describe() + "'"); } } break; default: { throw_parser_error("unexpected token '" + tok.describe() + "'"); } break; } } } public: explicit Parser(const ParserConfig &parser_config, const LexerConfig &lexer_config, TemplateStorage &template_storage, const FunctionStorage &function_storage) : config(parser_config), lexer(lexer_config), template_storage(template_storage), function_storage(function_storage) { } Template parse(nonstd::string_view input, nonstd::string_view path) { auto result = Template(static_cast(input)); parse_into(result, path); return result; } Template parse(nonstd::string_view input) { return parse(input, "./"); } void parse_into_template(Template& tmpl, nonstd::string_view filename) { nonstd::string_view path = filename.substr(0, filename.find_last_of("/\\") + 1); // StringRef path = sys::path::parent_path(filename); auto sub_parser = Parser(config, lexer.get_config(), template_storage, function_storage); sub_parser.parse_into(tmpl, path); } std::string load_file(nonstd::string_view filename) { std::ifstream file; open_file_or_throw(static_cast(filename), file); std::string text((std::istreambuf_iterator(file)), std::istreambuf_iterator()); return text; } }; } // namespace inja #endif // INCLUDE_INJA_PARSER_HPP_