aboutsummaryrefslogtreecommitdiff
path: root/include/inja/parser.hpp
diff options
context:
space:
mode:
authorToni Uhlig <matzeton@googlemail.com>2021-04-27 11:23:17 +0200
committerToni Uhlig <matzeton@googlemail.com>2021-04-27 11:23:17 +0200
commit514cb71a6a3e116c229c5dc874369f8632530dc7 (patch)
treedbc61581e04809fca19fefb3f4954b76e1e3e2c8 /include/inja/parser.hpp
Squashed 'deps/inja/' content from commit 811e173
git-subtree-dir: deps/inja git-subtree-split: 811e1730e13bca4ea1805a42d5f0a4b5c91046e1
Diffstat (limited to 'include/inja/parser.hpp')
-rw-r--r--include/inja/parser.hpp582
1 files changed, 582 insertions, 0 deletions
diff --git a/include/inja/parser.hpp b/include/inja/parser.hpp
new file mode 100644
index 0000000..6266c4a
--- /dev/null
+++ b/include/inja/parser.hpp
@@ -0,0 +1,582 @@
+// Copyright (c) 2020 Pantor. All rights reserved.
+
+#ifndef INCLUDE_INJA_PARSER_HPP_
+#define INCLUDE_INJA_PARSER_HPP_
+
+#include <limits>
+#include <stack>
+#include <string>
+#include <utility>
+#include <queue>
+#include <vector>
+
+#include "config.hpp"
+#include "exceptions.hpp"
+#include "function_storage.hpp"
+#include "lexer.hpp"
+#include "node.hpp"
+#include "template.hpp"
+#include "token.hpp"
+#include "utils.hpp"
+
+#include <nlohmann/json.hpp>
+
+namespace inja {
+
+/*!
+ * \brief Class for parsing an inja Template.
+ */
+class Parser {
+ const ParserConfig &config;
+
+ Lexer lexer;
+ TemplateStorage &template_storage;
+ const FunctionStorage &function_storage;
+
+ Token tok, peek_tok;
+ bool have_peek_tok {false};
+
+ size_t current_paren_level {0};
+ size_t current_bracket_level {0};
+ size_t current_brace_level {0};
+
+ nonstd::string_view json_literal_start;
+
+ BlockNode *current_block {nullptr};
+ ExpressionListNode *current_expression_list {nullptr};
+ std::stack<std::pair<FunctionNode*, size_t>> function_stack;
+
+ std::stack<std::shared_ptr<FunctionNode>> operator_stack;
+ std::stack<IfStatementNode*> if_statement_stack;
+ std::stack<ForStatementNode*> for_statement_stack;
+
+ void throw_parser_error(const std::string &message) {
+ INJA_THROW(ParserError(message, lexer.current_position()));
+ }
+
+ void get_next_token() {
+ if (have_peek_tok) {
+ tok = peek_tok;
+ have_peek_tok = false;
+ } else {
+ tok = lexer.scan();
+ }
+ }
+
+ void get_peek_token() {
+ if (!have_peek_tok) {
+ peek_tok = lexer.scan();
+ have_peek_tok = true;
+ }
+ }
+
+ void add_json_literal(const char* content_ptr) {
+ nonstd::string_view json_text(json_literal_start.data(), tok.text.data() - json_literal_start.data() + tok.text.size());
+ current_expression_list->rpn_output.emplace_back(std::make_shared<LiteralNode>(json::parse(json_text), json_text.data() - content_ptr));
+ }
+
+ bool parse_expression(Template &tmpl, Token::Kind closing) {
+ while (tok.kind != closing && tok.kind != Token::Kind::Eof) {
+ // Literals
+ switch (tok.kind) {
+ case Token::Kind::String: {
+ if (current_brace_level == 0 && current_bracket_level == 0) {
+ json_literal_start = tok.text;
+ add_json_literal(tmpl.content.c_str());
+ }
+
+ } break;
+ case Token::Kind::Number: {
+ if (current_brace_level == 0 && current_bracket_level == 0) {
+ json_literal_start = tok.text;
+ add_json_literal(tmpl.content.c_str());
+ }
+
+ } break;
+ case Token::Kind::LeftBracket: {
+ if (current_brace_level == 0 && current_bracket_level == 0) {
+ json_literal_start = tok.text;
+ }
+ current_bracket_level += 1;
+
+ } break;
+ case Token::Kind::LeftBrace: {
+ if (current_brace_level == 0 && current_bracket_level == 0) {
+ json_literal_start = tok.text;
+ }
+ current_brace_level += 1;
+
+ } break;
+ case Token::Kind::RightBracket: {
+ if (current_bracket_level == 0) {
+ throw_parser_error("unexpected ']'");
+ }
+
+ current_bracket_level -= 1;
+ if (current_brace_level == 0 && current_bracket_level == 0) {
+ add_json_literal(tmpl.content.c_str());
+ }
+
+ } break;
+ case Token::Kind::RightBrace: {
+ if (current_brace_level == 0) {
+ throw_parser_error("unexpected '}'");
+ }
+
+ current_brace_level -= 1;
+ if (current_brace_level == 0 && current_bracket_level == 0) {
+ add_json_literal(tmpl.content.c_str());
+ }
+
+ } break;
+ case Token::Kind::Id: {
+ get_peek_token();
+
+ // Json Literal
+ if (tok.text == static_cast<decltype(tok.text)>("true") || tok.text == static_cast<decltype(tok.text)>("false") || tok.text == static_cast<decltype(tok.text)>("null")) {
+ if (current_brace_level == 0 && current_bracket_level == 0) {
+ json_literal_start = tok.text;
+ add_json_literal(tmpl.content.c_str());
+ }
+
+ // Operator
+ } else if (tok.text == "and" || tok.text == "or" || tok.text == "in" || tok.text == "not") {
+ goto parse_operator;
+
+ // Functions
+ } else if (peek_tok.kind == Token::Kind::LeftParen) {
+ operator_stack.emplace(std::make_shared<FunctionNode>(static_cast<std::string>(tok.text), tok.text.data() - tmpl.content.c_str()));
+ function_stack.emplace(operator_stack.top().get(), current_paren_level);
+
+ // Variables
+ } else {
+ current_expression_list->rpn_output.emplace_back(std::make_shared<JsonNode>(static_cast<std::string>(tok.text), tok.text.data() - tmpl.content.c_str()));
+ }
+
+ // Operators
+ } break;
+ case Token::Kind::Equal:
+ case Token::Kind::NotEqual:
+ case Token::Kind::GreaterThan:
+ case Token::Kind::GreaterEqual:
+ case Token::Kind::LessThan:
+ case Token::Kind::LessEqual:
+ case Token::Kind::Plus:
+ case Token::Kind::Minus:
+ case Token::Kind::Times:
+ case Token::Kind::Slash:
+ case Token::Kind::Power:
+ case Token::Kind::Percent:
+ case Token::Kind::Dot: {
+
+ parse_operator:
+ FunctionStorage::Operation operation;
+ switch (tok.kind) {
+ case Token::Kind::Id: {
+ if (tok.text == "and") {
+ operation = FunctionStorage::Operation::And;
+ } else if (tok.text == "or") {
+ operation = FunctionStorage::Operation::Or;
+ } else if (tok.text == "in") {
+ operation = FunctionStorage::Operation::In;
+ } else if (tok.text == "not") {
+ operation = FunctionStorage::Operation::Not;
+ } else {
+ throw_parser_error("unknown operator in parser.");
+ }
+ } break;
+ case Token::Kind::Equal: {
+ operation = FunctionStorage::Operation::Equal;
+ } break;
+ case Token::Kind::NotEqual: {
+ operation = FunctionStorage::Operation::NotEqual;
+ } break;
+ case Token::Kind::GreaterThan: {
+ operation = FunctionStorage::Operation::Greater;
+ } break;
+ case Token::Kind::GreaterEqual: {
+ operation = FunctionStorage::Operation::GreaterEqual;
+ } break;
+ case Token::Kind::LessThan: {
+ operation = FunctionStorage::Operation::Less;
+ } break;
+ case Token::Kind::LessEqual: {
+ operation = FunctionStorage::Operation::LessEqual;
+ } break;
+ case Token::Kind::Plus: {
+ operation = FunctionStorage::Operation::Add;
+ } break;
+ case Token::Kind::Minus: {
+ operation = FunctionStorage::Operation::Subtract;
+ } break;
+ case Token::Kind::Times: {
+ operation = FunctionStorage::Operation::Multiplication;
+ } break;
+ case Token::Kind::Slash: {
+ operation = FunctionStorage::Operation::Division;
+ } break;
+ case Token::Kind::Power: {
+ operation = FunctionStorage::Operation::Power;
+ } break;
+ case Token::Kind::Percent: {
+ operation = FunctionStorage::Operation::Modulo;
+ } break;
+ case Token::Kind::Dot: {
+ operation = FunctionStorage::Operation::AtId;
+ } break;
+ default: {
+ throw_parser_error("unknown operator in parser.");
+ }
+ }
+ auto function_node = std::make_shared<FunctionNode>(operation, tok.text.data() - tmpl.content.c_str());
+
+ while (!operator_stack.empty() && ((operator_stack.top()->precedence > function_node->precedence) || (operator_stack.top()->precedence == function_node->precedence && function_node->associativity == FunctionNode::Associativity::Left)) && (operator_stack.top()->operation != FunctionStorage::Operation::ParenLeft)) {
+ current_expression_list->rpn_output.emplace_back(operator_stack.top());
+ operator_stack.pop();
+ }
+
+ operator_stack.emplace(function_node);
+
+ } break;
+ case Token::Kind::Comma: {
+ if (current_brace_level == 0 && current_bracket_level == 0) {
+ if (function_stack.empty()) {
+ throw_parser_error("unexpected ','");
+ }
+
+ function_stack.top().first->number_args += 1;
+ }
+
+ } break;
+ case Token::Kind::Colon: {
+ if (current_brace_level == 0 && current_bracket_level == 0) {
+ throw_parser_error("unexpected ':'");
+ }
+
+ } break;
+ case Token::Kind::LeftParen: {
+ current_paren_level += 1;
+ operator_stack.emplace(std::make_shared<FunctionNode>(FunctionStorage::Operation::ParenLeft, tok.text.data() - tmpl.content.c_str()));
+
+ get_peek_token();
+ if (peek_tok.kind == Token::Kind::RightParen) {
+ if (!function_stack.empty() && function_stack.top().second == current_paren_level - 1) {
+ function_stack.top().first->number_args = 0;
+ }
+ }
+
+ } break;
+ case Token::Kind::RightParen: {
+ current_paren_level -= 1;
+ while (!operator_stack.empty() && operator_stack.top()->operation != FunctionStorage::Operation::ParenLeft) {
+ current_expression_list->rpn_output.emplace_back(operator_stack.top());
+ operator_stack.pop();
+ }
+
+ if (!operator_stack.empty() && operator_stack.top()->operation == FunctionStorage::Operation::ParenLeft) {
+ operator_stack.pop();
+ }
+
+ if (!function_stack.empty() && function_stack.top().second == current_paren_level) {
+ auto func = function_stack.top().first;
+ auto function_data = function_storage.find_function(func->name, func->number_args);
+ if (function_data.operation == FunctionStorage::Operation::None) {
+ throw_parser_error("unknown function " + func->name);
+ }
+ func->operation = function_data.operation;
+ if (function_data.operation == FunctionStorage::Operation::Callback) {
+ func->callback = function_data.callback;
+ }
+
+ if (operator_stack.empty()) {
+ throw_parser_error("internal error at function " + func->name);
+ }
+
+ current_expression_list->rpn_output.emplace_back(operator_stack.top());
+ operator_stack.pop();
+ function_stack.pop();
+ }
+ }
+ default:
+ break;
+ }
+
+ get_next_token();
+ }
+
+ while (!operator_stack.empty()) {
+ current_expression_list->rpn_output.emplace_back(operator_stack.top());
+ operator_stack.pop();
+ }
+
+ return true;
+ }
+
+ bool parse_statement(Template &tmpl, Token::Kind closing, nonstd::string_view path) {
+ if (tok.kind != Token::Kind::Id) {
+ return false;
+ }
+
+ if (tok.text == static_cast<decltype(tok.text)>("if")) {
+ get_next_token();
+
+ auto if_statement_node = std::make_shared<IfStatementNode>(current_block, tok.text.data() - tmpl.content.c_str());
+ current_block->nodes.emplace_back(if_statement_node);
+ if_statement_stack.emplace(if_statement_node.get());
+ current_block = &if_statement_node->true_statement;
+ current_expression_list = &if_statement_node->condition;
+
+ if (!parse_expression(tmpl, closing)) {
+ return false;
+ }
+
+ } else if (tok.text == static_cast<decltype(tok.text)>("else")) {
+ if (if_statement_stack.empty()) {
+ throw_parser_error("else without matching if");
+ }
+ auto &if_statement_data = if_statement_stack.top();
+ get_next_token();
+
+ if_statement_data->has_false_statement = true;
+ current_block = &if_statement_data->false_statement;
+
+ // Chained else if
+ if (tok.kind == Token::Kind::Id && tok.text == static_cast<decltype(tok.text)>("if")) {
+ get_next_token();
+
+ auto if_statement_node = std::make_shared<IfStatementNode>(true, current_block, tok.text.data() - tmpl.content.c_str());
+ current_block->nodes.emplace_back(if_statement_node);
+ if_statement_stack.emplace(if_statement_node.get());
+ current_block = &if_statement_node->true_statement;
+ current_expression_list = &if_statement_node->condition;
+
+ if (!parse_expression(tmpl, closing)) {
+ return false;
+ }
+ }
+
+ } else if (tok.text == static_cast<decltype(tok.text)>("endif")) {
+ if (if_statement_stack.empty()) {
+ throw_parser_error("endif without matching if");
+ }
+
+ // Nested if statements
+ while (if_statement_stack.top()->is_nested) {
+ if_statement_stack.pop();
+ }
+
+ auto &if_statement_data = if_statement_stack.top();
+ get_next_token();
+
+ current_block = if_statement_data->parent;
+ if_statement_stack.pop();
+
+ } else if (tok.text == static_cast<decltype(tok.text)>("for")) {
+ get_next_token();
+
+ // options: for a in arr; for a, b in obj
+ if (tok.kind != Token::Kind::Id) {
+ throw_parser_error("expected id, got '" + tok.describe() + "'");
+ }
+
+ Token value_token = tok;
+ get_next_token();
+
+ // Object type
+ std::shared_ptr<ForStatementNode> for_statement_node;
+ if (tok.kind == Token::Kind::Comma) {
+ get_next_token();
+ if (tok.kind != Token::Kind::Id) {
+ throw_parser_error("expected id, got '" + tok.describe() + "'");
+ }
+
+ Token key_token = std::move(value_token);
+ value_token = tok;
+ get_next_token();
+
+ for_statement_node = std::make_shared<ForObjectStatementNode>(static_cast<std::string>(key_token.text), static_cast<std::string>(value_token.text), current_block, tok.text.data() - tmpl.content.c_str());
+
+ // Array type
+ } else {
+ for_statement_node = std::make_shared<ForArrayStatementNode>(static_cast<std::string>(value_token.text), current_block, tok.text.data() - tmpl.content.c_str());
+ }
+
+ current_block->nodes.emplace_back(for_statement_node);
+ for_statement_stack.emplace(for_statement_node.get());
+ current_block = &for_statement_node->body;
+ current_expression_list = &for_statement_node->condition;
+
+ if (tok.kind != Token::Kind::Id || tok.text != static_cast<decltype(tok.text)>("in")) {
+ throw_parser_error("expected 'in', got '" + tok.describe() + "'");
+ }
+ get_next_token();
+
+ if (!parse_expression(tmpl, closing)) {
+ return false;
+ }
+
+ } else if (tok.text == static_cast<decltype(tok.text)>("endfor")) {
+ if (for_statement_stack.empty()) {
+ throw_parser_error("endfor without matching for");
+ }
+
+ auto &for_statement_data = for_statement_stack.top();
+ get_next_token();
+
+ current_block = for_statement_data->parent;
+ for_statement_stack.pop();
+
+ } else if (tok.text == static_cast<decltype(tok.text)>("include")) {
+ get_next_token();
+
+ if (tok.kind != Token::Kind::String) {
+ throw_parser_error("expected string, got '" + tok.describe() + "'");
+ }
+
+ // Build the relative path
+ json json_name = json::parse(tok.text);
+ std::string pathname = static_cast<std::string>(path);
+ pathname += json_name.get_ref<const std::string &>();
+ if (pathname.compare(0, 2, "./") == 0) {
+ pathname.erase(0, 2);
+ }
+ // sys::path::remove_dots(pathname, true, sys::path::Style::posix);
+
+ if (config.search_included_templates_in_files && template_storage.find(pathname) == template_storage.end()) {
+ auto include_template = Template(load_file(pathname));
+ template_storage.emplace(pathname, include_template);
+ parse_into_template(template_storage[pathname], pathname);
+ }
+
+ current_block->nodes.emplace_back(std::make_shared<IncludeStatementNode>(pathname, tok.text.data() - tmpl.content.c_str()));
+
+ get_next_token();
+
+ } else if (tok.text == static_cast<decltype(tok.text)>("set")) {
+ get_next_token();
+
+ if (tok.kind != Token::Kind::Id) {
+ throw_parser_error("expected variable name, got '" + tok.describe() + "'");
+ }
+
+ std::string key = static_cast<std::string>(tok.text);
+ get_next_token();
+
+ auto set_statement_node = std::make_shared<SetStatementNode>(key, tok.text.data() - tmpl.content.c_str());
+ current_block->nodes.emplace_back(set_statement_node);
+ current_expression_list = &set_statement_node->expression;
+
+ if (tok.text != static_cast<decltype(tok.text)>("=")) {
+ throw_parser_error("expected '=', got '" + tok.describe() + "'");
+ }
+ get_next_token();
+
+ if (!parse_expression(tmpl, closing)) {
+ return false;
+ }
+
+ } else {
+ return false;
+ }
+ return true;
+ }
+
+ void parse_into(Template &tmpl, nonstd::string_view path) {
+ lexer.start(tmpl.content);
+ current_block = &tmpl.root;
+
+ for (;;) {
+ get_next_token();
+ switch (tok.kind) {
+ case Token::Kind::Eof: {
+ if (!if_statement_stack.empty()) {
+ throw_parser_error("unmatched if");
+ }
+ if (!for_statement_stack.empty()) {
+ throw_parser_error("unmatched for");
+ }
+ } return;
+ case Token::Kind::Text: {
+ current_block->nodes.emplace_back(std::make_shared<TextNode>(tok.text.data() - tmpl.content.c_str(), tok.text.size()));
+ } break;
+ case Token::Kind::StatementOpen: {
+ get_next_token();
+ if (!parse_statement(tmpl, Token::Kind::StatementClose, path)) {
+ throw_parser_error("expected statement, got '" + tok.describe() + "'");
+ }
+ if (tok.kind != Token::Kind::StatementClose) {
+ throw_parser_error("expected statement close, got '" + tok.describe() + "'");
+ }
+ } break;
+ case Token::Kind::LineStatementOpen: {
+ get_next_token();
+ if (!parse_statement(tmpl, Token::Kind::LineStatementClose, path)) {
+ throw_parser_error("expected statement, got '" + tok.describe() + "'");
+ }
+ if (tok.kind != Token::Kind::LineStatementClose && tok.kind != Token::Kind::Eof) {
+ throw_parser_error("expected line statement close, got '" + tok.describe() + "'");
+ }
+ } break;
+ case Token::Kind::ExpressionOpen: {
+ get_next_token();
+
+ auto expression_list_node = std::make_shared<ExpressionListNode>(tok.text.data() - tmpl.content.c_str());
+ current_block->nodes.emplace_back(expression_list_node);
+ current_expression_list = expression_list_node.get();
+
+ if (!parse_expression(tmpl, Token::Kind::ExpressionClose)) {
+ throw_parser_error("expected expression, got '" + tok.describe() + "'");
+ }
+
+ if (tok.kind != Token::Kind::ExpressionClose) {
+ throw_parser_error("expected expression close, got '" + tok.describe() + "'");
+ }
+ } break;
+ case Token::Kind::CommentOpen: {
+ get_next_token();
+ if (tok.kind != Token::Kind::CommentClose) {
+ throw_parser_error("expected comment close, got '" + tok.describe() + "'");
+ }
+ } break;
+ default: {
+ throw_parser_error("unexpected token '" + tok.describe() + "'");
+ } break;
+ }
+ }
+ }
+
+
+public:
+ explicit Parser(const ParserConfig &parser_config, const LexerConfig &lexer_config,
+ TemplateStorage &template_storage, const FunctionStorage &function_storage)
+ : config(parser_config), lexer(lexer_config), template_storage(template_storage), function_storage(function_storage) { }
+
+ Template parse(nonstd::string_view input, nonstd::string_view path) {
+ auto result = Template(static_cast<std::string>(input));
+ parse_into(result, path);
+ return result;
+ }
+
+ Template parse(nonstd::string_view input) {
+ return parse(input, "./");
+ }
+
+ void parse_into_template(Template& tmpl, nonstd::string_view filename) {
+ nonstd::string_view path = filename.substr(0, filename.find_last_of("/\\") + 1);
+
+ // StringRef path = sys::path::parent_path(filename);
+ auto sub_parser = Parser(config, lexer.get_config(), template_storage, function_storage);
+ sub_parser.parse_into(tmpl, path);
+ }
+
+ std::string load_file(nonstd::string_view filename) {
+ std::ifstream file;
+ open_file_or_throw(static_cast<std::string>(filename), file);
+ std::string text((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
+ return text;
+ }
+};
+
+} // namespace inja
+
+#endif // INCLUDE_INJA_PARSER_HPP_