diff options
Diffstat (limited to 'src/compiler/parser.c')
-rw-r--r-- | src/compiler/parser.c | 1550 |
1 files changed, 1550 insertions, 0 deletions
diff --git a/src/compiler/parser.c b/src/compiler/parser.c new file mode 100644 index 0000000..4f31e0b --- /dev/null +++ b/src/compiler/parser.c @@ -0,0 +1,1550 @@ +/* + * FlatBuffers IDL parser. + * + * Originally based on the numeric parser in the Luthor lexer project. + * + * We are moving away from TDOP approach because the grammer doesn't + * really benefit from it. We use the same overall framework. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <stdarg.h> + +#include "semantics.h" +#include "codegen.h" +#include "fileio.h" +#include "pstrutil.h" +#include "flatcc/portable/pparseint.h" + +void fb_default_error_out(void *err_ctx, const char *buf, size_t len) +{ + (void)err_ctx; + + fwrite(buf, 1, len, stderr); +} + +int fb_print_error(fb_parser_t *P, const char * format, ...) +{ + int n; + va_list ap; + char buf[ERROR_BUFSIZ]; + + va_start (ap, format); + n = vsnprintf (buf, ERROR_BUFSIZ, format, ap); + va_end (ap); + if (n >= ERROR_BUFSIZ) { + strcpy(buf + ERROR_BUFSIZ - 5, "...\n"); + n = ERROR_BUFSIZ - 1; + } + P->error_out(P->error_ctx, buf, (size_t)n); + return n; +} + +const char *error_find_file_of_token(fb_parser_t *P, fb_token_t *t) +{ + /* + * Search token in dependent buffers if not in current token + * buffer. We can do this as a linear search because we limit the + * number of output errors. 
+ */ + while (P) { + if (P->ts <= t && P->te > t) { + return P->schema.errorname; + } + P = P->dependencies; + } + return ""; +} + +void error_report(fb_parser_t *P, fb_token_t *t, const char *msg, fb_token_t *peer, const char *s, size_t len) +{ + const char *file, *peer_file; + + if (t && !s) { + s = t->text; + len = (size_t)t->len; + } + if (!msg) { + msg = ""; + } + if (!s) { + s = ""; + len = 0; + } + if (t && !peer) { + file = error_find_file_of_token(P, t); + fb_print_error(P, "%s:%ld:%ld: error: '%.*s': %s\n", + file, (long)t->linenum, (long)t->pos, len, s, msg); + } else if (t && peer) { + file = error_find_file_of_token(P, t); + peer_file = error_find_file_of_token(P, peer); + fb_print_error(P, "%s:%ld:%ld: error: '%.*s': %s: %s:%ld:%ld: '%.*s'\n", + file, (long)t->linenum, (long)t->pos, len, s, msg, + peer_file, (long)peer->linenum, (long)peer->pos, (int)peer->len, peer->text); + } else if (!t && !peer) { + fb_print_error(P, "error: %s\n", msg); + } else if (peer) { + peer_file = error_find_file_of_token(P, peer); + fb_print_error(P, "error: %s: %s:%ld:%ld: '%.*s'\n", + msg, + peer_file, (long)peer->linenum, (long)peer->pos, (int)peer->len, peer->text); + } else { + fb_print_error(P, "internal error: unexpected state\n"); + } + ++P->failed; +} + +void error_ref_sym(fb_parser_t *P, fb_ref_t *ref, const char *msg, fb_symbol_t *s2) +{ + fb_ref_t *p; + char buf[FLATCC_MAX_IDENT_SHOW + 1]; + size_t k = FLATCC_MAX_IDENT_SHOW; + size_t n = 0; + size_t n0 = 0; + int truncated = 0; + + p = ref; + while (p && k > 0) { + if (n0 > 0) { + buf[n0] = '.'; + --k; + ++n0; + } + n = (size_t)p->ident->len; + if (k < n) { + n = k; + truncated = 1; + } + memcpy(buf + n0, p->ident->text, n); + k -= n; + n0 += n; + p = p->link; + } + if (p) truncated = 1; + buf[n0] = '\0'; + if (n0 > 0) { + --n0; + } + if (truncated) { + memcpy(buf + FLATCC_MAX_IDENT_SHOW + 1 - 4, "...\0", 4); + n0 = FLATCC_MAX_IDENT_SHOW; + } + error_report(P, ref->ident, msg, s2 ? 
s2->ident : 0, buf, n0); +} + +//#define LEX_DEBUG + +/* Flatbuffers reserve keywords. */ +#define LEX_KEYWORDS + +#define LEX_C_BLOCK_COMMENT +/* + * Flatbuffers also support /// on a single line for documentation but + * we can handle that within the normal line comment parsing logic. + */ +#define LEX_C99_LINE_COMMENT +/* + * String escapes are not defined in fb schema but it only uses strings + * for attribute, namespace, file ext, and file id. For JSON objects we + * use C string escapes but control characters must be detected. + */ +#define LEX_C_STRING + +/* Accept numbers like -0x42 as integer literals. */ +#define LEX_HEX_NUMERIC + +#define lex_isblank(c) ((c) == ' ' || (c) == '\t') + +#include "parser.h" + +#ifdef LEX_DEBUG + +static void print_token(fb_token_t *t) +{ + lex_fprint_token(stderr, t->id, t->text, t->text + t->len, t->linenum, t->pos); +} + +static void debug_token(const char *info, fb_token_t *t) +{ + fprintf(stderr, "%s\n ", info); + print_token(t); +} +#else +#define debug_token(info, t) ((void)0) +#endif + +static void revert_metadata(fb_metadata_t **list) +{ + REVERT_LIST(fb_metadata_t, link, list); +} + +static void revert_symbols(fb_symbol_t **list) +{ + REVERT_LIST(fb_symbol_t, link, list); +} + +static void revert_names(fb_name_t **list) +{ + REVERT_LIST(fb_name_t, link, list); +} + +static inline fb_doc_t *fb_add_doc(fb_parser_t *P, fb_token_t *t) +{ + fb_doc_t *p; + + p = new_elem(P, sizeof(*p)); + p->ident = t; + p->link = P->doc; + P->doc = p; + return p; +} + +#define fb_assign_doc(P, p) {\ + revert_symbols(&P->doc); p->doc = P->doc; P->doc = 0; } + +static inline fb_compound_type_t *fb_add_table(fb_parser_t *P) +{ + fb_compound_type_t *p; + + p = new_elem(P, sizeof(*p)); + p->symbol.link = P->schema.symbols; + p->symbol.kind = fb_is_table; + P->schema.symbols = &p->symbol; + p->scope = P->current_scope; + fb_assign_doc(P, p); + return p; +} + +static inline fb_compound_type_t *fb_add_struct(fb_parser_t *P) +{ + 
fb_compound_type_t *p; + + p = new_elem(P, sizeof(*p)); + p->symbol.link = P->schema.symbols; + p->symbol.kind = fb_is_struct; + P->schema.symbols = &p->symbol; + p->scope = P->current_scope; + fb_assign_doc(P, p); + return p; +} + +static inline fb_compound_type_t *fb_add_rpc_service(fb_parser_t *P) +{ + fb_compound_type_t *p; + + p = new_elem(P, sizeof(*p)); + p->symbol.link = P->schema.symbols; + p->symbol.kind = fb_is_rpc_service; + P->schema.symbols = &p->symbol; + p->scope = P->current_scope; + fb_assign_doc(P, p); + return p; +} + +static inline fb_compound_type_t *fb_add_enum(fb_parser_t *P) +{ + fb_compound_type_t *p; + + p = new_elem(P, sizeof(*p)); + p->symbol.link = P->schema.symbols; + p->symbol.kind = fb_is_enum; + P->schema.symbols = &p->symbol; + p->scope = P->current_scope; + fb_assign_doc(P, p); + return p; +} + +static inline fb_compound_type_t *fb_add_union(fb_parser_t *P) +{ + fb_compound_type_t *p; + + p = new_elem(P, sizeof(*p)); + p->symbol.link = P->schema.symbols; + p->symbol.kind = fb_is_union; + P->schema.symbols = &p->symbol; + p->scope = P->current_scope; + fb_assign_doc(P, p); + return p; +} + +static inline fb_ref_t *fb_add_ref(fb_parser_t *P, fb_token_t *t) +{ + fb_ref_t *p; + + p = new_elem(P, sizeof(*p)); + p->ident = t; + return p; +} + +static inline fb_attribute_t *fb_add_attribute(fb_parser_t *P) +{ + fb_attribute_t *p; + + p = new_elem(P, sizeof(*p)); + p->name.link = P->schema.attributes; + P->schema.attributes = &p->name; + return p; +} + +static inline fb_include_t *fb_add_include(fb_parser_t *P) +{ + fb_include_t *p; + p = new_elem(P, sizeof(*p)); + p->link = P->schema.includes; + return P->schema.includes = p; +} + +static inline fb_scope_t *fb_add_scope(fb_parser_t *P, fb_ref_t *name) +{ + fb_scope_t *p; + + p = fb_scope_table_find(&P->schema.root_schema->scope_index, name, 0); + if (p) { + return p; + } + p = new_elem(P, sizeof(*p)); + p->name = name; + p->prefix = P->schema.prefix; + + 
fb_scope_table_insert_item(&P->schema.root_schema->scope_index, p, ht_keep); + return p; +} + +static inline fb_metadata_t *fb_add_metadata(fb_parser_t *P, fb_metadata_t **metadata) +{ + fb_metadata_t *p; + p = new_elem(P, sizeof(*p)); + p->link = *metadata; + return *metadata = p; +} + +static inline fb_member_t *fb_add_member(fb_parser_t *P, fb_symbol_t **members) +{ + fb_member_t *p; + p = new_elem(P, sizeof(*p)); + p->symbol.link = *members; + p->symbol.kind = fb_is_member; + *members = (fb_symbol_t *)p; + fb_assign_doc(P, p); + return p; +} + +static inline int is_end(fb_token_t *t) +{ + return t->id == LEX_TOK_EOF; +} + +static fb_token_t *next(fb_parser_t *P) +{ +again: + ++P->token; + if (P->token == P->te) { + /* We keep returning end of token to help binary operators etc., if any. */ + --P->token; + assert(0); + switch (P->token->id) { + case LEX_TOK_EOS: case LEX_TOK_EOB: case LEX_TOK_EOF: + P->token->id = LEX_TOK_EOF; + return P->token; + } + error_tok(P, P->token, "unexpected end of input"); + } + if (P->token->id == tok_kw_doc_comment) { + /* Note: we can have blanks that are control characters here, such as \t. 
*/ + fb_add_doc(P, P->token); + goto again; + } + debug_token("next", P->token); + return P->token; +} + +static void recover(fb_parser_t *P, long token_id, int consume) +{ + while (!is_end(P->token)) { + if (P->token->id == token_id) { + if (consume) { + next(P); + } + P->doc = 0; + return; + } + next(P); + } +} + +static void recover2(fb_parser_t *P, long token_id, int consume, long token_id_2, int consume_2) +{ + while (!is_end(P->token)) { + if (P->token->id == token_id) { + if (consume) { + next(P); + } + P->doc = 0; + return; + } + if (P->token->id == token_id_2) { + if (consume_2) { + next(P); + } + P->doc = 0; + return; + } + next(P); + } +} + +static inline fb_token_t *optional(fb_parser_t *P, long id) { + fb_token_t *t = 0; + if (P->token->id == id) { + t = P->token; + next(P); + } + return t; +} + +static inline fb_token_t *match(fb_parser_t *P, long id, char *msg) { + fb_token_t *t = 0; + if (P->token->id == id) { + t = P->token; + next(P); + } else { + error_tok(P, P->token, msg); + } + return t; +} + +/* + * When a keyword should also be accepted as an identifier. + * This is useful for JSON where field naems are visible. + * Since field names are not referenced within the schema, + * this is generally safe. Enums can also be resererved but + * they can then not be used as default values. Table names + * and other type names should not be remapped as they can then + * not by used as a type name for other fields. 
+ */ +#if FLATCC_ALLOW_KW_FIELDS +static inline void remap_field_ident(fb_parser_t *P) +{ + if (P->token->id >= LEX_TOK_KW_BASE && P->token->id < LEX_TOK_KW_END) { + P->token->id = LEX_TOK_ID; + } +} +#else +static inline void remap_field_ident(fb_parser_t *P) { (void)P; } +#endif + +#if FLATCC_ALLOW_KW_ENUMS +static inline void remap_enum_ident(fb_parser_t *P) +{ + if (P->token->id >= LEX_TOK_KW_BASE && P->token->id < LEX_TOK_KW_END) { + P->token->id = LEX_TOK_ID; + } +} +#else +static inline void remap_enum_ident(fb_parser_t *P) { (void)P; } +#endif + +static fb_token_t *advance(fb_parser_t *P, long id, const char *msg, fb_token_t *peer) +{ + /* + * `advance` is generally used at end of statements so it is a + * convenient place to get rid of rogue doc comments we can't attach + * to anything meaningful. + */ + P->doc = 0; + if (P->token->id != id) { + error_tok_2(P, P->token, msg, peer); + return P->token; + } + return next(P); +} + +static void read_integer_value(fb_parser_t *P, fb_token_t *t, fb_value_t *v, int sign) +{ + int status; + + v->type = vt_uint; + /* The token does not store the sign internally. */ + parse_integer(t->text, (size_t)t->len, &v->u, &status); + if (status != PARSE_INTEGER_UNSIGNED) { + v->type = vt_invalid; + error_tok(P, t, "invalid integer format"); + } + if (sign) { + v->i = -(int64_t)v->u; + v->type = vt_int; +#ifdef FLATCC_FAIL_ON_INT_SIGN_OVERFLOW + /* Sometimes we might want this, so don't fail by default. */ + if (v->i > 0) { + v->type = vt_invalid; + error_tok(P, t, "sign overflow in integer format"); + } +#endif + } +} + +static void read_hex_value(fb_parser_t *P, fb_token_t *t, fb_value_t *v, int sign) +{ + int status; + + v->type = vt_uint; + /* The token does not store the sign internally. 
*/ + parse_hex_integer(t->text, (size_t)t->len, &v->u, &status); + if (status != PARSE_INTEGER_UNSIGNED) { + v->type = vt_invalid; + error_tok(P, t, "invalid hex integer format"); + } + if (sign) { + v->i = -(int64_t)v->u; + v->type = vt_int; +#ifdef FLATCC_FAIL_ON_INT_SIGN_OVERFLOW + /* Sometimes we might want this, so don't fail by default. */ + if (v->i > 0) { + v->type = vt_invalid; + error_tok(P, t, "sign overflow in hex integer format"); + } +#endif + } +} + +static void read_float_value(fb_parser_t *P, fb_token_t *t, fb_value_t *v, int sign) +{ + char *end; + + v->type = vt_float; + v->f = strtod(t->text, &end); + if (end != t->text + t->len) { + v->type = vt_invalid; + error_tok(P, t, "invalid float format"); + } else if (t->text[0] == '.') { + v->type = vt_invalid; + /* The FB spec requires this, in line with the JSON format. */ + error_tok(P, t, "numeric values must start with a digit"); + } else if (sign) { + v->f = -v->f; + } +} + +/* + * We disallow escape characters, newlines and other control characters, + * but especially escape characters because they would require us to + * reallocate the string and convert the escaped characters. We also + * disallow non-utf8 characters, but we do not check for it. The tab + * character could meaningfully be accepted, but we don't. + * + * String literals are only used to name attributes, namespaces, + * file identifiers and file externsions, so we really have no need + * for these extra featuresescape . + * + * JSON strings should be handled separately, if or when supported - + * either by converting escapes and reallocating the string, or + * simply by ignoring the escape errors and use the string unmodified. 
+ */ +static void parse_string_literal(fb_parser_t *P, fb_value_t *v) +{ + fb_token_t *t; + + v->type = vt_string; + v->s.s = 0; + v->s.len = 0; + + for (;;) { + t = P->token; + switch (t->id) { + case LEX_TOK_STRING_PART: + if (v->s.s == 0) { + v->s.s = (char *)t->text; + } + break; + case LEX_TOK_STRING_ESCAPE: + v->type = vt_invalid; + error_tok(P, t, "escape not allowed in strings"); + break; + case LEX_TOK_STRING_CTRL: + v->type = vt_invalid; + error_tok_as_string(P, t, "control characters not allowed in strings", "?", 1); + break; + case LEX_TOK_STRING_NEWLINE: + v->type = vt_invalid; + error_tok(P, t, "newline not allowed in strings"); + break; + case LEX_TOK_STRING_UNTERMINATED: + case LEX_TOK_STRING_END: + goto done; + + default: + error_tok(P, t, "internal error: unexpected token in string"); + v->type = vt_invalid; + goto done; + } + next(P); + } +done: + /* + * If we were to ignore all errors, we would get the full + * string as is excluding delimiting quotes. + */ + if (v->s.s) { + v->s.len = (int)(P->token->text - v->s.s); + } + if (!match(P, LEX_TOK_STRING_END, "unterminated string")) { + v->type = vt_invalid; + } +} + +/* Current token must be an identifier. 
*/ +static void parse_ref(fb_parser_t *P, fb_ref_t **ref) +{ + *ref = fb_add_ref(P, P->token); + next(P); + ref = &((*ref)->link); + while (optional(P, '.')) { + if (P->token->id != LEX_TOK_ID) { + error_tok(P, P->token, "namespace prefix expected identifier"); + break; + } + *ref = fb_add_ref(P, P->token); + ref = &((*ref)->link); + next(P); + } +} + +/* `flags` */ +enum { allow_string_value = 1, allow_id_value = 2, allow_null_value = 4 }; +static void parse_value(fb_parser_t *P, fb_value_t *v, int flags, const char *error_msg) +{ + fb_token_t *t; + fb_token_t *sign; + + sign = optional(P, '-'); + t = P->token; + + switch (t->id) { + case LEX_TOK_HEX: + read_hex_value(P, t, v, sign != 0); + break; + case LEX_TOK_INT: + read_integer_value(P, t, v, sign != 0); + break; + case LEX_TOK_FLOAT: + read_float_value(P, t, v, sign != 0); + break; + case tok_kw_true: + v->b = 1; + v->type = vt_bool; + break; + case tok_kw_false: + v->b = 0; + v->type = vt_bool; + break; + case tok_kw_null: + if (!(flags & allow_null_value)) { + v->type = vt_invalid; + error_tok(P, t, error_msg); + return; + } + v->type = vt_null; + break; + case LEX_TOK_STRING_BEGIN: + next(P); + parse_string_literal(P, v); + if (!(flags & allow_string_value)) { + v->type = vt_invalid; + error_tok(P, t, error_msg); + return; + } + if (sign) { + v->type = vt_invalid; + error_tok(P, t, "string constants cannot be signed"); + return; + } + return; + case LEX_TOK_ID: + parse_ref(P, &v->ref); + v->type = vt_name_ref; + if (sign) { + v->type = vt_invalid; + /* Technically they could, but we do not allow it. */ + error_tok(P, t, "named values cannot be signed"); + } + return; + default: + /* We might have consumed a sign, but never mind that. 
*/ + error_tok(P, t, error_msg); + return; + } + if (sign && v->type == vt_bool) { + v->type = vt_invalid; + error_tok(P, t, "boolean constants cannot be signed"); + } + next(P); +} + +static void parse_fixed_array_size(fb_parser_t *P, fb_token_t *ttype, fb_value_t *v) +{ + const char *error_msg = "fixed length array length expected to be an unsigned integer"; + fb_value_t vsize; + fb_token_t *tlen = P->token; + + parse_value(P, &vsize, 0, error_msg); + if (vsize.type != vt_uint) { + error_tok(P, tlen, error_msg); + v->type = vt_invalid; + return; + } + if (v->type == vt_invalid) return; + switch (v->type) { + case vt_vector_type: + v->type = vt_fixed_array_type; + break; + case vt_vector_type_ref: + v->type = vt_fixed_array_type_ref; + break; + case vt_vector_string_type: + v->type = vt_fixed_array_string_type; + break; + case vt_invalid: + return; + default: + error_tok(P, ttype, "invalid fixed length array type"); + v->type = vt_invalid; + return; + } + if (vsize.u == 0) { + error_tok(P, tlen, "fixed length array length cannot be 0"); + v->type = vt_invalid; + return; + } + /* + * This allows for safe 64-bit multiplication by elements no + * larger than 2^32-1 and also fits into the value len field. + * without extra size cost. 
+ */ + if (vsize.u > UINT32_MAX) { + error_tok(P, tlen, "fixed length array length overflow"); + v->type = vt_invalid; + return; + } + v->len = (uint32_t)vsize.u; +} + +/* ':' must already be matched */ +static void parse_type(fb_parser_t *P, fb_value_t *v) +{ + fb_token_t *t = 0; + fb_token_t *ttype = 0; + fb_token_t *t0 = P->token; + int vector = 0; + + v->len = 1; + v->type = vt_invalid; + while ((t = optional(P, '['))) { + ++vector; + } + if (vector > 1) { + error_tok(P, t0, "vector type can only be one-dimensional"); + } + ttype = P->token; + switch (ttype->id) { + case tok_kw_int: + case tok_kw_bool: + case tok_kw_byte: + case tok_kw_long: + case tok_kw_uint: + case tok_kw_float: + case tok_kw_short: + case tok_kw_char: + case tok_kw_ubyte: + case tok_kw_ulong: + case tok_kw_ushort: + case tok_kw_double: + case tok_kw_int8: + case tok_kw_int16: + case tok_kw_int32: + case tok_kw_int64: + case tok_kw_uint8: + case tok_kw_uint16: + case tok_kw_uint32: + case tok_kw_uint64: + case tok_kw_float32: + case tok_kw_float64: + v->t = P->token; + v->type = vector ? vt_vector_type : vt_scalar_type; + next(P); + break; + case tok_kw_string: + v->t = P->token; + v->type = vector ? vt_vector_string_type : vt_string_type; + next(P); + break; + case LEX_TOK_ID: + parse_ref(P, &v->ref); + v->type = vector ? 
vt_vector_type_ref : vt_type_ref; + break; + case ']': + error_tok(P, t, "vector type cannot be empty"); + break; + default: + error_tok(P, ttype, "invalid type specifier"); + break; + } + if (vector && optional(P, ':')) { + parse_fixed_array_size(P, ttype, v); + } + while (optional(P, ']') && vector--) { + } + if (vector) { + error_tok_2(P, t, "vector type missing ']' to match", t0); + } + if ((t = optional(P, ']'))) { + error_tok_2(P, t, "extra ']' not matching", t0); + while (optional(P, ']')) { + } + } + if (ttype->id == tok_kw_char && v->type != vt_invalid) { + if (v->type != vt_fixed_array_type) { + error_tok(P, ttype, "char can only be used as a fixed length array type [char:<n>]"); + v->type = vt_invalid; + } + } +} + +static fb_metadata_t *parse_metadata(fb_parser_t *P) +{ + fb_token_t *t, *t0; + fb_metadata_t *md = 0; + + if (!(t0 = optional(P, '('))) { + return 0; + } + if ((t = optional(P, LEX_TOK_ID))) + for (;;) { + fb_add_metadata(P, &md); + md->ident = t; + if (optional(P, ':')) { + parse_value(P, &md->value, allow_string_value, "scalar or string value expected"); + } + if (P->failed >= FLATCC_MAX_ERRORS) { + return md; + } + if (!optional(P, ',')) { + break; + } + if (!(t = match(P, LEX_TOK_ID, "attribute name expected identifier after ','"))) { + break; + } + } + advance(P, ')', "metadata expected ')' to match", t0); + revert_metadata(&md); + return md; +} + +static void parse_field(fb_parser_t *P, fb_member_t *fld) +{ + fb_token_t *t; + + remap_field_ident(P); + if (!(t = match(P, LEX_TOK_ID, "field expected identifier"))) { + goto fail; + } + fld->symbol.ident = t; + if (!match(P, ':', "field expected ':' before mandatory type")) { + goto fail; + } + parse_type(P, &fld->type); + if (optional(P, '=')) { + /* + * Because types can be named references, we do not check the + * default assignment before the schema is fully parsed. + * We allow the initializer to be a name in case it is an enum + * name. 
+ */ + parse_value(P, &fld->value, allow_id_value | allow_null_value, "initializer must be of scalar type or null"); + } + fld->metadata = parse_metadata(P); + advance(P, ';', "field must be terminated with ';'", 0); + return; +fail: + recover2(P, ';', 1, '}', 0); +} + +static void parse_method(fb_parser_t *P, fb_member_t *fld) +{ + fb_token_t *t; + if (!(t = match(P, LEX_TOK_ID, "method expected identifier"))) { + goto fail; + } + fld->symbol.ident = t; + if (!match(P, '(', "method expected '(' after identifier")) { + goto fail; + } + parse_type(P, &fld->req_type); + if (!match(P, ')', "method expected ')' after request type")) { + goto fail; + } + if (!match(P, ':', "method expected ':' before mandatory response type")) { + goto fail; + } + parse_type(P, &fld->type); + if ((t = optional(P, '='))) { + error_tok(P, t, "method does not accept an initializer"); + goto fail; + } + fld->metadata = parse_metadata(P); + advance(P, ';', "method must be terminated with ';'", 0); + return; +fail: + recover2(P, ';', 1, '}', 0); +} + +/* `enum` must already be matched. 
 */
/*
 * Parses `Name [: type] [(metadata)] { A [= value], B, ... }` into `ct`.
 * Members are collected by prepending and put back into source order
 * at the end. On a syntax error the parser skips past the next '}'.
 */
static void parse_enum_decl(fb_parser_t *P, fb_compound_type_t *ct)
{
    fb_token_t *t, *t0;
    fb_member_t *member;

    if (!(ct->symbol.ident = match(P, LEX_TOK_ID, "enum declaration expected identifier"))) {
        goto fail;
    }
    if (optional(P, ':')) {
        parse_type(P, &ct->type);
        /* Only scalar keywords are accepted here, and of those not the float types. */
        if (ct->type.type != vt_scalar_type) {
            error_tok(P, ct->type.t, "integral type expected");
        } else {
            switch (ct->type.t->id) {
            case tok_kw_float:
            case tok_kw_double:
            case tok_kw_float32:
            case tok_kw_float64:
                error_tok(P, ct->type.t, "integral type expected");
                break;
            default:
                break;
            }
        }
    }
    ct->metadata = parse_metadata(P);
    if (!((t0 = match(P, '{', "enum declaration expected '{'")))) {
        goto fail;
    }
    for (;;) {
        /* Optionally lets a keyword serve as a member name. */
        remap_enum_ident(P);
        if (!(t = match(P, LEX_TOK_ID,
                "member identifier expected"))) {
            goto fail;
        }
        if (P->failed >= FLATCC_MAX_ERRORS) {
            goto fail;
        }
        member = fb_add_member(P, &ct->members);
        member->symbol.ident = t;
        if (optional(P, '=')) {
            t = P->token;
            parse_value(P, &member->value, 0, "integral constant expected");
            /* Leave detailed type (e.g. no floats) and range checking to a later stage. */
        }
        /*
         * Trailing comma is optional in flatc but not in grammar, we
         * follow flatc.
         */
        if (!optional(P, ',') || P->token->id == '}') {
            break;
        }
        /* Drop doc comments that were not attached to a member. */
        P->doc = 0;
    }
    if (t0) {
        advance(P, '}', "enum missing closing '}' to match", t0);
    }
    revert_symbols(&ct->members);
    return;
fail:
    recover(P, '}', 1);
}

/* `union` must already be matched.
 */
/*
 * Parses `Name [(metadata)] { Member [: type] [= value], ... }` into
 * `ct`. Each member is a possibly qualified type reference; an implicit
 * `NONE` member is added first in the final list. On a syntax error the
 * parser skips to the next ';' (consumed) or '}' (left in place).
 */
static void parse_union_decl(fb_parser_t *P, fb_compound_type_t *ct)
{
    fb_token_t *t0;
    fb_member_t *member;
    fb_ref_t *ref;
    fb_token_t *t;

    if (!(ct->symbol.ident = match(P, LEX_TOK_ID, "union declaration expected identifier"))) {
        goto fail;
    }
    ct->metadata = parse_metadata(P);
    if (!((t0 = match(P, '{', "union declaration expected '{'")))) {
        goto fail;
    }
    for (;;) {
        if (P->token->id != LEX_TOK_ID) {
            error_tok(P, P->token, "union expects an identifier");
            goto fail;
        }
        if (P->failed >= FLATCC_MAX_ERRORS) {
            goto fail;
        }
        t = P->token;
        member = fb_add_member(P, &ct->members);
        parse_ref(P, &ref);
        member->type.ref = ref;
        member->type.type = vt_type_ref;
        /* Walk to the last component of the dotted name. */
        while (ref->link) {
            ref = ref->link;
        }
        /* The union member name is the unqualified reference. */
        member->symbol.ident = ref->ident;
        if (optional(P, ':')) {
            if (member->type.ref->link) {
                error_tok(P, t, "qualified union member name cannot have an explicit type");
            }
            /* The explicit type replaces the one implied by the name. */
            parse_type(P, &member->type);
            /* Leave type checking to later stage. */
        }
        if (optional(P, '=')) {
            parse_value(P, &member->value, 0, "integral constant expected");
            /* Leave detailed type (e.g. no floats) and range checking to a later stage. */
        }
        /* A trailing comma before '}' is accepted. */
        if (!optional(P, ',') || P->token->id == '}') {
            break;
        }
        /* Drop doc comments that were not attached to a member. */
        P->doc = 0;
    }
    advance(P, '}', "union missing closing '}' to match", t0);
    revert_symbols(&ct->members);
    /* Add implicit `NONE` member first in the list. */
    member = fb_add_member(P, &ct->members);
    member->symbol.ident = &P->t_none;
    return;
fail:
    recover2(P, ';', 1, '}', 0);
}

/* `struct` , `table`, or 'rpc_service' must already be matched.
*/ +static void parse_compound_type(fb_parser_t *P, fb_compound_type_t *ct, long token) +{ + fb_token_t *t = 0; + + if (!(t = match(P, LEX_TOK_ID, "Declaration expected an identifier"))) { + goto fail; + } + ct->symbol.ident = t; + ct->metadata = parse_metadata(P); + if (!(match(P, '{', "Declaration expected '{'"))) { + goto fail; + } + t = P->token; + +/* Allow empty tables and structs. */ +#if 0 + if (P->token->id == '}') { + error_tok(P, t, "table / struct declaration cannot be empty"); + } +#endif + while (P->token->id != '}') { + if (token == tok_kw_rpc_service) { + parse_method(P, fb_add_member(P, &ct->members)); + } else { + parse_field(P, fb_add_member(P, &ct->members)); + } + if (P->failed >= FLATCC_MAX_ERRORS) { + goto fail; + } + } + if (!optional(P, '}') && t) { + error_tok_2(P, P->token, "Declaration missing closing '}' to match", t); + } + revert_symbols(&ct->members); + return; +fail: + recover(P, '}', 1); +} + +static void parse_namespace(fb_parser_t *P) +{ + fb_ref_t *ref = 0; + fb_token_t *t = P->token; + + if (optional(P, ';') && t) { + /* Revert to global namespace. 
*/ + P->current_scope = 0; + return; + } + if (P->token->id != LEX_TOK_ID) { + error_tok(P, P->token, "namespace expects an identifier"); + recover(P, ';', 1); + return; + } + parse_ref(P, &ref); + advance(P, ';', "missing ';' expected by namespace at", t); + P->current_scope = fb_add_scope(P, ref); +} + +static void parse_root_type(fb_parser_t *P, fb_root_type_t *rt) +{ + fb_token_t *t = P->token; + + if (rt->name) { + error_tok(P, P->token, "root_type already set"); + } + parse_ref(P, &rt->name); + rt->scope = P->current_scope; + advance(P, ';', "missing ';' expected by root_type at", t); +} + +static void parse_include(fb_parser_t *P) +{ + fb_token_t *t = P->token; + + while (optional(P, tok_kw_include)) { + if (P->opts.disable_includes) { + error_tok(P, t, "include statements not supported by current environment"); + } + if (P->failed >= FLATCC_MAX_ERRORS) { + return; + } + if (!match(P, LEX_TOK_STRING_BEGIN, + "include expected a string literal as filename")) { + recover(P, ';', 1); + } + parse_string_literal(P, &fb_add_include(P)->name); + match(P, ';', "include statement expected ';'"); + } +} + +static void parse_attribute(fb_parser_t *P, fb_attribute_t *a) +{ + fb_token_t *t = P->token; + + if (match(P, LEX_TOK_STRING_BEGIN, "attribute expected string literal")) { + parse_string_literal(P, &a->name.name); + if (a->name.name.s.len == 0) { + error_tok_as_string(P, t, "attribute name cannot be empty", 0, 0); + } + } + match(P, ';', "attribute expected ';'"); +} + +static void parse_file_extension(fb_parser_t *P, fb_value_t *v) +{ + if (v->type == vt_string) { + error_tok_as_string(P, P->token, "file extension already set", v->s.s, (size_t)v->s.len); + } + if (!match(P, LEX_TOK_STRING_BEGIN, "file_extension expected string literal")) { + goto fail; + } + parse_string_literal(P, v); + match(P, ';', "file_extension expected ';'"); + return; +fail: + recover(P, ';', 1); +} + +static void parse_file_identifier(fb_parser_t *P, fb_value_t *v) +{ + fb_token_t *t; + 
if (v->type != vt_missing) { + error_tok_as_string(P, P->token, "file identifier already set", v->s.s, (size_t)v->s.len); + } + if (!match(P, LEX_TOK_STRING_BEGIN, "file_identifier expected string literal")) { + goto fail; + } + t = P->token; + parse_string_literal(P, v); + if (v->s.s && v->s.len != 4) { + v->type = vt_invalid; + error_tok(P, t, "file_identifier must be 4 characters"); + } + match(P, ';', "file_identifier expected ';'"); + return; +fail: + recover(P, ';', 1); +} + +static void parse_schema_decl(fb_parser_t *P) +{ + switch(P->token->id) { + case tok_kw_namespace: + next(P); + parse_namespace(P); + break; + case tok_kw_file_extension: + next(P); + parse_file_extension(P, &P->schema.file_extension); + break; + case tok_kw_file_identifier: + next(P); + parse_file_identifier(P, &P->schema.file_identifier); + break; + case tok_kw_root_type: + next(P); + parse_root_type(P, &P->schema.root_type); + break; + case tok_kw_attribute: + next(P); + parse_attribute(P, fb_add_attribute(P)); + break; + case tok_kw_struct: + next(P); + parse_compound_type(P, fb_add_struct(P), tok_kw_struct); + break; + case tok_kw_table: + next(P); + parse_compound_type(P, fb_add_table(P), tok_kw_table); + break; + case tok_kw_rpc_service: + next(P); + parse_compound_type(P, fb_add_rpc_service(P), tok_kw_rpc_service); + break; + case tok_kw_enum: + next(P); + parse_enum_decl(P, fb_add_enum(P)); + break; + case tok_kw_union: + next(P); + parse_union_decl(P, fb_add_union(P)); + break; + case tok_kw_include: + error_tok(P, P->token, "include statements must be placed first in the schema"); + break; + case '{': + error_tok(P, P->token, "JSON objects in schema file is not supported - but a schema specific JSON parser can be generated"); + break; + case LEX_TOK_CTRL: + error_tok_as_string(P, P->token, "unexpected control character in schema definition", "?", 1); + break; + case LEX_TOK_COMMENT_CTRL: + error_tok_as_string(P, P->token, "unexpected control character in comment", "?", 1); + 
break; + case LEX_TOK_COMMENT_UNTERMINATED: + error_tok_as_string(P, P->token, "unterminated comment", "<eof>", 5); + break; + default: + error_tok(P, P->token, "unexpected token in schema definition"); + break; + } +} + +static int parse_schema(fb_parser_t *P) +{ + fb_token_t *t, *t0; + parse_include(P); + t = P->token; + for (;;) { + if (is_end(t)) { + break; + } + if (P->failed >= FLATCC_MAX_ERRORS) { + return -1; + } + t0 = t; + parse_schema_decl(P); + t = P->token; + if (t == t0) { + if (P->failed) { + return -1; + } + error_tok(P, t, "extra tokens in input"); + return -1; + } + } + revert_names(&P->schema.attributes); + revert_symbols(&P->schema.symbols); + return 0; +} + +static inline void clear_elem_buffers(fb_parser_t *P) +{ + void **p, **p2; + + p = P->elem_buffers; + while (p) { + p2 = *((void**)p); + free(p); + p = p2; + }; +} + +static void push_token(fb_parser_t *P, long id, const char *first, const char *last) +{ + size_t offset; + fb_token_t *t; + + P->te = P->ts + P->tcapacity; + if (P->token == P->te) { + offset = (size_t)(P->token - P->ts); + P->tcapacity = P->tcapacity ? 2 * P->tcapacity : 1024; + P->ts = realloc(P->ts, (size_t)P->tcapacity * sizeof(fb_token_t)); + checkmem(P->ts); + P->te = P->ts + P->tcapacity; + P->token = P->ts + offset; + } + t = P->token; + t->id = id; + t->text = first; + t->len = (long)(last - first); + t->linenum = P->linenum; + t->pos = (long)(first - P->line + 1); + ++P->token; +} + +/* + * If the file contains a control character, we can get multiple + * comments per line. + */ +static inline void push_comment(fb_parser_t *P, const char *first, const char *last) +{ + if (P->doc_mode) { + push_token(P, tok_kw_doc_comment, first, last); + } +} + +static void inject_token(fb_token_t *t, const char *lex, long id) +{ + t->id = id; + t->text = lex; + t->len = (long)strlen(lex); + t->pos = 0; + t->linenum = 0; +} + +/* --- Customize lexer --- */ + +/* Depends on the `context` argument given to the lex function. 
*/
/* Access member `name` of the parser that was passed as lexer context. */
#define ctx(name) (((fb_parser_t *)context)->name)

/*
 * Count the line and remember where the next one starts; push_token
 * uses ctx(line) to compute the position of a token within its line.
 */
#define lex_emit_newline(first, last) (ctx(linenum)++, ctx(line) = last)

/* Same bookkeeping for a newline inside a string, which is also emitted as a token. */
#define lex_emit_string_newline(first, last)                                \
    (ctx(linenum)++, ctx(line) = last,                                      \
    push_token((fb_parser_t*)context, LEX_TOK_STRING_NEWLINE, first, last))

/*
 * Add empty comment on comment start - otherwise we miss empty lines.
 * Save is_doc because comment_part does not remember.
 *
 * Note: this expands to a brace-enclosed block, not to an expression.
 */
#define lex_emit_comment_begin(first, last, is_doc)                         \
    { ctx(doc_mode) = is_doc; push_comment((fb_parser_t*)context, last, last); }
/* push_comment only emits a token while doc_mode is set. */
#define lex_emit_comment_part(first, last) push_comment((fb_parser_t*)context, first, last)
#define lex_emit_comment_end(first, last) (ctx(doc_mode) = 0)

/* By default emitted as lex_emit_other which would be ignored. */
#define lex_emit_comment_unterminated(pos)                                  \
    push_token((fb_parser_t*)context, LEX_TOK_COMMENT_UNTERMINATED, pos, pos)

/*
 * A blank control character inside a comment is kept as one character
 * of comment text; any other control character becomes an error token.
 *
 * Note: this expands to a bare if/else statement, not to an expression.
 */
#define lex_emit_comment_ctrl(pos)                                          \
    if (lex_isblank(*pos)) {                                                \
        push_comment((fb_parser_t*)context, pos, pos + 1);                  \
    } else {                                                                \
        push_token((fb_parser_t*)context, LEX_TOK_COMMENT_CTRL,             \
                pos, pos + 1);                                              \
    }

/*
 * Provide hook to lexer for emitting tokens. We can override many
 * things, but most default to calling lex_emit, so that is all we need
 * to handle.
 *
 * `context` is a magic name available to macros in the lexer.
 */
#define lex_emit(token, first, last)                                        \
    push_token((fb_parser_t*)context, token, first, last)

/*
 * We could leave eos alone, as it defaults to emit, but formally we
 * should use the eof marker which is always zero, so the parser can
 * check for it easily, if needed.
 */
#define lex_emit_eos(first, last)                                           \
    push_token((fb_parser_t*)context, LEX_TOK_EOF, first, last)

/*
 * This event happens in place of eos if we exhaust the input buffer.
 * In this case we treat this as end of input, but this choice prevents
 * us from parsing across multiple buffers.
+ */ +#define lex_emit_eob(pos) \ + push_token((fb_parser_t*)context, LEX_TOK_EOF, pos, pos) + +/* + * Luthor is our speedy generic lexer - it knows most common operators + * and therefore allows us to fail meaningfully on those that we don't + * support here, which is most. + */ +#include "lex/luthor.c" + +#include "keywords.h" + +/* Root schema `rs` is null for top level parser. */ +int fb_init_parser(fb_parser_t *P, fb_options_t *opts, const char *name, + fb_error_fun error_out, void *error_ctx, fb_root_schema_t *rs) +{ + size_t n, name_len; + char *s; + + memset(P, 0, sizeof(*P)); + + if (error_out) { + P->error_out = error_out; + P->error_ctx = error_ctx; + } else { + P->error_out = fb_default_error_out; + } + if (opts) { + memcpy(&P->opts, opts, sizeof(*opts)); + } else { + flatcc_init_options(&P->opts); + } + P->schema.root_schema = rs ? rs : &P->schema.root_schema_instance; + switch (P->opts.offset_size) { + case 2: + case 4: + case 8: + break; + default: + error(P, "invalid offset configured, must be 2, 4 (default), or 8"); + return -1; + } + switch (P->opts.voffset_size) { + case 2: + case 4: + case 8: + break; + default: + error(P, "invalid voffset configured, must be 2 (default), 4, or 8"); + return -1; + } + if (!name) { + /* Mostly for testing, just so we always have a name. 
*/ + name = FLATCC_DEFAULT_FILENAME; + } + if (name == 0) { + name = ""; + } + name_len = strlen(name); + checkmem((P->schema.basename = fb_create_basename(name, name_len, opts->default_schema_ext))); + n = strlen(P->schema.basename); + checkmem(s = fb_copy_path_n(P->schema.basename, n)); + pstrntoupper(s, n); + P->schema.basenameup = s; + P->schema.name.name.s.s = s; + P->schema.name.name.s.len = (int)n; + checkmem((P->schema.errorname = fb_create_basename(name, name_len, ""))); + if (opts->ns) { + P->schema.prefix.s = (char *)opts->ns; + P->schema.prefix.len = (int)strlen(opts->ns); + } + P->current_scope = fb_add_scope(P, 0); + assert(P->current_scope == fb_scope_table_find(&P->schema.root_schema->scope_index, 0, 0)); + return 0; +} + +/* + * Main entry function for this specific parser type. + * We expect a zero terminated string. + * + * The parser structure is uninitialized upon entry, and should be + * cleared with `clear_flatbuffer_parser` subsequently. + * + * Datastructures point into the token buffer and into the input + * buffer, so the parser and input should not be cleared prematurely. + * + * The input buffer must remain valid until the parser is cleared + * because the internal represenation stores pointers into the buffer. + * + * `own_buffer` indicates that the the buffer should be deallocated when + * the parser is cleaned up. + */ +int fb_parse(fb_parser_t *P, const char *input, size_t len, int own_buffer) +{ + static const char *id_none = "NONE"; + static const char *id_ubyte = "ubyte"; + + P->line = input; + P->linenum = 1; + + /* Used with union defaults. */ + inject_token(&P->t_none, id_none, LEX_TOK_ID); + inject_token(&P->t_ubyte, id_ubyte, tok_kw_ubyte); + + if (own_buffer) { + P->managed_input = input; + } + lex(input, len, 0, P); + + P->te = P->token; + P->token = P->ts; + /* Only used while processing table id's. 
*/ + checkmem((P->tmp_field_marker = malloc(sizeof(P->tmp_field_marker[0]) * (size_t)P->opts.vt_max_count))); + checkmem((P->tmp_field_index = malloc(sizeof(P->tmp_field_index[0]) * (size_t)P->opts.vt_max_count))); + if (P->token->id == tok_kw_doc_comment) { + next(P); + } + parse_schema(P); + return P->failed; +} + +static void __destroy_scope_item(void *item, fb_scope_t *scope) +{ + /* Each scope points into table that is cleared separately. */ + (void)item; + + fb_symbol_table_clear(&scope->symbol_index); +} + +void fb_clear_parser(fb_parser_t *P) +{ + fb_symbol_t *sym; + fb_compound_type_t *ct; + + for (sym = P->schema.symbols; sym; sym = sym->link) { + switch (sym->kind) { + case fb_is_struct: + case fb_is_table: + case fb_is_rpc_service: + case fb_is_enum: + case fb_is_union: + ct = (fb_compound_type_t *)sym; + fb_symbol_table_clear(&ct->index); + fb_value_set_clear(&ct->value_set); + } + } + fb_schema_table_clear(&P->schema.root_schema_instance.include_index); + fb_name_table_clear(&P->schema.root_schema_instance.attribute_index); + ptr_set_clear(&P->schema.visible_schema); + if (P->tmp_field_marker) { + free(P->tmp_field_marker); + } + if (P->tmp_field_index) { + free(P->tmp_field_index); + } + if (P->ts) { + free(P->ts); + } + if (P->schema.basename) { + free((void *)P->schema.basename); + } + if (P->schema.basenameup) { + free((void *)P->schema.basenameup); + } + if (P->schema.errorname) { + free((void *)P->schema.errorname); + } + /* + * P->referer_path in included files points to parent P->path, so + * don't free it, and don't access it after this point. + */ + if (P->path) { + free((void *)P->path); + } + fb_scope_table_destroy(&P->schema.root_schema_instance.scope_index, + __destroy_scope_item, 0); + /* Destroy last since destructor has references into elem buffer. */ + clear_elem_buffers(P); + if (P->managed_input) { + free((void *)P->managed_input); + } + memset(P, 0, sizeof(*P)); +} |