/* * FlatBuffers IDL parser. * * Originally based on the numeric parser in the Luthor lexer project. * * We are moving away from TDOP approach because the grammer doesn't * really benefit from it. We use the same overall framework. */ #include <stdio.h> #include <stdlib.h> #include <assert.h> #include <stdarg.h> #include "semantics.h" #include "codegen.h" #include "fileio.h" #include "pstrutil.h" #include "flatcc/portable/pparseint.h" void fb_default_error_out(void *err_ctx, const char *buf, size_t len) { (void)err_ctx; fwrite(buf, 1, len, stderr); } int fb_print_error(fb_parser_t *P, const char * format, ...) { int n; va_list ap; char buf[ERROR_BUFSIZ]; va_start (ap, format); n = vsnprintf (buf, ERROR_BUFSIZ, format, ap); va_end (ap); if (n >= ERROR_BUFSIZ) { strcpy(buf + ERROR_BUFSIZ - 5, "...\n"); n = ERROR_BUFSIZ - 1; } P->error_out(P->error_ctx, buf, (size_t)n); return n; } const char *error_find_file_of_token(fb_parser_t *P, fb_token_t *t) { /* * Search token in dependent buffers if not in current token * buffer. We can do this as a linear search because we limit the * number of output errors. */ while (P) { if (P->ts <= t && P->te > t) { return P->schema.errorname; } P = P->dependencies; } return ""; } void error_report(fb_parser_t *P, fb_token_t *t, const char *msg, fb_token_t *peer, const char *s, size_t len) { const char *file, *peer_file; if (t && !s) { s = t->text; len = (size_t)t->len; } if (!msg) { msg = ""; } if (!s) { s = ""; len = 0; } if (t && !peer) { file = error_find_file_of_token(P, t); fb_print_error(P, "%s:%ld:%ld: error: '%.*s': %s\n", file, (long)t->linenum, (long)t->pos, len, s, msg); } else if (t && peer) { file = error_find_file_of_token(P, t); peer_file = error_find_file_of_token(P, peer); fb_print_error(P, "%s:%ld:%ld: error: '%.*s': %s: %s:%ld:%ld: '%.*s'\n", file, (long)t->linenum, (long)t->pos, len, s, msg, peer_file, (long)peer->linenum, (long)peer->pos, (int)peer->len, peer->text); } else if (!t && !peer) { fb_print_error(P, "error: %s\n", msg); } else if (peer) { peer_file = error_find_file_of_token(P, peer); fb_print_error(P, "error: %s: %s:%ld:%ld: '%.*s'\n", msg, peer_file, (long)peer->linenum, (long)peer->pos, (int)peer->len, peer->text); } else { fb_print_error(P, "internal error: unexpected state\n"); } ++P->failed; } void error_ref_sym(fb_parser_t *P, fb_ref_t *ref, const char *msg, fb_symbol_t *s2) { fb_ref_t *p; char buf[FLATCC_MAX_IDENT_SHOW + 1]; size_t k = FLATCC_MAX_IDENT_SHOW; size_t n = 0; size_t n0 = 0; int truncated = 0; p = ref; while (p && k > 0) { if (n0 > 0) { buf[n0] = '.'; --k; ++n0; } n = (size_t)p->ident->len; if (k < n) { n = k; truncated = 1; } memcpy(buf + n0, p->ident->text, n); k -= n; n0 += n; p = p->link; } if (p) truncated = 1; buf[n0] = '\0'; if (n0 > 0) { --n0; } if (truncated) { memcpy(buf + FLATCC_MAX_IDENT_SHOW + 1 - 4, "...\0", 4); n0 = FLATCC_MAX_IDENT_SHOW; } error_report(P, ref->ident, msg, s2 ? s2->ident : 0, buf, n0); } //#define LEX_DEBUG /* Flatbuffers reserve keywords. */ #define LEX_KEYWORDS #define LEX_C_BLOCK_COMMENT /* * Flatbuffers also support /// on a single line for documentation but * we can handle that within the normal line comment parsing logic. */ #define LEX_C99_LINE_COMMENT /* * String escapes are not defined in fb schema but it only uses strings * for attribute, namespace, file ext, and file id. For JSON objects we * use C string escapes but control characters must be detected. */ #define LEX_C_STRING /* Accept numbers like -0x42 as integer literals. */ #define LEX_HEX_NUMERIC #define lex_isblank(c) ((c) == ' ' || (c) == '\t') #include "parser.h" #ifdef LEX_DEBUG static void print_token(fb_token_t *t) { lex_fprint_token(stderr, t->id, t->text, t->text + t->len, t->linenum, t->pos); } static void debug_token(const char *info, fb_token_t *t) { fprintf(stderr, "%s\n ", info); print_token(t); } #else #define debug_token(info, t) ((void)0) #endif static void revert_metadata(fb_metadata_t **list) { REVERT_LIST(fb_metadata_t, link, list); } static void revert_symbols(fb_symbol_t **list) { REVERT_LIST(fb_symbol_t, link, list); } static void revert_names(fb_name_t **list) { REVERT_LIST(fb_name_t, link, list); } static inline fb_doc_t *fb_add_doc(fb_parser_t *P, fb_token_t *t) { fb_doc_t *p; p = new_elem(P, sizeof(*p)); p->ident = t; p->link = P->doc; P->doc = p; return p; } #define fb_assign_doc(P, p) {\ revert_symbols(&P->doc); p->doc = P->doc; P->doc = 0; } static inline fb_compound_type_t *fb_add_table(fb_parser_t *P) { fb_compound_type_t *p; p = new_elem(P, sizeof(*p)); p->symbol.link = P->schema.symbols; p->symbol.kind = fb_is_table; P->schema.symbols = &p->symbol; p->scope = P->current_scope; fb_assign_doc(P, p); return p; } static inline fb_compound_type_t *fb_add_struct(fb_parser_t *P) { fb_compound_type_t *p; p = new_elem(P, sizeof(*p)); p->symbol.link = P->schema.symbols; p->symbol.kind = fb_is_struct; P->schema.symbols = &p->symbol; p->scope = P->current_scope; fb_assign_doc(P, p); return p; } static inline fb_compound_type_t *fb_add_rpc_service(fb_parser_t *P) { fb_compound_type_t *p; p = new_elem(P, sizeof(*p)); p->symbol.link = P->schema.symbols; p->symbol.kind = fb_is_rpc_service; P->schema.symbols = &p->symbol; p->scope = P->current_scope; fb_assign_doc(P, p); return p; } static inline fb_compound_type_t *fb_add_enum(fb_parser_t *P) { fb_compound_type_t *p; p = new_elem(P, sizeof(*p)); p->symbol.link = P->schema.symbols; p->symbol.kind = fb_is_enum; P->schema.symbols = &p->symbol; p->scope = P->current_scope; fb_assign_doc(P, p); return p; } static inline fb_compound_type_t *fb_add_union(fb_parser_t *P) { fb_compound_type_t *p; p = new_elem(P, sizeof(*p)); p->symbol.link = P->schema.symbols; p->symbol.kind = fb_is_union; P->schema.symbols = &p->symbol; p->scope = P->current_scope; fb_assign_doc(P, p); return p; } static inline fb_ref_t *fb_add_ref(fb_parser_t *P, fb_token_t *t) { fb_ref_t *p; p = new_elem(P, sizeof(*p)); p->ident = t; return p; } static inline fb_attribute_t *fb_add_attribute(fb_parser_t *P) { fb_attribute_t *p; p = new_elem(P, sizeof(*p)); p->name.link = P->schema.attributes; P->schema.attributes = &p->name; return p; } static inline fb_include_t *fb_add_include(fb_parser_t *P) { fb_include_t *p; p = new_elem(P, sizeof(*p)); p->link = P->schema.includes; return P->schema.includes = p; } static inline fb_scope_t *fb_add_scope(fb_parser_t *P, fb_ref_t *name) { fb_scope_t *p; p = fb_scope_table_find(&P->schema.root_schema->scope_index, name, 0); if (p) { return p; } p = new_elem(P, sizeof(*p)); p->name = name; p->prefix = P->schema.prefix; fb_scope_table_insert_item(&P->schema.root_schema->scope_index, p, ht_keep); return p; } static inline fb_metadata_t *fb_add_metadata(fb_parser_t *P, fb_metadata_t **metadata) { fb_metadata_t *p; p = new_elem(P, sizeof(*p)); p->link = *metadata; return *metadata = p; } static inline fb_member_t *fb_add_member(fb_parser_t *P, fb_symbol_t **members) { fb_member_t *p; p = new_elem(P, sizeof(*p)); p->symbol.link = *members; p->symbol.kind = fb_is_member; *members = (fb_symbol_t *)p; fb_assign_doc(P, p); return p; } static inline int is_end(fb_token_t *t) { return t->id == LEX_TOK_EOF; } static fb_token_t *next(fb_parser_t *P) { again: ++P->token; if (P->token == P->te) { /* We keep returning end of token to help binary operators etc., if any. */ --P->token; assert(0); switch (P->token->id) { case LEX_TOK_EOS: case LEX_TOK_EOB: case LEX_TOK_EOF: P->token->id = LEX_TOK_EOF; return P->token; } error_tok(P, P->token, "unexpected end of input"); } if (P->token->id == tok_kw_doc_comment) { /* Note: we can have blanks that are control characters here, such as \t. */ fb_add_doc(P, P->token); goto again; } debug_token("next", P->token); return P->token; } static void recover(fb_parser_t *P, long token_id, int consume) { while (!is_end(P->token)) { if (P->token->id == token_id) { if (consume) { next(P); } P->doc = 0; return; } next(P); } } static void recover2(fb_parser_t *P, long token_id, int consume, long token_id_2, int consume_2) { while (!is_end(P->token)) { if (P->token->id == token_id) { if (consume) { next(P); } P->doc = 0; return; } if (P->token->id == token_id_2) { if (consume_2) { next(P); } P->doc = 0; return; } next(P); } } static inline fb_token_t *optional(fb_parser_t *P, long id) { fb_token_t *t = 0; if (P->token->id == id) { t = P->token; next(P); } return t; } static inline fb_token_t *match(fb_parser_t *P, long id, char *msg) { fb_token_t *t = 0; if (P->token->id == id) { t = P->token; next(P); } else { error_tok(P, P->token, msg); } return t; } /* * When a keyword should also be accepted as an identifier. * This is useful for JSON where field naems are visible. * Since field names are not referenced within the schema, * this is generally safe. Enums can also be resererved but * they can then not be used as default values. Table names * and other type names should not be remapped as they can then * not by used as a type name for other fields. */ #if FLATCC_ALLOW_KW_FIELDS static inline void remap_field_ident(fb_parser_t *P) { if (P->token->id >= LEX_TOK_KW_BASE && P->token->id < LEX_TOK_KW_END) { P->token->id = LEX_TOK_ID; } } #else static inline void remap_field_ident(fb_parser_t *P) { (void)P; } #endif #if FLATCC_ALLOW_KW_ENUMS static inline void remap_enum_ident(fb_parser_t *P) { if (P->token->id >= LEX_TOK_KW_BASE && P->token->id < LEX_TOK_KW_END) { P->token->id = LEX_TOK_ID; } } #else static inline void remap_enum_ident(fb_parser_t *P) { (void)P; } #endif static fb_token_t *advance(fb_parser_t *P, long id, const char *msg, fb_token_t *peer) { /* * `advance` is generally used at end of statements so it is a * convenient place to get rid of rogue doc comments we can't attach * to anything meaningful. */ P->doc = 0; if (P->token->id != id) { error_tok_2(P, P->token, msg, peer); return P->token; } return next(P); } static void read_integer_value(fb_parser_t *P, fb_token_t *t, fb_value_t *v, int sign) { int status; v->type = vt_uint; /* The token does not store the sign internally. */ parse_integer(t->text, (size_t)t->len, &v->u, &status); if (status != PARSE_INTEGER_UNSIGNED) { v->type = vt_invalid; error_tok(P, t, "invalid integer format"); } if (sign) { v->i = -(int64_t)v->u; v->type = vt_int; #ifdef FLATCC_FAIL_ON_INT_SIGN_OVERFLOW /* Sometimes we might want this, so don't fail by default. */ if (v->i > 0) { v->type = vt_invalid; error_tok(P, t, "sign overflow in integer format"); } #endif } } static void read_hex_value(fb_parser_t *P, fb_token_t *t, fb_value_t *v, int sign) { int status; v->type = vt_uint; /* The token does not store the sign internally. */ parse_hex_integer(t->text, (size_t)t->len, &v->u, &status); if (status != PARSE_INTEGER_UNSIGNED) { v->type = vt_invalid; error_tok(P, t, "invalid hex integer format"); } if (sign) { v->i = -(int64_t)v->u; v->type = vt_int; #ifdef FLATCC_FAIL_ON_INT_SIGN_OVERFLOW /* Sometimes we might want this, so don't fail by default. */ if (v->i > 0) { v->type = vt_invalid; error_tok(P, t, "sign overflow in hex integer format"); } #endif } } static void read_float_value(fb_parser_t *P, fb_token_t *t, fb_value_t *v, int sign) { char *end; v->type = vt_float; v->f = strtod(t->text, &end); if (end != t->text + t->len) { v->type = vt_invalid; error_tok(P, t, "invalid float format"); } else if (t->text[0] == '.') { v->type = vt_invalid; /* The FB spec requires this, in line with the JSON format. */ error_tok(P, t, "numeric values must start with a digit"); } else if (sign) { v->f = -v->f; } } /* * We disallow escape characters, newlines and other control characters, * but especially escape characters because they would require us to * reallocate the string and convert the escaped characters. We also * disallow non-utf8 characters, but we do not check for it. The tab * character could meaningfully be accepted, but we don't. * * String literals are only used to name attributes, namespaces, * file identifiers and file externsions, so we really have no need * for these extra featuresescape . * * JSON strings should be handled separately, if or when supported - * either by converting escapes and reallocating the string, or * simply by ignoring the escape errors and use the string unmodified. */ static void parse_string_literal(fb_parser_t *P, fb_value_t *v) { fb_token_t *t; v->type = vt_string; v->s.s = 0; v->s.len = 0; for (;;) { t = P->token; switch (t->id) { case LEX_TOK_STRING_PART: if (v->s.s == 0) { v->s.s = (char *)t->text; } break; case LEX_TOK_STRING_ESCAPE: v->type = vt_invalid; error_tok(P, t, "escape not allowed in strings"); break; case LEX_TOK_STRING_CTRL: v->type = vt_invalid; error_tok_as_string(P, t, "control characters not allowed in strings", "?", 1); break; case LEX_TOK_STRING_NEWLINE: v->type = vt_invalid; error_tok(P, t, "newline not allowed in strings"); break; case LEX_TOK_STRING_UNTERMINATED: case LEX_TOK_STRING_END: goto done; default: error_tok(P, t, "internal error: unexpected token in string"); v->type = vt_invalid; goto done; } next(P); } done: /* * If we were to ignore all errors, we would get the full * string as is excluding delimiting quotes. */ if (v->s.s) { v->s.len = (int)(P->token->text - v->s.s); } if (!match(P, LEX_TOK_STRING_END, "unterminated string")) { v->type = vt_invalid; } } /* Current token must be an identifier. */ static void parse_ref(fb_parser_t *P, fb_ref_t **ref) { *ref = fb_add_ref(P, P->token); next(P); ref = &((*ref)->link); while (optional(P, '.')) { if (P->token->id != LEX_TOK_ID) { error_tok(P, P->token, "namespace prefix expected identifier"); break; } *ref = fb_add_ref(P, P->token); ref = &((*ref)->link); next(P); } } /* `flags` */ enum { allow_string_value = 1, allow_id_value = 2, allow_null_value = 4 }; static void parse_value(fb_parser_t *P, fb_value_t *v, int flags, const char *error_msg) { fb_token_t *t; fb_token_t *sign; sign = optional(P, '-'); t = P->token; switch (t->id) { case LEX_TOK_HEX: read_hex_value(P, t, v, sign != 0); break; case LEX_TOK_INT: read_integer_value(P, t, v, sign != 0); break; case LEX_TOK_FLOAT: read_float_value(P, t, v, sign != 0); break; case tok_kw_true: v->b = 1; v->type = vt_bool; break; case tok_kw_false: v->b = 0; v->type = vt_bool; break; case tok_kw_null: if (!(flags & allow_null_value)) { v->type = vt_invalid; error_tok(P, t, error_msg); return; } v->type = vt_null; break; case LEX_TOK_STRING_BEGIN: next(P); parse_string_literal(P, v); if (!(flags & allow_string_value)) { v->type = vt_invalid; error_tok(P, t, error_msg); return; } if (sign) { v->type = vt_invalid; error_tok(P, t, "string constants cannot be signed"); return; } return; case LEX_TOK_ID: parse_ref(P, &v->ref); v->type = vt_name_ref; if (sign) { v->type = vt_invalid; /* Technically they could, but we do not allow it. */ error_tok(P, t, "named values cannot be signed"); } return; default: /* We might have consumed a sign, but never mind that. */ error_tok(P, t, error_msg); return; } if (sign && v->type == vt_bool) { v->type = vt_invalid; error_tok(P, t, "boolean constants cannot be signed"); } next(P); } static void parse_fixed_array_size(fb_parser_t *P, fb_token_t *ttype, fb_value_t *v) { const char *error_msg = "fixed length array length expected to be an unsigned integer"; fb_value_t vsize; fb_token_t *tlen = P->token; parse_value(P, &vsize, 0, error_msg); if (vsize.type != vt_uint) { error_tok(P, tlen, error_msg); v->type = vt_invalid; return; } if (v->type == vt_invalid) return; switch (v->type) { case vt_vector_type: v->type = vt_fixed_array_type; break; case vt_vector_type_ref: v->type = vt_fixed_array_type_ref; break; case vt_vector_string_type: v->type = vt_fixed_array_string_type; break; case vt_invalid: return; default: error_tok(P, ttype, "invalid fixed length array type"); v->type = vt_invalid; return; } if (vsize.u == 0) { error_tok(P, tlen, "fixed length array length cannot be 0"); v->type = vt_invalid; return; } /* * This allows for safe 64-bit multiplication by elements no * larger than 2^32-1 and also fits into the value len field. * without extra size cost. */ if (vsize.u > UINT32_MAX) { error_tok(P, tlen, "fixed length array length overflow"); v->type = vt_invalid; return; } v->len = (uint32_t)vsize.u; } /* ':' must already be matched */ static void parse_type(fb_parser_t *P, fb_value_t *v) { fb_token_t *t = 0; fb_token_t *ttype = 0; fb_token_t *t0 = P->token; int vector = 0; v->len = 1; v->type = vt_invalid; while ((t = optional(P, '['))) { ++vector; } if (vector > 1) { error_tok(P, t0, "vector type can only be one-dimensional"); } ttype = P->token; switch (ttype->id) { case tok_kw_int: case tok_kw_bool: case tok_kw_byte: case tok_kw_long: case tok_kw_uint: case tok_kw_float: case tok_kw_short: case tok_kw_char: case tok_kw_ubyte: case tok_kw_ulong: case tok_kw_ushort: case tok_kw_double: case tok_kw_int8: case tok_kw_int16: case tok_kw_int32: case tok_kw_int64: case tok_kw_uint8: case tok_kw_uint16: case tok_kw_uint32: case tok_kw_uint64: case tok_kw_float32: case tok_kw_float64: v->t = P->token; v->type = vector ? vt_vector_type : vt_scalar_type; next(P); break; case tok_kw_string: v->t = P->token; v->type = vector ? vt_vector_string_type : vt_string_type; next(P); break; case LEX_TOK_ID: parse_ref(P, &v->ref); v->type = vector ? vt_vector_type_ref : vt_type_ref; break; case ']': error_tok(P, t, "vector type cannot be empty"); break; default: error_tok(P, ttype, "invalid type specifier"); break; } if (vector && optional(P, ':')) { parse_fixed_array_size(P, ttype, v); } while (optional(P, ']') && vector--) { } if (vector) { error_tok_2(P, t, "vector type missing ']' to match", t0); } if ((t = optional(P, ']'))) { error_tok_2(P, t, "extra ']' not matching", t0); while (optional(P, ']')) { } } if (ttype->id == tok_kw_char && v->type != vt_invalid) { if (v->type != vt_fixed_array_type) { error_tok(P, ttype, "char can only be used as a fixed length array type [char:<n>]"); v->type = vt_invalid; } } } static fb_metadata_t *parse_metadata(fb_parser_t *P) { fb_token_t *t, *t0; fb_metadata_t *md = 0; if (!(t0 = optional(P, '('))) { return 0; } if ((t = optional(P, LEX_TOK_ID))) for (;;) { fb_add_metadata(P, &md); md->ident = t; if (optional(P, ':')) { parse_value(P, &md->value, allow_string_value, "scalar or string value expected"); } if (P->failed >= FLATCC_MAX_ERRORS) { return md; } if (!optional(P, ',')) { break; } if (!(t = match(P, LEX_TOK_ID, "attribute name expected identifier after ','"))) { break; } } advance(P, ')', "metadata expected ')' to match", t0); revert_metadata(&md); return md; } static void parse_field(fb_parser_t *P, fb_member_t *fld) { fb_token_t *t; remap_field_ident(P); if (!(t = match(P, LEX_TOK_ID, "field expected identifier"))) { goto fail; } fld->symbol.ident = t; if (!match(P, ':', "field expected ':' before mandatory type")) { goto fail; } parse_type(P, &fld->type); if (optional(P, '=')) { /* * Because types can be named references, we do not check the * default assignment before the schema is fully parsed. * We allow the initializer to be a name in case it is an enum * name. */ parse_value(P, &fld->value, allow_id_value | allow_null_value, "initializer must be of scalar type or null"); } fld->metadata = parse_metadata(P); advance(P, ';', "field must be terminated with ';'", 0); return; fail: recover2(P, ';', 1, '}', 0); } static void parse_method(fb_parser_t *P, fb_member_t *fld) { fb_token_t *t; if (!(t = match(P, LEX_TOK_ID, "method expected identifier"))) { goto fail; } fld->symbol.ident = t; if (!match(P, '(', "method expected '(' after identifier")) { goto fail; } parse_type(P, &fld->req_type); if (!match(P, ')', "method expected ')' after request type")) { goto fail; } if (!match(P, ':', "method expected ':' before mandatory response type")) { goto fail; } parse_type(P, &fld->type); if ((t = optional(P, '='))) { error_tok(P, t, "method does not accept an initializer"); goto fail; } fld->metadata = parse_metadata(P); advance(P, ';', "method must be terminated with ';'", 0); return; fail: recover2(P, ';', 1, '}', 0); } /* `enum` must already be matched. */ static void parse_enum_decl(fb_parser_t *P, fb_compound_type_t *ct) { fb_token_t *t, *t0; fb_member_t *member; if (!(ct->symbol.ident = match(P, LEX_TOK_ID, "enum declaration expected identifier"))) { goto fail; } if (optional(P, ':')) { parse_type(P, &ct->type); if (ct->type.type != vt_scalar_type) { error_tok(P, ct->type.t, "integral type expected"); } else { switch (ct->type.t->id) { case tok_kw_float: case tok_kw_double: case tok_kw_float32: case tok_kw_float64: error_tok(P, ct->type.t, "integral type expected"); break; default: break; } } } ct->metadata = parse_metadata(P); if (!((t0 = match(P, '{', "enum declaration expected '{'")))) { goto fail; } for (;;) { remap_enum_ident(P); if (!(t = match(P, LEX_TOK_ID, "member identifier expected"))) { goto fail; } if (P->failed >= FLATCC_MAX_ERRORS) { goto fail; } member = fb_add_member(P, &ct->members); member->symbol.ident = t; if (optional(P, '=')) { t = P->token; parse_value(P, &member->value, 0, "integral constant expected"); /* Leave detailed type (e.g. no floats) and range checking to a later stage. */ } /* * Trailing comma is optional in flatc but not in grammar, we * follow flatc. */ if (!optional(P, ',') || P->token->id == '}') { break; } P->doc = 0; } if (t0) { advance(P, '}', "enum missing closing '}' to match", t0); } revert_symbols(&ct->members); return; fail: recover(P, '}', 1); } /* `union` must already be matched. */ static void parse_union_decl(fb_parser_t *P, fb_compound_type_t *ct) { fb_token_t *t0; fb_member_t *member; fb_ref_t *ref; fb_token_t *t; if (!(ct->symbol.ident = match(P, LEX_TOK_ID, "union declaration expected identifier"))) { goto fail; } ct->metadata = parse_metadata(P); if (!((t0 = match(P, '{', "union declaration expected '{'")))) { goto fail; } for (;;) { if (P->token->id != LEX_TOK_ID) { error_tok(P, P->token, "union expects an identifier"); goto fail; } if (P->failed >= FLATCC_MAX_ERRORS) { goto fail; } t = P->token; member = fb_add_member(P, &ct->members); parse_ref(P, &ref); member->type.ref = ref; member->type.type = vt_type_ref; while (ref->link) { ref = ref->link; } /* The union member name is the unqualified reference. */ member->symbol.ident = ref->ident; if (optional(P, ':')) { if (member->type.ref->link) { error_tok(P, t, "qualified union member name cannot have an explicit type"); } parse_type(P, &member->type); /* Leave type checking to later stage. */ } if (optional(P, '=')) { parse_value(P, &member->value, 0, "integral constant expected"); /* Leave detailed type (e.g. no floats) and range checking to a later stage. */ } if (!optional(P, ',') || P->token->id == '}') { break; } P->doc = 0; } advance(P, '}', "union missing closing '}' to match", t0); revert_symbols(&ct->members); /* Add implicit `NONE` member first in the list. */ member = fb_add_member(P, &ct->members); member->symbol.ident = &P->t_none; return; fail: recover2(P, ';', 1, '}', 0); } /* `struct` , `table`, or 'rpc_service' must already be matched. */ static void parse_compound_type(fb_parser_t *P, fb_compound_type_t *ct, long token) { fb_token_t *t = 0; if (!(t = match(P, LEX_TOK_ID, "Declaration expected an identifier"))) { goto fail; } ct->symbol.ident = t; ct->metadata = parse_metadata(P); if (!(match(P, '{', "Declaration expected '{'"))) { goto fail; } t = P->token; /* Allow empty tables and structs. */ #if 0 if (P->token->id == '}') { error_tok(P, t, "table / struct declaration cannot be empty"); } #endif while (P->token->id != '}') { if (token == tok_kw_rpc_service) { parse_method(P, fb_add_member(P, &ct->members)); } else { parse_field(P, fb_add_member(P, &ct->members)); } if (P->failed >= FLATCC_MAX_ERRORS) { goto fail; } } if (!optional(P, '}') && t) { error_tok_2(P, P->token, "Declaration missing closing '}' to match", t); } revert_symbols(&ct->members); return; fail: recover(P, '}', 1); } static void parse_namespace(fb_parser_t *P) { fb_ref_t *ref = 0; fb_token_t *t = P->token; if (optional(P, ';') && t) { /* Revert to global namespace. */ P->current_scope = 0; return; } if (P->token->id != LEX_TOK_ID) { error_tok(P, P->token, "namespace expects an identifier"); recover(P, ';', 1); return; } parse_ref(P, &ref); advance(P, ';', "missing ';' expected by namespace at", t); P->current_scope = fb_add_scope(P, ref); } static void parse_root_type(fb_parser_t *P, fb_root_type_t *rt) { fb_token_t *t = P->token; if (rt->name) { error_tok(P, P->token, "root_type already set"); } parse_ref(P, &rt->name); rt->scope = P->current_scope; advance(P, ';', "missing ';' expected by root_type at", t); } static void parse_include(fb_parser_t *P) { fb_token_t *t = P->token; while (optional(P, tok_kw_include)) { if (P->opts.disable_includes) { error_tok(P, t, "include statements not supported by current environment"); } if (P->failed >= FLATCC_MAX_ERRORS) { return; } if (!match(P, LEX_TOK_STRING_BEGIN, "include expected a string literal as filename")) { recover(P, ';', 1); } parse_string_literal(P, &fb_add_include(P)->name); match(P, ';', "include statement expected ';'"); } } static void parse_attribute(fb_parser_t *P, fb_attribute_t *a) { fb_token_t *t = P->token; if (match(P, LEX_TOK_STRING_BEGIN, "attribute expected string literal")) { parse_string_literal(P, &a->name.name); if (a->name.name.s.len == 0) { error_tok_as_string(P, t, "attribute name cannot be empty", 0, 0); } } match(P, ';', "attribute expected ';'"); } static void parse_file_extension(fb_parser_t *P, fb_value_t *v) { if (v->type == vt_string) { error_tok_as_string(P, P->token, "file extension already set", v->s.s, (size_t)v->s.len); } if (!match(P, LEX_TOK_STRING_BEGIN, "file_extension expected string literal")) { goto fail; } parse_string_literal(P, v); match(P, ';', "file_extension expected ';'"); return; fail: recover(P, ';', 1); } static void parse_file_identifier(fb_parser_t *P, fb_value_t *v) { fb_token_t *t; if (v->type != vt_missing) { error_tok_as_string(P, P->token, "file identifier already set", v->s.s, (size_t)v->s.len); } if (!match(P, LEX_TOK_STRING_BEGIN, "file_identifier expected string literal")) { goto fail; } t = P->token; parse_string_literal(P, v); if (v->s.s && v->s.len != 4) { v->type = vt_invalid; error_tok(P, t, "file_identifier must be 4 characters"); } match(P, ';', "file_identifier expected ';'"); return; fail: recover(P, ';', 1); } static void parse_schema_decl(fb_parser_t *P) { switch(P->token->id) { case tok_kw_namespace: next(P); parse_namespace(P); break; case tok_kw_file_extension: next(P); parse_file_extension(P, &P->schema.file_extension); break; case tok_kw_file_identifier: next(P); parse_file_identifier(P, &P->schema.file_identifier); break; case tok_kw_root_type: next(P); parse_root_type(P, &P->schema.root_type); break; case tok_kw_attribute: next(P); parse_attribute(P, fb_add_attribute(P)); break; case tok_kw_struct: next(P); parse_compound_type(P, fb_add_struct(P), tok_kw_struct); break; case tok_kw_table: next(P); parse_compound_type(P, fb_add_table(P), tok_kw_table); break; case tok_kw_rpc_service: next(P); parse_compound_type(P, fb_add_rpc_service(P), tok_kw_rpc_service); break; case tok_kw_enum: next(P); parse_enum_decl(P, fb_add_enum(P)); break; case tok_kw_union: next(P); parse_union_decl(P, fb_add_union(P)); break; case tok_kw_include: error_tok(P, P->token, "include statements must be placed first in the schema"); break; case '{': error_tok(P, P->token, "JSON objects in schema file is not supported - but a schema specific JSON parser can be generated"); break; case LEX_TOK_CTRL: error_tok_as_string(P, P->token, "unexpected control character in schema definition", "?", 1); break; case LEX_TOK_COMMENT_CTRL: error_tok_as_string(P, P->token, "unexpected control character in comment", "?", 1); break; case LEX_TOK_COMMENT_UNTERMINATED: error_tok_as_string(P, P->token, "unterminated comment", "<eof>", 5); break; default: error_tok(P, P->token, "unexpected token in schema definition"); break; } } static int parse_schema(fb_parser_t *P) { fb_token_t *t, *t0; parse_include(P); t = P->token; for (;;) { if (is_end(t)) { break; } if (P->failed >= FLATCC_MAX_ERRORS) { return -1; } t0 = t; parse_schema_decl(P); t = P->token; if (t == t0) { if (P->failed) { return -1; } error_tok(P, t, "extra tokens in input"); return -1; } } revert_names(&P->schema.attributes); revert_symbols(&P->schema.symbols); return 0; } static inline void clear_elem_buffers(fb_parser_t *P) { void **p, **p2; p = P->elem_buffers; while (p) { p2 = *((void**)p); free(p); p = p2; }; } static void push_token(fb_parser_t *P, long id, const char *first, const char *last) { size_t offset; fb_token_t *t; P->te = P->ts + P->tcapacity; if (P->token == P->te) { offset = (size_t)(P->token - P->ts); P->tcapacity = P->tcapacity ? 2 * P->tcapacity : 1024; P->ts = realloc(P->ts, (size_t)P->tcapacity * sizeof(fb_token_t)); checkmem(P->ts); P->te = P->ts + P->tcapacity; P->token = P->ts + offset; } t = P->token; t->id = id; t->text = first; t->len = (long)(last - first); t->linenum = P->linenum; t->pos = (long)(first - P->line + 1); ++P->token; } /* * If the file contains a control character, we can get multiple * comments per line. */ static inline void push_comment(fb_parser_t *P, const char *first, const char *last) { if (P->doc_mode) { push_token(P, tok_kw_doc_comment, first, last); } } static void inject_token(fb_token_t *t, const char *lex, long id) { t->id = id; t->text = lex; t->len = (long)strlen(lex); t->pos = 0; t->linenum = 0; } /* --- Customize lexer --- */ /* Depends on the `context` argument given to the lex function. */ #define ctx(name) (((fb_parser_t *)context)->name) #define lex_emit_newline(first, last) (ctx(linenum)++, ctx(line) = last) #define lex_emit_string_newline(first, last) \ (ctx(linenum)++, ctx(line) = last, \ push_token((fb_parser_t*)context, LEX_TOK_STRING_NEWLINE, first, last)) /* * Add emtpy comment on comment start - otherwise we miss empty lines. * Save is_doc becuase comment_part does not remember. */ #define lex_emit_comment_begin(first, last, is_doc) \ { ctx(doc_mode) = is_doc; push_comment((fb_parser_t*)context, last, last); } #define lex_emit_comment_part(first, last) push_comment((fb_parser_t*)context, first, last) #define lex_emit_comment_end(first, last) (ctx(doc_mode) = 0) /* By default emitted as lex_emit_other which would be ignored. */ #define lex_emit_comment_unterminated(pos) \ push_token((fb_parser_t*)context, LEX_TOK_COMMENT_UNTERMINATED, pos, pos) #define lex_emit_comment_ctrl(pos) \ if (lex_isblank(*pos)) { \ push_comment((fb_parser_t*)context, pos, pos + 1); \ } else { \ push_token((fb_parser_t*)context, LEX_TOK_COMMENT_CTRL, \ pos, pos + 1); \ } /* * Provide hook to lexer for emitting tokens. We can override many * things, but most default to calling lex_emit, so that is all we need * to handle. * * `context` is a magic name available to macros in the lexer. */ #define lex_emit(token, first, last) \ push_token((fb_parser_t*)context, token, first, last) /* * We could just eos directly as it defaults to emit, but formally we * should use the eof marker which is always zero, so parser can check * for it easily, if needed. */ #define lex_emit_eos(first, last) \ push_token((fb_parser_t*)context, LEX_TOK_EOF, first, last) /* * This event happens in place of eos if we exhaust the input buffer. * In this case we treat this as end of input, but this choice prevents * us from parsing across multiple buffers. */ #define lex_emit_eob(pos) \ push_token((fb_parser_t*)context, LEX_TOK_EOF, pos, pos) /* * Luthor is our speedy generic lexer - it knows most common operators * and therefore allows us to fail meaningfully on those that we don't * support here, which is most. */ #include "lex/luthor.c" #include "keywords.h" /* Root schema `rs` is null for top level parser. */ int fb_init_parser(fb_parser_t *P, fb_options_t *opts, const char *name, fb_error_fun error_out, void *error_ctx, fb_root_schema_t *rs) { size_t n, name_len; char *s; memset(P, 0, sizeof(*P)); if (error_out) { P->error_out = error_out; P->error_ctx = error_ctx; } else { P->error_out = fb_default_error_out; } if (opts) { memcpy(&P->opts, opts, sizeof(*opts)); } else { flatcc_init_options(&P->opts); } P->schema.root_schema = rs ? rs : &P->schema.root_schema_instance; switch (P->opts.offset_size) { case 2: case 4: case 8: break; default: error(P, "invalid offset configured, must be 2, 4 (default), or 8"); return -1; } switch (P->opts.voffset_size) { case 2: case 4: case 8: break; default: error(P, "invalid voffset configured, must be 2 (default), 4, or 8"); return -1; } if (!name) { /* Mostly for testing, just so we always have a name. */ name = FLATCC_DEFAULT_FILENAME; } if (name == 0) { name = ""; } name_len = strlen(name); checkmem((P->schema.basename = fb_create_basename(name, name_len, opts->default_schema_ext))); n = strlen(P->schema.basename); checkmem(s = fb_copy_path_n(P->schema.basename, n)); pstrntoupper(s, n); P->schema.basenameup = s; P->schema.name.name.s.s = s; P->schema.name.name.s.len = (int)n; checkmem((P->schema.errorname = fb_create_basename(name, name_len, ""))); if (opts->ns) { P->schema.prefix.s = (char *)opts->ns; P->schema.prefix.len = (int)strlen(opts->ns); } P->current_scope = fb_add_scope(P, 0); assert(P->current_scope == fb_scope_table_find(&P->schema.root_schema->scope_index, 0, 0)); return 0; } /* * Main entry function for this specific parser type. * We expect a zero terminated string. * * The parser structure is uninitialized upon entry, and should be * cleared with `clear_flatbuffer_parser` subsequently. * * Datastructures point into the token buffer and into the input * buffer, so the parser and input should not be cleared prematurely. * * The input buffer must remain valid until the parser is cleared * because the internal represenation stores pointers into the buffer. * * `own_buffer` indicates that the the buffer should be deallocated when * the parser is cleaned up. */ int fb_parse(fb_parser_t *P, const char *input, size_t len, int own_buffer) { static const char *id_none = "NONE"; static const char *id_ubyte = "ubyte"; P->line = input; P->linenum = 1; /* Used with union defaults. */ inject_token(&P->t_none, id_none, LEX_TOK_ID); inject_token(&P->t_ubyte, id_ubyte, tok_kw_ubyte); if (own_buffer) { P->managed_input = input; } lex(input, len, 0, P); P->te = P->token; P->token = P->ts; /* Only used while processing table id's. */ checkmem((P->tmp_field_marker = malloc(sizeof(P->tmp_field_marker[0]) * (size_t)P->opts.vt_max_count))); checkmem((P->tmp_field_index = malloc(sizeof(P->tmp_field_index[0]) * (size_t)P->opts.vt_max_count))); if (P->token->id == tok_kw_doc_comment) { next(P); } parse_schema(P); return P->failed; } static void __destroy_scope_item(void *item, fb_scope_t *scope) { /* Each scope points into table that is cleared separately. */ (void)item; fb_symbol_table_clear(&scope->symbol_index); } void fb_clear_parser(fb_parser_t *P) { fb_symbol_t *sym; fb_compound_type_t *ct; for (sym = P->schema.symbols; sym; sym = sym->link) { switch (sym->kind) { case fb_is_struct: case fb_is_table: case fb_is_rpc_service: case fb_is_enum: case fb_is_union: ct = (fb_compound_type_t *)sym; fb_symbol_table_clear(&ct->index); fb_value_set_clear(&ct->value_set); } } fb_schema_table_clear(&P->schema.root_schema_instance.include_index); fb_name_table_clear(&P->schema.root_schema_instance.attribute_index); ptr_set_clear(&P->schema.visible_schema); if (P->tmp_field_marker) { free(P->tmp_field_marker); } if (P->tmp_field_index) { free(P->tmp_field_index); } if (P->ts) { free(P->ts); } if (P->schema.basename) { free((void *)P->schema.basename); } if (P->schema.basenameup) { free((void *)P->schema.basenameup); } if (P->schema.errorname) { free((void *)P->schema.errorname); } /* * P->referer_path in included files points to parent P->path, so * don't free it, and don't access it after this point. */ if (P->path) { free((void *)P->path); } fb_scope_table_destroy(&P->schema.root_schema_instance.scope_index, __destroy_scope_item, 0); /* Destroy last since destructor has references into elem buffer. */ clear_elem_buffers(P); if (P->managed_input) { free((void *)P->managed_input); } memset(P, 0, sizeof(*P)); }