diff options
Diffstat (limited to 'flatcc/test/json_test/test_basic_parse.c')
-rw-r--r-- | flatcc/test/json_test/test_basic_parse.c | 291 |
1 files changed, 291 insertions, 0 deletions
diff --git a/flatcc/test/json_test/test_basic_parse.c b/flatcc/test/json_test/test_basic_parse.c new file mode 100644 index 0000000..7b8f4ba --- /dev/null +++ b/flatcc/test/json_test/test_basic_parse.c @@ -0,0 +1,291 @@ +#include <stdio.h> +#include "flatcc/flatcc_builder.h" +#include "flatcc/flatcc_json_parser.h" + +/* + * Helper macros for generating compile time tries. + * + * - this is for prototyping - codegenerator does this without macroes. + */ +#define __FLATCC_CHARW(x, p) (((uint64_t)(x)) << ((p) * 8)) +#define __FLATCC_KW1(s) (__FLATCC_CHARW(s[0], 7)) +#define __FLATCC_KW2(s) (__FLATCC_KW1(s) | __FLATCC_CHARW(s[1], 6)) +#define __FLATCC_KW3(s) (__FLATCC_KW2(s) | __FLATCC_CHARW(s[2], 5)) +#define __FLATCC_KW4(s) (__FLATCC_KW3(s) | __FLATCC_CHARW(s[3], 4)) +#define __FLATCC_KW5(s) (__FLATCC_KW4(s) | __FLATCC_CHARW(s[4], 3)) +#define __FLATCC_KW6(s) (__FLATCC_KW5(s) | __FLATCC_CHARW(s[5], 2)) +#define __FLATCC_KW7(s) (__FLATCC_KW6(s) | __FLATCC_CHARW(s[6], 1)) +#define __FLATCC_KW8(s) (__FLATCC_KW7(s) | __FLATCC_CHARW(s[7], 0)) +#define __FLATCC_KW(s, n) __FLATCC_KW ## n(s) + +#define __FLATCC_MASKKW(n) ((~(uint64_t)0) << ((8 - (n)) * 8)) +#define __FLATCC_MATCHKW(w, s, n) ((__FLATCC_MASKKW(n) & (w)) == __FLATCC_KW(s, n)) +#define __FLATCC_LTKW(w, s, n) ((__FLATCC_MASKKW(n) & (w)) < __FLATCC_KW(s, n)) + + +const char g_data[] = " \ + \ +{ \r\n \ + \"first\": 1, \ + \"second\": 2.0, \ + \"seconds left\": 42, \ + \"seconds lead\": 1, \n \ + \"zulu\": \"really\" \n \ +} \ +"; + +/* + * This is proof of concept test before code-generation to evaluate + * efficient parsing and buffer construction principles while scanning + * text such as a JSON. We do no use a schema per se, but implicitly + * define one in the way that we construct the parser. + */ + +#define match(x) if (end > buf && buf[0] == x) { ++buf; } \ + else { fprintf(stderr, "failed to match '%c'\n", x); \ + buf = flatcc_json_parser_set_error(ctx, buf, end, \ + flatcc_json_parser_error_invalid_character); goto fail; } + +/* Space is optional, but we do expect more input. */ +#define space() { \ + buf = flatcc_json_parser_space(ctx, buf, end); \ + if (buf == end) { fprintf(stderr, "parse failed\n"); goto fail; }} \ + +#ifdef FLATCC_JSON_ALLOW_UNKNOWN_FIELD +#define ignore_field() { \ + buf = flatcc_json_parser_symbol_end(ctx, buf, end); \ + space(); match(':'); space(); \ + buf = flatcc_json_parser_generic_json(ctx, buf, end); \ + if (buf == end) { \ + goto fail; \ + }} +#else +#define ignore_field() { \ + buf = flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_unknown_symbol);\ + goto fail; } +#endif + + +/* + * We build a flatbuffer dynamically without a schema, but we still need + * to assigned vtable entries. + */ +enum { + id_first = 0, + id_second = 1, + id_seconds_left = 2, + id_seconds_lead = 3, + id_zulu = 10 +}; + +enum { + ctx_done = 0, ctx_t1_start, ctx_t1_again +}; + +const char *test(flatcc_builder_t *B, const char *buf, const char *end, int *ret) +{ + flatcc_json_parser_t parse_ctx, *ctx; + flatcc_builder_ref_t root = 0, ref, *p_ref; + uint64_t w; + const char *k; + char *s; + flatcc_json_parser_escape_buffer_t code; + + void *p; + + ctx = &parse_ctx; + memset(ctx, 0, sizeof(*ctx)); + ctx->line = 1; + ctx->line_start = buf; + + flatcc_builder_start_buffer(B, "TEST", 0, 0); + + space(); match('{'); space(); + flatcc_builder_start_table(B, id_zulu + 1); + +t1_again: + + buf = flatcc_json_parser_symbol_start(ctx, buf, end); + w = flatcc_json_parser_symbol_part(buf, end); + k = end - buf > 8 ? buf + 8 : end; + /* + * We implement a trie here. Because we compare big endian + * any trailing garbage in a word is least significant + * and masked out in MATCH tests. + * + * When a keyword is a prefix of another, the shorter keyword + * must be tested first because any trailing "garbage" will + * be larger (or equal if at buffer end or invalid nulls are + * contained) than the short keyword, but if testing the long + * keyword, the shorter keyword may be either larger or smaller + * depending on what content follows. + * + * Errors result in `buf` being set to `end` so we need not test + * for errors all the time. We use space as a convenient bailout + * point. + */ + if (__FLATCC_LTKW(w, "second", 6)) { + if (!__FLATCC_MATCHKW(w, "first", 5)) { + ignore_field(); + } else { + buf = flatcc_json_parser_symbol_end(ctx, buf + 5, end); + space(); match(':'); space(); + p = flatcc_builder_table_add(B, id_first, 1, 1); + if (!p) { goto fail; } + k = buf; + buf = flatcc_json_parser_uint8(ctx, buf, end, p); + /* Here we could optionally parse for symbolic constants. */ + if (k == buf) { goto fail; }; + /* Successfully parsed field. */ + } + } else { + if (__FLATCC_LTKW(w, "zulu", 4)) { + if (__FLATCC_LTKW(w, "seconds ", 8)) { + if (!__FLATCC_MATCHKW(w, "second", 6)) { + ignore_field(); + } else { + buf = flatcc_json_parser_symbol_end(ctx, buf + 6, end); + space(); match(':'); space(); + p = flatcc_builder_table_add(B, id_second, 8, 8); + if (!p) { goto fail; } + k = buf; + buf = flatcc_json_parser_double(ctx, buf, end, p); + /* Here we could optionally parse for symbolic constants. */ + if (k == buf) { goto fail; }; + /* Successfully parsed field. */ + } + } else { + if (!__FLATCC_MATCHKW(w, "seconds ", 8)) { + ignore_field(); + } else { + /* We have multiple keys matching the first word, so we load another. */ + buf = k; + w = flatcc_json_parser_symbol_part(buf, end); + k = end - buf > 8 ? buf + 8 : end; + if (__FLATCC_LTKW(w, "left", 4)) { + if (!__FLATCC_MATCHKW(w, "lead", 4)) { + ignore_field(); + } else { + buf = flatcc_json_parser_symbol_end(ctx, buf + 4, end); + space(); match(':'); space(); + p = flatcc_builder_table_add(B, id_seconds_lead, 8, 8); + if (!p) { goto fail; } + k = buf; + buf = flatcc_json_parser_int64(ctx, buf, end, p); + /* Here we could optionally parse for symbolic constants. */ + if (k == buf) { goto fail; }; + /* Successfully parsed field. */ + } + } else { + if (!__FLATCC_MATCHKW(w, "left", 4)) { + ignore_field(); + } else { + buf = flatcc_json_parser_symbol_end(ctx, buf + 4, end); + space(); match(':'); space(); + p = flatcc_builder_table_add(B, id_seconds_left, 4, 4); + if (!p) { goto fail; } + k = buf; + buf = flatcc_json_parser_uint32(ctx, buf, end, p); + /* Here we could optionally parse for symbolic constants. */ + if (k == buf) { goto fail; }; + /* Successfully parsed field. */ + } + } + } + } + } else { + if (!__FLATCC_MATCHKW(w, "zulu", 4)) { + ignore_field(); + } else { + buf = flatcc_json_parser_symbol_end(ctx, buf + 4, end); + space(); match(':'); space(); + /* + * Parse field as string. If we are lucky, we can + * create the string in one go, which is faster. + * We can't if the string contains escape codes. + */ + buf = flatcc_json_parser_string_start(ctx, buf, end); + k = buf; + buf = flatcc_json_parser_string_part(ctx, buf, end); + if (buf == end) { + goto fail; + } + if (buf[0] == '\"') { + ref = flatcc_builder_create_string(B, k, (size_t)(buf - k)); + } else { + /* Start string with enough space for what we have. */ + flatcc_builder_start_string(B); + s = flatcc_builder_extend_string(B, (size_t)(buf - k)); + if (!s) { goto fail; } + memcpy(s, k, (size_t)(buf - k)); + do { + buf = flatcc_json_parser_string_escape(ctx, buf, end, code); + flatcc_builder_append_string(B, code + 1, (size_t)code[0]); + k = buf; + buf = flatcc_json_parser_string_part(ctx, buf, end); + if (buf == end) { + goto fail; + } + flatcc_builder_append_string(B, k, (size_t)(buf - k)); + } while (buf[0] != '\"'); + ref = flatcc_builder_end_string(B); + } + if (!ref) { + goto fail; + } + /* Duplicate fields may fail or assert. */ + p_ref = flatcc_builder_table_add_offset(B, id_zulu); + if (!p_ref) { + goto fail; + } + *p_ref = ref; + buf = flatcc_json_parser_string_end(ctx, buf, end); + /* Successfully parsed field. */ + } + } + } + space(); + if (*buf == ',') { + ++buf; + space(); + if (*buf != '}') { + goto t1_again; + } +#if !FLATCC_JSON_PARSE_ALLOW_TRAILING_COMMA + return flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_trailing_comma); +#endif + } + match('}'); + root = flatcc_builder_end_table(B); + + flatcc_builder_end_buffer(B, root); +#if !FLATCC_JSON_PARSE_IGNORE_TRAILING_DATA + buf = flatcc_json_parser_space(ctx, buf, end); + if (buf != end) { + fprintf(stderr, "extra characters in input\n"); + goto fail; + } +#endif +fail: + if (ctx->error) { + fprintf(stderr, "%d:%d: %s\n", (int)ctx->line, (int)(ctx->error_loc - ctx->line_start + 1), flatcc_json_parser_error_string(ctx->error)); + flatcc_builder_reset(B); + } else { + fprintf(stderr, "parse accepted\n"); + } + *ret = ctx->error; + return buf; +} + +int main(void) +{ + int ret = -1; + flatcc_builder_t builder; + + flatcc_builder_init(&builder); + + test(&builder, g_data, g_data + sizeof(g_data) - 1, &ret); + + flatcc_builder_clear(&builder); + return ret; +} |