aboutsummaryrefslogtreecommitdiff
path: root/test/json_test/test_basic_parse.c
blob: 7b8f4bac9abd1bcfa9625090d50dfb1320342832 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "flatcc/flatcc_builder.h"
#include "flatcc/flatcc_json_parser.h"

/*
 * Helper macros for generating compile time tries.
 *
 * - this is for prototyping - codegenerator does this without macroes.
 */
#define __FLATCC_CHARW(x, p) (((uint64_t)(x)) << ((p) * 8))
#define __FLATCC_KW1(s) (__FLATCC_CHARW(s[0], 7))
#define __FLATCC_KW2(s) (__FLATCC_KW1(s) | __FLATCC_CHARW(s[1], 6))
#define __FLATCC_KW3(s) (__FLATCC_KW2(s) | __FLATCC_CHARW(s[2], 5))
#define __FLATCC_KW4(s) (__FLATCC_KW3(s) | __FLATCC_CHARW(s[3], 4))
#define __FLATCC_KW5(s) (__FLATCC_KW4(s) | __FLATCC_CHARW(s[4], 3))
#define __FLATCC_KW6(s) (__FLATCC_KW5(s) | __FLATCC_CHARW(s[5], 2))
#define __FLATCC_KW7(s) (__FLATCC_KW6(s) | __FLATCC_CHARW(s[6], 1))
#define __FLATCC_KW8(s) (__FLATCC_KW7(s) | __FLATCC_CHARW(s[7], 0))
#define __FLATCC_KW(s, n) __FLATCC_KW ## n(s)

#define __FLATCC_MASKKW(n) ((~(uint64_t)0) << ((8 - (n)) * 8))
#define __FLATCC_MATCHKW(w, s, n) ((__FLATCC_MASKKW(n) & (w)) == __FLATCC_KW(s, n))
#define __FLATCC_LTKW(w, s, n) ((__FLATCC_MASKKW(n) & (w)) < __FLATCC_KW(s, n))


/*
 * JSON input handed to test() by main().
 *
 * The literal is written with backslash line continuations, so the source
 * line breaks are not part of the data, while the padding spaces are.
 * Line breaks in the data come only from the explicit "\r\n" and "\n"
 * escapes. The object uses the keys "first", "second", "seconds left",
 * "seconds lead" and "zulu".
 */
const char g_data[] = "                                                     \
                                                                            \
{                               \r\n                                        \
    \"first\": 1,                                                           \
    \"second\": 2.0,                                                        \
    \"seconds left\": 42,                                                   \
    \"seconds lead\": 1,          \n                                        \
    \"zulu\": \"really\"      \n                                            \
}                                                                           \
";

/*
 * This is a proof of concept test, written ahead of code generation, to
 * evaluate efficient parsing and buffer construction principles while
 * scanning text such as JSON. We do not use a schema per se, but
 * implicitly define one in the way that we construct the parser.
 */

/*
 * Consumes the single character `x` at `buf`. If the input is exhausted or
 * the next character differs, the mismatch is reported on stderr, the
 * parser error flatcc_json_parser_error_invalid_character is recorded and
 * control jumps to the `fail` label.
 *
 * Expects `ctx`, `buf`, `end` and a `fail` label in the calling function.
 * Wrapped in do/while (0) so that `match(c);` is exactly one statement and
 * cannot capture a following `else`.
 */
#define match(x) do {                                                       \
        if (end > buf && buf[0] == (x)) { ++buf; }                          \
        else { fprintf(stderr, "failed to match '%c'\n", (x));              \
            buf = flatcc_json_parser_set_error(ctx, buf, end,               \
                    flatcc_json_parser_error_invalid_character);            \
            goto fail; }                                                    \
    } while (0)

/*
 * Space is optional, but we do expect more input.
 *
 * Advances `buf` with flatcc_json_parser_space(); if that leaves `buf` at
 * `end`, prints "parse failed" and jumps to the `fail` label. Expects
 * `ctx`, `buf`, `end` and a `fail` label in the calling function.
 */
#define space() do {                                                        \
        buf = flatcc_json_parser_space(ctx, buf, end);                      \
        if (buf == end) { fprintf(stderr, "parse failed\n"); goto fail; }   \
    } while (0)

/*
 * Handles a key that the trie in the calling function did not recognize.
 *
 * With FLATCC_JSON_ALLOW_UNKNOWN_FIELD defined, the rest of the symbol,
 * the ':' separator and the value (via flatcc_json_parser_generic_json)
 * are consumed; if that leaves `buf` at `end`, control jumps to `fail`.
 * Otherwise flatcc_json_parser_error_unknown_symbol is recorded and
 * control jumps to `fail` unconditionally.
 *
 * Expects `ctx`, `buf`, `end` and a `fail` label in the calling function.
 */
#ifdef FLATCC_JSON_ALLOW_UNKNOWN_FIELD
#define ignore_field() do {                                                 \
        buf = flatcc_json_parser_symbol_end(ctx, buf, end);                 \
        space(); match(':'); space();                                       \
        buf = flatcc_json_parser_generic_json(ctx, buf, end);               \
        if (buf == end) {                                                   \
            goto fail;                                                      \
        }                                                                   \
    } while (0)
#else
#define ignore_field() do {                                                 \
        buf = flatcc_json_parser_set_error(ctx, buf, end,                   \
                flatcc_json_parser_error_unknown_symbol);                   \
        goto fail;                                                          \
    } while (0)
#endif


/*
 * We build a flatbuffer dynamically without a schema, but we still need
 * to assign vtable entries.
 */
/* Table field ids; the first four are consecutive starting at zero. */
enum {
    id_first,
    id_second,
    id_seconds_left,
    id_seconds_lead,
    /* Leaves a gap after id_seconds_lead; also the highest id in use. */
    id_zulu = 10
};

/*
 * Parse state constants.
 * NOTE(review): nothing in the visible part of this file references them.
 */
enum {
    ctx_done = 0,
    ctx_t1_start = 1,
    ctx_t1_again = 2
};

/*
 * Parses the JSON object in [buf, end) and builds a table from it inside a
 * buffer with identifier "TEST" using builder `B`.
 *
 * Keys are dispatched through a hand written trie over 8 byte words:
 *   "first"        -> id_first        (uint8)
 *   "second"       -> id_second       (double)
 *   "seconds left" -> id_seconds_left (uint32)
 *   "seconds lead" -> id_seconds_lead (int64)
 *   "zulu"         -> id_zulu         (string)
 * Any other key is handed to ignore_field().
 *
 * On return `*ret` holds
 *   0                 when the input was accepted,
 *   the parser error  when the parser context recorded one,
 *   -1                when processing stopped for any other reason (a
 *                     builder call failed, a value could not be parsed,
 *                     the input ended early, or trailing data was found).
 * The builder is reset on every failure. The outcome is also reported on
 * stderr.
 *
 * Returns the position at which parsing stopped.
 */
const char *test(flatcc_builder_t *B, const char *buf, const char *end, int *ret)
{
    flatcc_json_parser_t parse_ctx, *ctx;
    flatcc_builder_ref_t root = 0, ref, *p_ref;
    uint64_t w;
    const char *k;
    char *s;
    flatcc_json_parser_escape_buffer_t code;
    void *p;
    /* Set only after the whole input has been processed successfully. */
    int accepted = 0;

    ctx = &parse_ctx;
    memset(ctx, 0, sizeof(*ctx));
    ctx->line = 1;
    ctx->line_start = buf;

    if (flatcc_builder_start_buffer(B, "TEST", 0, 0)) { goto fail; }

    space(); match('{'); space();
    /* Room for every field id up to and including id_zulu. */
    if (flatcc_builder_start_table(B, id_zulu + 1)) { goto fail; }

t1_again:

    buf = flatcc_json_parser_symbol_start(ctx, buf, end);
    w = flatcc_json_parser_symbol_part(buf, end);
    /* Start of the next 8 byte word of the key, clamped to `end`. */
    k = end - buf > 8 ? buf + 8 : end;
    /*
     * We implement a trie here. Because we compare big endian
     * any trailing garbage in a word is least significant
     * and masked out in MATCH tests.
     *
     * When a keyword is a prefix of another, the shorter keyword
     * must be tested first because any trailing "garbage" will
     * be larger (or equal if at buffer end or invalid nulls are
     * contained) than the short keyword, but if testing the long
     * keyword, the shorter keyword may be either larger or smaller
     * depending on what content follows.
     *
     * Errors result in `buf` being set to `end` so we need not test
     * for errors all the time. We use space as a convenient bailout
     * point.
     */
    if (__FLATCC_LTKW(w, "second", 6)) {
        if (!__FLATCC_MATCHKW(w, "first", 5)) {
            ignore_field();
        } else {
            buf = flatcc_json_parser_symbol_end(ctx, buf + 5, end);
            space(); match(':'); space();
            p = flatcc_builder_table_add(B, id_first, 1, 1);
            if (!p) { goto fail; }
            k = buf;
            buf = flatcc_json_parser_uint8(ctx, buf, end, p);
            /* Here we could optionally parse for symbolic constants. */
            /* No progress means no number was parsed. */
            if (k == buf) { goto fail; }
            /* Successfully parsed field. */
        }
    } else {
        if (__FLATCC_LTKW(w, "zulu", 4)) {
            if (__FLATCC_LTKW(w, "seconds ", 8)) {
                if (!__FLATCC_MATCHKW(w, "second", 6)) {
                    ignore_field();
                } else {
                    buf = flatcc_json_parser_symbol_end(ctx, buf + 6, end);
                    space(); match(':'); space();
                    p = flatcc_builder_table_add(B, id_second, 8, 8);
                    if (!p) { goto fail; }
                    k = buf;
                    buf = flatcc_json_parser_double(ctx, buf, end, p);
                    /* Here we could optionally parse for symbolic constants. */
                    if (k == buf) { goto fail; }
                    /* Successfully parsed field. */
                }
            } else {
                if (!__FLATCC_MATCHKW(w, "seconds ", 8)) {
                    ignore_field();
                } else {
                    /* We have multiple keys matching the first word, so we load another. */
                    buf = k;
                    w = flatcc_json_parser_symbol_part(buf, end);
                    k = end - buf > 8 ? buf + 8 : end;
                    if (__FLATCC_LTKW(w, "left", 4)) {
                        if (!__FLATCC_MATCHKW(w, "lead", 4)) {
                            ignore_field();
                        } else {
                            buf = flatcc_json_parser_symbol_end(ctx, buf + 4, end);
                            space(); match(':'); space();
                            p = flatcc_builder_table_add(B, id_seconds_lead, 8, 8);
                            if (!p) { goto fail; }
                            k = buf;
                            buf = flatcc_json_parser_int64(ctx, buf, end, p);
                            /* Here we could optionally parse for symbolic constants. */
                            if (k == buf) { goto fail; }
                            /* Successfully parsed field. */
                        }
                    } else {
                        if (!__FLATCC_MATCHKW(w, "left", 4)) {
                            ignore_field();
                        } else {
                            buf = flatcc_json_parser_symbol_end(ctx, buf + 4, end);
                            space(); match(':'); space();
                            p = flatcc_builder_table_add(B, id_seconds_left, 4, 4);
                            if (!p) { goto fail; }
                            k = buf;
                            buf = flatcc_json_parser_uint32(ctx, buf, end, p);
                            /* Here we could optionally parse for symbolic constants. */
                            if (k == buf) { goto fail; }
                            /* Successfully parsed field. */
                        }
                    }
                }
            }
        } else {
            if (!__FLATCC_MATCHKW(w, "zulu", 4)) {
                ignore_field();
            } else {
                buf = flatcc_json_parser_symbol_end(ctx, buf + 4, end);
                space(); match(':'); space();
                /*
                 * Parse field as string. If we are lucky, we can
                 * create the string in one go, which is faster.
                 * We can't if the string contains escape codes.
                 */
                buf = flatcc_json_parser_string_start(ctx, buf, end);
                k = buf;
                buf = flatcc_json_parser_string_part(ctx, buf, end);
                if (buf == end) {
                    goto fail;
                }
                if (buf[0] == '\"') {
                    ref = flatcc_builder_create_string(B, k, (size_t)(buf - k));
                } else {
                    /* Start string with enough space for what we have. */
                    flatcc_builder_start_string(B);
                    s = flatcc_builder_extend_string(B, (size_t)(buf - k));
                    if (!s) { goto fail; }
                    memcpy(s, k, (size_t)(buf - k));
                    /* Alternate between one escape code and the plain run after it. */
                    do {
                        buf = flatcc_json_parser_string_escape(ctx, buf, end, code);
                        /* code[0] is used as the length of the bytes stored from code + 1. */
                        flatcc_builder_append_string(B, code + 1, (size_t)code[0]);
                        k = buf;
                        buf = flatcc_json_parser_string_part(ctx, buf, end);
                        if (buf == end) {
                            goto fail;
                        }
                        flatcc_builder_append_string(B, k, (size_t)(buf - k));
                    } while (buf[0] != '\"');
                    ref = flatcc_builder_end_string(B);
                }
                if (!ref) {
                    goto fail;
                }
                /* Duplicate fields may fail or assert. */
                p_ref = flatcc_builder_table_add_offset(B, id_zulu);
                if (!p_ref) {
                    goto fail;
                }
                *p_ref = ref;
                buf = flatcc_json_parser_string_end(ctx, buf, end);
                /* Successfully parsed field. */
            }
        }
    }
    space();
    if (*buf == ',') {
        ++buf;
        space();
        if (*buf != '}') {
            goto t1_again;
        }
#if !FLATCC_JSON_PARSE_ALLOW_TRAILING_COMMA
        /* Go through `fail` so the error is printed and `*ret` is set. */
        buf = flatcc_json_parser_set_error(ctx, buf, end, flatcc_json_parser_error_trailing_comma);
        goto fail;
#endif
    }
    match('}');
    root = flatcc_builder_end_table(B);
    if (!root) { goto fail; }

    if (!flatcc_builder_end_buffer(B, root)) { goto fail; }
#if !FLATCC_JSON_PARSE_IGNORE_TRAILING_DATA
    buf = flatcc_json_parser_space(ctx, buf, end);
    if (buf != end) {
        fprintf(stderr, "extra characters in input\n");
        goto fail;
    }
#endif
    accepted = 1;
fail:
    if (ctx->error) {
        fprintf(stderr, "%d:%d: %s\n", (int)ctx->line, (int)(ctx->error_loc - ctx->line_start + 1), flatcc_json_parser_error_string(ctx->error));
        flatcc_builder_reset(B);
        *ret = ctx->error;
    } else if (!accepted) {
        /* Stopped early without a parser error: still a failure. */
        fprintf(stderr, "parse rejected\n");
        flatcc_builder_reset(B);
        *ret = -1;
    } else {
        fprintf(stderr, "parse accepted\n");
        *ret = 0;
    }
    return buf;
}

/*
 * Runs test() over g_data (without its terminating NUL) using a freshly
 * initialized builder and returns the status that test() stored in `ret`.
 * Returns -1 if the builder cannot be initialized, or if test() never
 * stores a status.
 */
int main(void)
{
    int ret = -1;
    flatcc_builder_t builder;

    if (flatcc_builder_init(&builder)) {
        fprintf(stderr, "failed to initialize builder\n");
        return -1;
    }

    /* sizeof includes the terminating NUL, hence the - 1. */
    test(&builder, g_data, g_data + sizeof(g_data) - 1, &ret);

    flatcc_builder_clear(&builder);
    return ret;
}