aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Alfredo Cardigliano <cardigliano@ntop.org> 2025-03-27 12:45:25 +0100
committer Alfredo Cardigliano <cardigliano@ntop.org> 2025-03-27 12:45:25 +0100
commit 484f93d64e3ca562f1010e3956af86538c9c9274 (patch)
tree 3b4fcc61ab5b70c2be19ec75cb822118b2b97744 /src
parent 5e3728c611c701279bbca9be086ba905f4a90cd7 (diff)
Add support for UTF-8 encoding in JSON serialization
Diffstat (limited to 'src')
-rw-r--r-- src/lib/ndpi_serializer.c 92
1 files changed, 58 insertions, 34 deletions
diff --git a/src/lib/ndpi_serializer.c b/src/lib/ndpi_serializer.c
index 9b7634a68..670925d82 100644
--- a/src/lib/ndpi_serializer.c
+++ b/src/lib/ndpi_serializer.c
@@ -88,47 +88,71 @@ static int ndpi_is_number(const char *str, u_int32_t str_len) {
* Upon successful return, these functions return the number of characters printed (excluding the null byte used to terminate the string).
*/
int ndpi_json_string_escape(const char *src, int src_len, char *dst, int dst_max_len) {
- char c = 0;
+ u_char c = 0;
int i, j = 0;
dst[j++] = '"';
for(i = 0; i < src_len && j < dst_max_len; i++) {
- c = src[i];
+ c = (u_char) src[i];
+
+ if (c < 0x20 /* ' ' */ || c == 0x7F) {
+ ; // Non-printable ASCII character (skip)
+ } else if (c < 0x7F) {
+ /* Valid ASCII character (escape if required) */
+ switch (c) {
+ case '\\':
+ case '"':
+ case '/':
+ dst[j++] = '\\';
+ dst[j++] = c;
+ break;
+ case '\b':
+ dst[j++] = '\\';
+ dst[j++] = 'b';
+ break;
+ case '\t':
+ dst[j++] = '\\';
+ dst[j++] = 't';
+ break;
+ case '\n':
+ dst[j++] = '\\';
+ dst[j++] = 'n';
+ break;
+ case '\f':
+ dst[j++] = '\\';
+ dst[j++] = 'f';
+ break;
+ case '\r':
+ dst[j++] = '\\';
+ dst[j++] = 'r';
+ break;
+ default:
+ dst[j++] = c;
+ }
- switch (c) {
- case '\\':
- case '"':
- case '/':
- dst[j++] = '\\';
+ } else if ((c >= 0xC2 && c <= 0xDF) &&
+ ((u_char) src[i+1] >= 0x80 && (u_char) src[i+1] <= 0xBF)) {
+ // 2-byte sequence (U+0080 to U+07FF)
dst[j++] = c;
- break;
- case '\b':
- dst[j++] = '\\';
- dst[j++] = 'b';
- break;
- case '\t':
- dst[j++] = '\\';
- dst[j++] = 't';
- break;
- case '\n':
- dst[j++] = '\\';
- dst[j++] = 'n';
- break;
- case '\f':
- dst[j++] = '\\';
- dst[j++] = 'f';
- break;
- case '\r':
- dst[j++] = '\\';
- dst[j++] = 'r';
- break;
- default:
- if(c < ' ')
- ; /* non printable */
- else
- dst[j++] = c;
+ dst[j++] = src[++i];
+ } else if ((c >= 0xE0 && c <= 0xEF) &&
+ ((u_char) src[i+1] >= 0x80 && (u_char) src[i+1] <= 0xBF) &&
+ ((u_char) src[i+2] >= 0x80 && (u_char) src[i+2] <= 0xBF)) {
+ // 3-byte sequence (U+0800 to U+FFFF)
+ dst[j++] = c;
+ dst[j++] = src[++i];
+ dst[j++] = src[++i];
+ } else if ((c >= 0xF0 && c <= 0xF4) &&
+ ((u_char) src[i+1] >= 0x80 && (u_char) src[i+1] <= 0xBF) &&
+ ((u_char) src[i+2] >= 0x80 && (u_char) src[i+2] <= 0xBF) &&
+ ((u_char) src[i+3] >= 0x80 && (u_char) src[i+3] <= 0xBF)) {
+ // 4-byte sequence (U+10000 to U+10FFFF)
+ dst[j++] = c;
+ dst[j++] = src[++i];
+ dst[j++] = src[++i];
+ dst[j++] = src[++i];
}
}
@@ -1274,7 +1298,7 @@ int ndpi_serialize_uint32_binary(ndpi_serializer *_serializer,
int rc;
if(serializer->fmt == ndpi_serialization_format_json)
- needed += 24 + slen;
+ needed += 24 + slen*2 /* account escape (x2) */;
if(buff_diff < needed) {
if(ndpi_extend_serializer_buffer(&serializer->buffer, needed - buff_diff) < 0)