-rw-r--r--  .github/workflows/build-openwrt.yml                   2
-rw-r--r--  .github/workflows/build.yml                           2
-rw-r--r--  dependencies/nDPIsrvd.h                               2
-rw-r--r--  examples/README.md                                    5
-rwxr-xr-x  examples/py-machine-learning/keras-autoencoder.py  125
-rw-r--r--  examples/py-schema-validation/requirements.txt        1
6 files changed, 133 insertions, 4 deletions
diff --git a/.github/workflows/build-openwrt.yml b/.github/workflows/build-openwrt.yml
index 5a4e4e701..b134cd36e 100644
--- a/.github/workflows/build-openwrt.yml
+++ b/.github/workflows/build-openwrt.yml
@@ -14,7 +14,7 @@ on:
jobs:
build:
- name: ${{ matrix.arch }} build
+ name: ${{ matrix.arch }} ${{ matrix.target }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b30edc945..74ad276ad 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -14,7 +14,7 @@ on:
jobs:
test:
- name: ${{ matrix.os }} ${{ matrix.gcrypt }}
+ name: ${{ matrix.os }} ${{ matrix.compiler }}
runs-on: ${{ matrix.os }}
env:
CMAKE_C_COMPILER: ${{ matrix.compiler }}
diff --git a/dependencies/nDPIsrvd.h b/dependencies/nDPIsrvd.h
index 806bd09c8..11aa489b1 100644
--- a/dependencies/nDPIsrvd.h
+++ b/dependencies/nDPIsrvd.h
@@ -35,8 +35,6 @@
#define nDPIsrvd_ARRAY_LENGTH(s) (sizeof(s) / sizeof(s[0]))
#define nDPIsrvd_STRLEN_SZ(s) (sizeof(s) / sizeof(s[0]) - sizeof(s[0]))
#define TOKEN_GET_SZ(sock, ...) nDPIsrvd_get_token(sock, __VA_ARGS__, NULL)
-#define TOKEN_GET_VALUE_SZ(sock, value_length, ...) \
- nDPIsrvd_get_token_value(sock, TOKEN_GET_SZ(sock, __VA_ARGS__, NULL))
#define TOKEN_VALUE_EQUALS(sock, token, string_to_check, string_to_check_length) \
nDPIsrvd_token_value_equals(sock, token, string_to_check, string_to_check_length)
#define TOKEN_VALUE_EQUALS_SZ(sock, token, string_to_check) \
diff --git a/examples/README.md b/examples/README.md
index 2b1d43427..03d7a9262 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -41,6 +41,11 @@ Required by `tests/run_tests.sh`
## py-machine-learning
+Contains:
+
+1. Classification via Random Forests and scikit-learn (sketched below)
+2. Anomaly Detection via Autoencoder and Keras (Work-In-Progress!)
+
Use sklearn together with CSVs created with **c-analysed** to train and predict DPI detections.
Try it with: `./examples/py-machine-learning/sklearn_random_forest.py --csv ./ndpi-analysed.csv --proto-class tls.youtube --proto-class tls.github --proto-class tls.spotify --proto-class tls.facebook --proto-class tls.instagram --proto-class tls.doh_dot --proto-class quic --proto-class icmp`
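A minimal sketch of that classification flow, assuming an all-numeric feature matrix and a hypothetical `proto` label column (the real c-analysed column layout is not part of this change):

```python
#!/usr/bin/env python3
# Hedged sketch: the column names below are assumptions, not c-analysed's schema.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

df = pd.read_csv('./ndpi-analysed.csv')
X = df.drop(columns=['proto'])  # hypothetical: all remaining columns numeric
y = df['proto']                 # hypothetical label column
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
clf = RandomForestClassifier(n_estimators=100)
clf.fit(X_train, y_train)
print('accuracy: {:.3f}'.format(clf.score(X_test, y_test)))
```

Random forests need no feature scaling, which keeps the CSV-to-model path short.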
diff --git a/examples/py-machine-learning/keras-autoencoder.py b/examples/py-machine-learning/keras-autoencoder.py
new file mode 100755
index 000000000..943a6aefc
--- /dev/null
+++ b/examples/py-machine-learning/keras-autoencoder.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+
+import base64
+import csv
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import pandas as pd
+import tensorflow as tf
+import sys
+
+from tensorflow.keras import layers, preprocessing
+from tensorflow.keras.layers import Embedding, Input, Dense
+from tensorflow.keras.models import Model, Sequential
+from tensorflow.keras.utils import plot_model
+
+sys.path.append(os.path.dirname(sys.argv[0]) + '/../../dependencies')
+sys.path.append(os.path.dirname(sys.argv[0]) + '/../share/nDPId')
+sys.path.append(os.path.dirname(sys.argv[0]))
+sys.path.append(sys.base_prefix + '/share/nDPId')
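+# The appends above make the bundled nDPIsrvd.py importable both from the
+# source tree and from an installed prefix.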
+import nDPIsrvd
+from nDPIsrvd import nDPIsrvdSocket, TermColor
+
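+# One autoencoder input row holds a single packet, padded or truncated to
+# nDPId's maximum captured packet payload length.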
+input_size = nDPIsrvd.nDPId_PACKETS_PLEN_MAX
+training_size = 500
+batch_size = 100
+
+def generate_autoencoder():
+    # Scalar integer input; the Embedding expands each value into a vector,
+    # so the tensor entering the Dense stack has shape (batch, input_size).
+    scalar_input = Input(shape=())
+    input_i = Embedding(input_dim=input_size, output_dim=input_size, mask_zero=True)(scalar_input)
+    encoded_h1 = Dense(1024, activation='relu', name='encoded_h1')(input_i)
+    encoded_h2 = Dense(512, activation='relu', name='encoded_h2')(encoded_h1)
+    encoded_h3 = Dense(128, activation='relu', name='encoded_h3')(encoded_h2)
+    encoded_h4 = Dense(64, activation='relu', name='encoded_h4')(encoded_h3)
+    encoded_h5 = Dense(32, activation='relu', name='encoded_h5')(encoded_h4)
+    latent = Dense(2, activation='relu', name='latent')(encoded_h5)
+    decoder_h1 = Dense(32, activation='relu', name='decoder_h1')(latent)
+    decoder_h2 = Dense(64, activation='relu', name='decoder_h2')(decoder_h1)
+    decoder_h3 = Dense(128, activation='relu', name='decoder_h3')(decoder_h2)
+    decoder_h4 = Dense(512, activation='relu', name='decoder_h4')(decoder_h3)
+    decoder_h5 = Dense(1024, activation='relu', name='decoder_h5')(decoder_h4)
+    # The model is built from the embedding output, so it trains directly on
+    # the (batch, input_size) packet matrices fed in by onJsonLineRecvd().
+    return input_i, Model(input_i, Dense(input_size, activation='sigmoid', name='reconstruction')(decoder_h5))
+
+def compile_autoencoder():
+    inp, autoencoder = generate_autoencoder()
+    # Plain reconstruction objective: mean squared error between in- and output.
+    autoencoder.compile(loss='mse', optimizer='adam', metrics=[tf.keras.metrics.Accuracy()])
+    return inp, autoencoder
+
+def onJsonLineRecvd(json_dict, instance, current_flow, global_user_data):
+    if 'packet_event_name' not in json_dict:
+        return True
+
+    if json_dict['packet_event_name'] != 'packet' and \
+       json_dict['packet_event_name'] != 'packet-flow':
+        return True
+
+    autoencoder, padded_pkts = global_user_data
+    # nDPId delivers the raw packet bytes base64-encoded in the 'pkt' field.
+    buf = base64.b64decode(json_dict['pkt'], validate=True)
+
+    # Convert the raw packet bytes into integer values (0-255)
+    int_buf = [int(v) for v in buf]
+
+    mat = np.array([int_buf])
+
+    # Normalize the values
+    mat = mat.astype('float32') / 255.
+
+    # Mean removal
+    matmean = np.mean(mat, axis=0)
+    mat -= matmean
+
+    # Pad (or truncate) the resulting matrix to exactly input_size columns
+    buf = preprocessing.sequence.pad_sequences(mat, padding="post", maxlen=input_size, truncating='post')
+    padded_pkts.append(buf[0])
+
+    sys.stdout.write('.')
+    sys.stdout.flush()
+    if len(padded_pkts) % training_size == 0:
+        print('\nGot {} packets, training ..'.format(len(padded_pkts)))
+        tmp = np.array(padded_pkts)
+        history = autoencoder.fit(
+            tmp, tmp, epochs=10, batch_size=batch_size,
+            validation_split=0.2,
+            shuffle=True
+        )
+        padded_pkts.clear()
+
+        #plot_model(autoencoder, show_shapes=True, show_layer_names=True)
+        #plt.plot(history.history['loss'])
+        #plt.plot(history.history['val_loss'])
+        #plt.title('model loss')
+        #plt.xlabel('epoch')
+        #plt.ylabel('loss')
+        #plt.legend(['loss', 'val_loss'], loc='upper left')
+        #plt.show()
+
+    return True
+
+if __name__ == '__main__':
+    sys.stderr.write('\n***************\n')
+    sys.stderr.write('*** WARNING ***\n')
+    sys.stderr.write('***************\n')
+    sys.stderr.write('\nThis is an immature Autoencoder example.\n')
+    sys.stderr.write('Please do not rely on any of its output!\n\n')
+
+    argparser = nDPIsrvd.defaultArgumentParser()
+    args = argparser.parse_args()
+    address = nDPIsrvd.validateAddress(args)
+
+    sys.stderr.write('Recv buffer size: {}\n'.format(nDPIsrvd.NETWORK_BUFFER_MAX_SIZE))
+    sys.stderr.write('Connecting to {} ..\n'.format(address[0]+':'+str(address[1]) if type(address) is tuple else address))
+
+    _, autoencoder = compile_autoencoder()
+
+    nsock = nDPIsrvdSocket()
+    nsock.connect(address)
+    try:
+        padded_pkts = list()
+        # Loop over incoming JSON lines; the tuple is handed through to
+        # onJsonLineRecvd() as global_user_data.
+        nsock.loop(onJsonLineRecvd, None, (autoencoder, padded_pkts))
+    except nDPIsrvd.SocketConnectionBroken as err:
+        sys.stderr.write('\n{}\n'.format(err))
+    except KeyboardInterrupt:
+        print()
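The script above trains but never scores; a minimal sketch of the anomaly-detection step the Work-In-Progress note implies, using plain reconstruction error (the helper and threshold are assumptions, not part of this change):

```python
import numpy as np

def reconstruction_error(autoencoder, pkts):
    # Mean squared reconstruction error per packet: inputs the model cannot
    # reproduce well score high and are candidate anomalies.
    pkts = np.asarray(pkts, dtype='float32')
    return np.mean(np.square(pkts - autoencoder.predict(pkts)), axis=1)

# Hypothetical usage with the padded packets gathered by onJsonLineRecvd():
# errors = reconstruction_error(autoencoder, padded_pkts)
# anomalies = errors > 0.01  # arbitrary threshold, needs calibration
```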
diff --git a/examples/py-schema-validation/requirements.txt b/examples/py-schema-validation/requirements.txt
new file mode 100644
index 000000000..d89304b1a
--- /dev/null
+++ b/examples/py-schema-validation/requirements.txt
@@ -0,0 +1 @@
+jsonschema
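`jsonschema` backs the py-schema-validation example; a minimal sketch of the kind of check it enables (the schema path and event below are illustrative assumptions, not part of this change):

```python
import json
import jsonschema

# Hypothetical schema file; nDPId ships its real schemas in the source tree.
with open('flow_event_schema.json') as f:
    schema = json.load(f)

event = json.loads('{"flow_event_name": "new"}')    # hypothetical nDPId event
jsonschema.validate(instance=event, schema=schema)  # raises ValidationError on mismatch
```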