-rw-r--r--   .github/workflows/build-openwrt.yml                  2
-rw-r--r--   .github/workflows/build.yml                          2
-rw-r--r--   dependencies/nDPIsrvd.h                              2
-rw-r--r--   examples/README.md                                   5
-rwxr-xr-x   examples/py-machine-learning/keras-autoencoder.py  119
-rw-r--r--   examples/py-schema-validation/requirements.txt       1
6 files changed, 127 insertions, 4 deletions
diff --git a/.github/workflows/build-openwrt.yml b/.github/workflows/build-openwrt.yml
index 5a4e4e701..b134cd36e 100644
--- a/.github/workflows/build-openwrt.yml
+++ b/.github/workflows/build-openwrt.yml
@@ -14,7 +14,7 @@ on:
 jobs:
   build:
-    name: ${{ matrix.arch }} build
+    name: ${{ matrix.arch }} ${{ matrix.target }}
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b30edc945..74ad276ad 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -14,7 +14,7 @@ on:
 jobs:
   test:
-    name: ${{ matrix.os }} ${{ matrix.gcrypt }}
+    name: ${{ matrix.os }} ${{ matrix.compiler }}
     runs-on: ${{ matrix.os }}
     env:
       CMAKE_C_COMPILER: ${{ matrix.compiler }}
diff --git a/dependencies/nDPIsrvd.h b/dependencies/nDPIsrvd.h
index 806bd09c8..11aa489b1 100644
--- a/dependencies/nDPIsrvd.h
+++ b/dependencies/nDPIsrvd.h
@@ -35,8 +35,6 @@
 #define nDPIsrvd_ARRAY_LENGTH(s) (sizeof(s) / sizeof(s[0]))
 #define nDPIsrvd_STRLEN_SZ(s) (sizeof(s) / sizeof(s[0]) - sizeof(s[0]))
 #define TOKEN_GET_SZ(sock, ...) nDPIsrvd_get_token(sock, __VA_ARGS__, NULL)
-#define TOKEN_GET_VALUE_SZ(sock, value_length, ...) \
-    nDPIsrvd_get_token_value(sock, TOKEN_GET_SZ(sock, __VA_ARGS__, NULL))
 #define TOKEN_VALUE_EQUALS(sock, token, string_to_check, string_to_check_length) \
     nDPIsrvd_token_value_equals(sock, token, string_to_check, string_to_check_length)
 #define TOKEN_VALUE_EQUALS_SZ(sock, token, string_to_check) \
diff --git a/examples/README.md b/examples/README.md
index 2b1d43427..03d7a9262 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -41,6 +41,11 @@ Required by `tests/run_tests.sh`
 
 ## py-machine-learning
 
+Contains:
+
+1. Classification via Random Forests and scikit-learn
+2. Anomaly Detection via Autoencoder and Keras (Work-In-Progress!)
+
 Use sklearn together with CSVs created with **c-analysed** to train and predict DPI detections.
 
 Try it with: `./examples/py-machine-learning/sklearn_random_forest.py --csv ./ndpi-analysed.csv --proto-class tls.youtube --proto-class tls.github --proto-class tls.spotify --proto-class tls.facebook --proto-class tls.instagram --proto-class tls.doh_dot --proto-class quic --proto-class icmp`
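As an aside: the train/predict cycle the README describes can be pictured in a few lines of scikit-learn. This is a minimal sketch, not the repository's sklearn_random_forest.py; the `proto` label column and the use of all numeric columns as features are assumptions about the c-analysed CSV layout, not its actual schema.

```python
# Minimal sketch only: train a random forest on a c-analysed CSV.
# The 'proto' label and numeric feature columns are assumptions.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

df = pd.read_csv('./ndpi-analysed.csv')
X = df.select_dtypes('number')  # hypothetical feature columns
y = df['proto']                 # hypothetical label column
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
clf = RandomForestClassifier(n_estimators=100)
clf.fit(X_train, y_train)
print('test accuracy: {:.3f}'.format(clf.score(X_test, y_test)))
```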
diff --git a/examples/py-machine-learning/keras-autoencoder.py b/examples/py-machine-learning/keras-autoencoder.py
new file mode 100755
index 000000000..943a6aefc
--- /dev/null
+++ b/examples/py-machine-learning/keras-autoencoder.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+
+import base64
+import csv
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import pandas as pd
+import tensorflow as tf
+import sys
+
+from tensorflow.keras import layers, preprocessing
+from tensorflow.keras.layers import Input, Dense
+from tensorflow.keras.models import Model, Sequential
+from tensorflow.keras.utils import plot_model
+
+sys.path.append(os.path.dirname(sys.argv[0]) + '/../../dependencies')
+sys.path.append(os.path.dirname(sys.argv[0]) + '/../share/nDPId')
+sys.path.append(os.path.dirname(sys.argv[0]))
+sys.path.append(sys.base_prefix + '/share/nDPId')
+import nDPIsrvd
+from nDPIsrvd import nDPIsrvdSocket, TermColor
+
+input_size = nDPIsrvd.nDPId_PACKETS_PLEN_MAX
+training_size = 500
+batch_size = 100
+
+def generate_autoencoder():
+    # Dense autoencoder over normalized, padded packet bytes.
+    input_i = Input(shape=(input_size,))
+    encoded_h1 = Dense(1024, activation='relu', name='encoded_h1')(input_i)
+    encoded_h2 = Dense(512, activation='relu', name='encoded_h2')(encoded_h1)
+    encoded_h3 = Dense(128, activation='relu', name='encoded_h3')(encoded_h2)
+    encoded_h4 = Dense(64, activation='relu', name='encoded_h4')(encoded_h3)
+    encoded_h5 = Dense(32, activation='relu', name='encoded_h5')(encoded_h4)
+    latent = Dense(2, activation='relu', name='latent')(encoded_h5)
+    decoder_h1 = Dense(32, activation='relu', name='decoder_h1')(latent)
+    decoder_h2 = Dense(64, activation='relu', name='decoder_h2')(decoder_h1)
+    decoder_h3 = Dense(128, activation='relu', name='decoder_h3')(decoder_h2)
+    decoder_h4 = Dense(512, activation='relu', name='decoder_h4')(decoder_h3)
+    decoder_h5 = Dense(1024, activation='relu', name='decoder_h5')(decoder_h4)
+    return input_i, Model(input_i, Dense(input_size, activation='sigmoid', name='output')(decoder_h5))
+
+def compile_autoencoder():
+    inp, autoencoder = generate_autoencoder()
+    autoencoder.compile(loss='mse', optimizer='adam', metrics=[tf.keras.metrics.Accuracy()])
+    return inp, autoencoder
+
+def onJsonLineRecvd(json_dict, instance, current_flow, global_user_data):
+    if 'packet_event_name' not in json_dict:
+        return True
+
+    if json_dict['packet_event_name'] != 'packet' and \
+       json_dict['packet_event_name'] != 'packet-flow':
+        return True
+
+    autoencoder, padded_pkts = global_user_data
+    buf = base64.b64decode(json_dict['pkt'], validate=True)
+
+    # Generate a decimal byte buffer with values from 0-255
+    int_buf = [int(v) for v in buf]
+
+    mat = np.array([int_buf])
+
+    # Normalize the values to the range [0, 1]
+    mat = mat.astype('float32') / 255.
+
+    # Pad the resulting matrix; keep the float dtype and cut overlong packets
+    buf = preprocessing.sequence.pad_sequences(mat, padding="post", maxlen=input_size, truncating='post', dtype='float32')
+    padded_pkts.append(buf[0])
+
+    sys.stdout.write('.')
+    sys.stdout.flush()
+    if len(padded_pkts) % training_size == 0:
+        print('\nGot {} packets, training..'.format(len(padded_pkts)))
+        tmp = np.array(padded_pkts)
+        history = autoencoder.fit(
+            tmp, tmp, epochs=10, batch_size=batch_size,
+            validation_split=0.2,
+            shuffle=True
+        )
+        padded_pkts.clear()
+
+        #plot_model(autoencoder, show_shapes=True, show_layer_names=True)
+        #plt.plot(history.history['loss'])
+        #plt.plot(history.history['val_loss'])
+        #plt.title('model loss')
+        #plt.xlabel('epoch')
+        #plt.ylabel('loss')
+        #plt.legend(['loss', 'val_loss'], loc='upper left')
+        #plt.show()
+
+    return True
+
+if __name__ == '__main__':
+    sys.stderr.write('\b\n***************\n')
+    sys.stderr.write('*** WARNING ***\n')
+    sys.stderr.write('***************\n')
+    sys.stderr.write('\nThis is an immature autoencoder example.\n')
+    sys.stderr.write('Please do not rely on any of its output!\n\n')
+
+    argparser = nDPIsrvd.defaultArgumentParser()
+    args = argparser.parse_args()
+    address = nDPIsrvd.validateAddress(args)
+
+    sys.stderr.write('Recv buffer size: {}\n'.format(nDPIsrvd.NETWORK_BUFFER_MAX_SIZE))
+    sys.stderr.write('Connecting to {} ..\n'.format(address[0]+':'+str(address[1]) if type(address) is tuple else address))
+
+    _, autoencoder = compile_autoencoder()
+
+    nsock = nDPIsrvdSocket()
+    nsock.connect(address)
+    try:
+        padded_pkts = list()
+        nsock.loop(onJsonLineRecvd, None, (autoencoder, padded_pkts))
+    except nDPIsrvd.SocketConnectionBroken as err:
+        sys.stderr.write('\n{}\n'.format(err))
+    except KeyboardInterrupt:
+        print()
diff --git a/examples/py-schema-validation/requirements.txt b/examples/py-schema-validation/requirements.txt
new file mode 100644
index 000000000..d89304b1a
--- /dev/null
+++ b/examples/py-schema-validation/requirements.txt
@@ -0,0 +1 @@
+jsonschema
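The new example trains the autoencoder but never scores packets. A natural follow-up, sketched here as an assumption rather than anything in this commit, is to rank packets by reconstruction error once training has converged:

```python
# Hypothetical follow-up (not part of this commit): flag packets whose
# reconstruction error is unusually high. Assumes the trained model maps
# a batch of (input_size,) vectors back to the same shape.
import numpy as np

def anomaly_scores(autoencoder, padded_pkts):
    mat = np.array(padded_pkts)
    recon = autoencoder.predict(mat)
    # Per-packet mean squared error; larger means more anomalous.
    return np.mean(np.square(mat - recon), axis=1)

# e.g. treat everything above the 99th percentile as anomalous:
# scores = anomaly_scores(autoencoder, padded_pkts)
# anomalies = scores > np.percentile(scores, 99)
```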