diff options
author | aouinizied <aouinizied@gmail.com> | 2022-03-22 13:19:27 +0100 |
---|---|---|
committer | aouinizied <aouinizied@gmail.com> | 2022-03-22 13:19:27 +0100 |
commit | beef4f997bccc90c545abdf8d387bab600b4af8f (patch) | |
tree | 71a608cc34d1b727f2e408033e3d04f480399eb9 /python/ndpi_example.py | |
parent | 93f723d50f789530ca09dd9c5104e629824e30f4 (diff) |
Complete rework of nDPI Python bindings (cffi API, automatic generation, packaging and CI integration)
Diffstat (limited to 'python/ndpi_example.py')
-rw-r--r--[-rwxr-xr-x] | python/ndpi_example.py | 398 |
1 files changed, 146 insertions, 252 deletions
diff --git a/python/ndpi_example.py b/python/ndpi_example.py index d134d3947..8606ae84b 100755..100644 --- a/python/ndpi_example.py +++ b/python/ndpi_example.py @@ -1,273 +1,167 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - """ -file: ndpi_example.py -This file is part of nDPI. - -Copyright (C) 2011-19 - ntop.org -Copyright (C) 2019 - Zied Aouini <aouinizied@gmail.com> (Incremental improvements) - -nDPI is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License -as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - +------------------------------------------------------------------------------------------------------------------------ +ndpi_example.py +Copyright (C) 2011-22 - ntop.org +This file is part of nDPI, an open source deep packet inspection library. +nDPI is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later +version. nDPI is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty -of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with nDPI. +of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +You should have received a copy of the GNU Lesser General Public License along with NFStream. If not, see <http://www.gnu.org/licenses/>. +------------------------------------------------------------------------------------------------------------------------ """ -from ndpi_typestruct import * -from ctypes import * -from scapy.all import * -import sys - -# ------- return type & pcapstruct to declare ------- - - -class WorkFlow(Structure): - _fields_ = [("src_ip", c_uint32), - ("dst_ip", c_uint32), - ("src_port", c_uint16), - ("dst_port", c_uint16), - ("protocol", c_uint8), - ("packets", c_uint32), - ("detected_protocol", NDPIProtocol), - ("detection_completed", c_uint8), - ("id", c_uint32), - ("src_id", POINTER(NDPIIdStruct)), - ("dst_id", POINTER(NDPIIdStruct)), - ("flow", POINTER(NDPIFlowStruct))] - - -CMCFUN = CFUNCTYPE(c_int, c_void_p, c_void_p) -GUESS = CFUNCTYPE(None, c_void_p, c_int32, c_int, c_void_p) -FREE = CFUNCTYPE(None, c_void_p) - - -def node_proto_guess_walker(nodo, which, depth, user_data): - global ndpi_info_mod +from collections import namedtuple +from ndpi import NDPI, NDPIFlow +import argparse +import socket +import dpkt - flow = cast(nodo, POINTER(POINTER(WorkFlow))).contents.contents - if which == 0 or which == 3: # execute only preorder operation of the tree - if flow.detection_completed == 0: # order for tree operation - flow.detected_protocol = ndpi.ndpi_detection_giveup(ndpi_info_mod, - flow.flow, - 1, - cast(addressof(c_uint8(0)), POINTER(c_uint8))) - count_protocol[flow.detected_protocol.app_protocol] += flow.packets +FLOW_KEY = "{} {}:{} <-> {}:{}" +FLOW_STR = " {} {} [protocol:{}] [category:{}] [confidence:{}] [{} packets/{} bytes]" -def py_cmp_fun(a, b): - fa = cast(a, POINTER(WorkFlow)) - fb = cast(b, POINTER(WorkFlow)) - if fa.contents.src_ip < fb.contents.src_ip: return -1 - elif fa.contents.src_ip > fb.contents.src_ip: return 1 - if fa.contents.src_port < fb.contents.src_port: return -1 - elif fa.contents.src_port > fb.contents.src_port: return 1 - if fa.contents.dst_ip < fb.contents.dst_ip: return -1 - elif fa.contents.dst_ip > fb.contents.dst_ip: return 1 - if fa.contents.dst_port < fb.contents.dst_port: return -1 - elif fa.contents.dst_port > fb.contents.dst_port: return 1 - if fa.contents.protocol < fb.contents.protocol: return -1 - elif fa.contents.protocol > fb.contents.protocol: return 1 - return 0 +PROTOCOL_UNKNWON = 0 -def freer(a): - pass +class Flow(object): + __slots__ = ("index", + "pkts", + "bytes", + "detected_protocol", + "ndpi_flow") + def __init__(self): + self.pkts = 0 + self.detected_protocol = None + self.bytes = 0 + self.ndpi_flow = None -cmp_func = CMCFUN(py_cmp_fun) -guess_walker = GUESS(node_proto_guess_walker) -free_walk = FREE(freer) -# -------------------------------------- - -# number of analyzed packet -packet_number = 0 -flow_count = 0 -max_num_udp_dissected_pkts = 16 -max_num_tcp_dissected_pkts = 10 -flows_root = c_void_p(None) -flows_root_ref = pointer(flows_root) -count_protocol = (c_int32 * (ndpi.ndpi_wrap_ndpi_max_supported_protocols() + ndpi.ndpi_wrap_ndpi_max_num_custom_protocols() + 1))() -lista = [] # used to avoid impropriate memory deallocation from python - - -# check ndpi version -if ndpi.ndpi_get_api_version() != ndpi.ndpi_wrap_get_api_version(): - print("nDPI Library version mismatch: please make sure this code and the nDPI library are in sync\n") - sys.exit(-1) - -# create data structure of ndpi -ndpi_info_mod = ndpi.ndpi_init_detection_module() -if ndpi_info_mod is None: - sys.exit(-1) -else: - ndpi_ndpi_finalize_initalization(ndpi_info_mod) - - -def ip2int(ip): - """ - Convert an IP string to long and then c_uint32 - """ - packedIP = socket.inet_aton(ip) - return int(struct.unpack("!I", packedIP)[0]) - - -def get_flow(pkt): - global flows_root - global flows_root_ref - global flow_count - - ip_packet = pkt[1] - ip_protocol = ip_packet.proto - transport_packet = None - - if ip_protocol == 6 or ip_protocol == 17: - transport_packet = pkt[2] - if transport_packet is not None: - # to avoid two nodes in one binary tree for a flow - ip_src = ip2int(ip_packet.src) - ip_dst = ip2int(ip_packet.dst) - src_port = transport_packet.sport - dst_port = transport_packet.dport - else: - return None - # set flows to correct type and data - ndpi_flow = pointer(NDPIFlowStruct()) - memset(ndpi_flow, 0, sizeof(NDPIFlowStruct)) - if ip_src > ip_dst: - flow = WorkFlow(ip_src, ip_dst, src_port, dst_port, int(ip_packet.proto), 0, NDPIProtocol(), 0, 0, - pointer(NDPIIdStruct()), pointer(NDPIIdStruct()), ndpi_flow) - else: - flow = WorkFlow(ip_dst, ip_src, dst_port, src_port, int(ip_packet.proto), 0, NDPIProtocol(), 0, 0, - pointer(NDPIIdStruct()), pointer(NDPIIdStruct()), ndpi_flow) - flow_ref = pointer(flow) - res = ndpi.ndpi_tfind(flow_ref, flows_root_ref, cmp_func) - if res is None: - ndpi.ndpi_tsearch(flow_ref, pointer(flows_root), cmp_func) # add - lista.append(flow) - flow_count += 1 - return pointer(flow) - flow = cast(res, POINTER(POINTER(WorkFlow))).contents - return flow +ppacket = namedtuple('ParsedPacket', ['src_ip', + 'src_port', + 'dst_ip', + 'dst_port', + 'protocol', + 'ip_version', + 'ip_bytes']) -def packetcaptured(pkt): - global packet_number - global ndpi_info_mod - - flow = None - h = PcapPktHdr() - - # getting flow +def inet_to_str(inet): + """ get string representation of IP address """ try: - flow = get_flow(pkt) - except AttributeError: - pass # ignore packet - if flow is None: return - - # filling pcap_pkthdr - h.len = h.caplen = len(pkt) - h.ts.tv_sec = int(pkt["IP"].time/1000000) - h.ts.tv_usec = int(pkt["IP"].time) - - # real work - if int(pkt[1].frag) == 0: # not fragmented packet - flow.contents.packets += 1 - packet_number += 1 - # get ndpi_iphdr address - iphdr_addr = cast(c_char_p(pkt[1].build()), c_void_p) - ndpi_flow = flow.contents.flow - - if flow.contents.detection_completed is 0: - flow.contents.detected_protocol = ndpi.ndpi_detection_process_packet(ndpi_info_mod, - ndpi_flow, - cast(iphdr_addr, POINTER(c_uint8)), - int(pkt[1].len), - h.ts.tv_usec, - flow.contents.src_id, - flow.contents.dst_id) - - flow1 = flow.contents.detected_protocol - - valid = False - - if flow.contents.protocol == 6: valid = flow.contents.packets > max_num_tcp_dissected_pkts - elif flow.contents.protocol == 17: valid = flow.contents.packets > max_num_udp_dissected_pkts - - # should we continue anylizing packet or not? - if valid or flow1.app_protocol is not 0: - if valid or flow1.master_protocol is not 91: # or # 91 is NDPI_PROTOCOL_TLS - flow.contents.detection_completed = 1 # protocol found - if flow1.app_protocol is 0: - flow.contents.detected_protocol = ndpi.ndpi_detection_giveup(ndpi_info_mod, - ndpi_flow, - 1, - cast(addressof(c_uint8(0)), - POINTER(c_uint8))) - - -def result(): - global flows_root_ref - global ndpi_info_mod - print('\nnumber of analyzed packet: ' + str(packet_number)) - print('number of flows: ' + str(flow_count)) - - ndpi.ndpi_twalk(flows_root_ref.contents, guess_walker, None) + return socket.inet_ntop(socket.AF_INET, inet) + except ValueError: + return socket.inet_ntop(socket.AF_INET6, inet) - print('\nDetected protocols:') - for i in range(0, ndpi.ndpi_get_num_supported_protocols(ndpi_info_mod)): - if count_protocol[i] > 0: - print("{}: {} packets".format( - cast(ndpi.ndpi_get_proto_name(ndpi_info_mod, i), c_char_p).value.decode('utf-8'), - str(count_protocol[i]))) - -def free(ndpi_struct): - ndpi.ndpi_tdestroy(flows_root, free_walk) - ndpi.ndpi_exit_detection_module(ndpi_struct) - - -def initialize(ndpi_struct): - all = NDPIProtocolBitMask() - ndpi.ndpi_wrap_NDPI_BITMASK_SET_ALL(pointer(all)) - ndpi.ndpi_set_protocol_detection_bitmask2(ndpi_struct, pointer(all)) - - -print('Using nDPI ' + cast(ndpi.ndpi_revision(), c_char_p).value.decode("utf-8")) - -initialize(ndpi_info_mod) - -if len(sys.argv) != 2: - print("\nUsage: ndpi_example.py <device>") - sys.exit(0) - -if "." in sys.argv[1]: - print('Reading pcap from file ' + sys.argv[1] + '...') - scapy_cap = None +def parse_packet(pkt): + """ parse packet and extract 5 tuple and IP bytes """ try: - scapy_cap = rdpcap(sys.argv[1]) - except FileNotFoundError: - print("\nFile not found") - except Scapy_Exception: - print("\nBad pcap") + l2 = dpkt.ethernet.Ethernet(pkt) + if isinstance(l2.data, dpkt.ip.IP): + ip_version = 4 + elif isinstance(l2.data, dpkt.ip6.IP6): + ip_version = 6 + else: + return + except dpkt.dpkt.NeedData: + return + + l3 = l2.data + stop_decoding = False + while not stop_decoding: + if isinstance(l3.data, dpkt.tcp.TCP): + l4 = l3.data + proto = "TCP" + stop_decoding = True + elif isinstance(l3.data, dpkt.udp.UDP): + l4 = l3.data + proto = "UDP" + stop_decoding = True + elif isinstance(l3.data, dpkt.ip6.IP6): + l3 = l3.data + else: + return + + return ppacket(src_ip=inet_to_str(l3.src), src_port=l4.sport, + dst_ip=inet_to_str(l3.dst), dst_port=l4.dport, + protocol=proto, ip_version=ip_version, + ip_bytes=bytes(l3)) + + +def ppkt_to_flow_key(ppkt): + """ create a consistent direction agnostic flow keyfrom a parsed packet """ + if ppkt.src_ip < ppkt.dst_ip: + k = FLOW_KEY.format(ppkt.protocol, ppkt.src_ip, ppkt.src_port, ppkt.dst_ip, ppkt.dst_port) else: - for packet in scapy_cap: - packetcaptured(packet) -else: - print('Capturing live traffic from device ' + sys.argv[1] + '...') - try: - sniff(iface=sys.argv[1], prn=packetcaptured) - except KeyboardInterrupt: - print('\nInterrupted\n') - except PermissionError: - sys.exit('\nRoot privilege required for live capture on interface: {}\n'.format(sys.argv[1])) - - -result() -free(ndpi_info_mod) + if ppkt.src_ip == ppkt.dst_ip: + if ppkt.src_port <= ppkt.dst_port: + k = FLOW_KEY.format(ppkt.protocol, ppkt.src_ip, ppkt.src_port, ppkt.dst_ip, ppkt.dst_port) + else: + k = FLOW_KEY.format(ppkt.protocol, ppkt.dst_ip, ppkt.dst_port, ppkt.src_ip, ppkt.src_port) + else: + k = FLOW_KEY.format(ppkt.protocol, ppkt.dst_ip, ppkt.dst_port, ppkt.src_ip, ppkt.src_port) + return k + + +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument("input", help="input pcap file path") + parser.add_argument('-u', '--include-unknowns', action='store_true') + return parser.parse_args() + + +if __name__ == "__main__": + nDPI = NDPI() # As simple as that. :) + flow_cache = {} # We store the flows in a dictionary. + flow_count = 0 # Flow counter + print("Using nDPI {}".format(nDPI.revision)) + args = parse_arguments() + + with open(args.input, 'rb') as pcap_file: + capture = dpkt.pcap.Reader(pcap_file) # We use dpkt pcap capture handler + for time, packet in capture: + time_ms = int(time * 1000) # Convert packet timestamp to milliseconds + ppkt = parse_packet(packet) + if ppkt is not None: # If we succeed to parse the packet + key = ppkt_to_flow_key(ppkt) + try: # Try a Flow update + flow = flow_cache[key] + flow.detected_protocol = nDPI.process_packet(flow.ndpi_flow, ppkt.ip_bytes, time_ms) + flow.pkts += 1 + flow.bytes += len(packet) + except KeyError: # New Flow + flow = Flow() + flow.index = flow_count + flow_count += 1 + flow.ndpi_flow = NDPIFlow() # We create an nDPIFlow object per Flow + flow.detected_protocol = nDPI.process_packet(flow.ndpi_flow, ppkt.ip_bytes, time_ms) + flow.pkts += 1 + flow.bytes += len(packet) + flow_cache[key] = flow + + print(" Detected flows:") + unknown_flows = [] + for key, flow in flow_cache.items(): # Iterate over all flows in flow cache + if flow.detected_protocol.app_protocol == PROTOCOL_UNKNWON: # Didn't succeed to identigy it using DPI + flow.detected_protocol = nDPI.giveup(flow.ndpi_flow) # We try to guess it (port matching, LRU, etc.) + FLOW_EXPORT = FLOW_STR.format(flow.index, + key, + nDPI.protocol_name(flow.detected_protocol), + nDPI.protocol_category_name(flow.detected_protocol), + flow.ndpi_flow.confidence.name, + flow.pkts, + flow.bytes) + if flow.detected_protocol.app_protocol != PROTOCOL_UNKNWON: + print(FLOW_EXPORT) # We start by printing detected flows + else: + # Format it for later + unknown_flows.append(FLOW_EXPORT) + if args.include_unknowns: + print(" Unknown flows:") + for unknown_flow in unknown_flows: # Dump unknown flows + print(unknown_flow) |