eris

Elasticsearch Recon Ingestion Scripts (ERIS) 🔎
git clone git://git.acid.vegas/eris.git
Log | Files | Refs | Archive | README | LICENSE

commit 84f124b23d29a7b183dc14ddffe948ae4a8d3b95
parent 5a45be5c603fd8605d38f5f36391512b50123886
Author: acidvegas <acid.vegas@acid.vegas>
Date: Wed, 6 Mar 2024 13:26:45 -0500

Masscan ingestion script updated to use ip:port as the document id to allow updating records that already exist. Added a sentinel value to trigger an EOF when using --watch with FIFOs
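
A usage sketch for the new sentinel, assuming a hypothetical FIFO path of /tmp/masscan.fifo (the path is for illustration only; any FIFO being read with --watch behaves the same way):

# Hypothetical example: tell the ingestor reading a FIFO with --watch that the stream is done.
import os

fifo_path = '/tmp/masscan.fifo' # Assumed path, match it to whatever --watch is reading

if not os.path.exists(fifo_path):
    os.mkfifo(fifo_path) # Create the FIFO if it does not exist yet

with open(fifo_path, 'w') as fifo: # Blocks until the ingestor has the FIFO open for reading
    fifo.write('~eof\n') # process_data() breaks out of its read loop when it sees this line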

Diffstat:
M ingestors/ingest_masscan.py | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------

1 file changed, 60 insertions(+), 57 deletions(-)

diff --git a/ingestors/ingest_masscan.py b/ingestors/ingest_masscan.py
@@ -2,21 +2,8 @@
 # Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
 # ingest_masscan.py
 
-'''
-apt-get install iptables masscan libpcap-dev screen
-setcap 'CAP_NET_RAW+eip CAP_NET_ADMIN+eip' /bin/masscan
-/sbin/iptables -A INPUT -p tcp --dport 61010 -j DROP
-printf "0.0.0.0/8\n10.0.0.0/8\n100.64.0.0/10\n127.0.0.0/8\n169.254.0.0/16\n172.16.0.0/12\n192.0.0.0/24\n192.0.2.0/24\n192.31.196.0/24\n192.52.193.0/24\n192.88.99.0/24\n192.168.0.0/16\n192.175.48.0/24\n198.18.0.0/15\n198.51.100.0/24\n203.0.113.0/24\n224.0.0.0/3\n255.255.255.255/32"  > exclude.conf
-screen -S scan
-masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61010 --open-only --rate 30000 --excludefile exclude.conf -oJ output.json
-masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61000-65503 --open-only --rate 30000 --excludefile exclude.conf -oJ output_new.json --shard $i/$TOTAL
-
-Note: The above iptables rule is not persistent and will be removed on reboot.
-'''
-
 import json
 import logging
-import re
 import time
 
 try:
@@ -31,27 +18,27 @@ def construct_map() -> dict:
 
     keyword_mapping = { 'type': 'text',  'fields': { 'keyword': { 'type': 'keyword', 'ignore_above': 256 } } }
 
+    geoip_mapping = {
+        'city_name'        : keyword_mapping,
+        'continent_name'   : keyword_mapping,
+        'country_iso_code' : keyword_mapping,
+        'country_name'     : keyword_mapping,
+        'location'         : { 'type': 'geo_point' },
+        'region_iso_code'  : keyword_mapping,
+        'region_name'      : keyword_mapping,
+    }
+
     mapping = {
         'mappings': {
             'properties': {
-                'ip':      { 'type': 'ip' },
-                'port':    { 'type': 'integer' },
-                'proto':   { 'type': 'keyword' },
-                'service': { 'type': 'keyword' },
-                'banner':  keyword_mapping,
-                'ref_id':  { 'type': 'keyword' },
-                'seen':    { 'type': 'date' }
-                #'geoip':   {
-                #    'properties': {
-                #        'city_name':        keyword_mapping,
-                #        'continent_name':   keyword_mapping,
-                #        'country_iso_code': keyword_mapping,
-                #        'country_name':     keyword_mapping,
-                #        'location':         { 'type': 'geo_point' },
-                #        'region_iso_code':  keyword_mapping,
-                #        'region_name':      keyword_mapping,
-                #    }
-                #}
+                'ip'      : { 'type': 'ip' },
+                'port'    : { 'type': 'integer' },
+                'proto'   : { 'type': 'keyword' },
+                'service' : { 'type': 'keyword' },
+                'banner'  : keyword_mapping,
+                #'geoip'  : { 'properties': geoip_mapping } # Used with the geoip pipeline to enrich the data
+                'seen'    : { 'type': 'date' }
+                
             }
         }
     }
@@ -70,10 +57,13 @@ async def process_data(file_path: str):
         async for line in input_file:
             line = line.strip()
 
+            if line == '~eof': # Sentinel value to indicate the end of the stream (used when --watch is reading a FIFO)
+                break
+
             if not line or not line.startswith('{'):
                 continue
 
-            if line.endswith(','):
+            if line.endswith(','): # Do we need this? Masscan's JSON output seems to separate records with a trailing comma between lines for some reason...
                 line = line[:-1]
 
             try:
@@ -83,12 +73,13 @@ async def process_data(file_path: str):
                 #   {   "ip": "51.161.12.223",   "timestamp": "1707628302", "ports": [ {"port": 22, "proto": "tcp", "service": {"name": "ssh", "banner":
                 #   {   "ip": "83.66.211.246",   "timestamp": "1706557002"
                 logging.error(f'Failed to parse JSON record! ({line})')
-                input('Press Enter to continue...') # Pause for review & debugging (Will remove pausing in production, still investigating the cause of this issue.)
+                input('Press Enter to continue...') # Pause for review & debugging (remove this in production)
                 continue
 
             if len(record['ports']) > 1:
+                # In rare cases, a single record may contain multiple ports, though I have yet to witness this...
                 logging.warning(f'Multiple ports found for record! ({record})')
-                input('Press Enter to continue...') # Pause for review (Will remove pausing in production, still investigating if you ever seen more than one port in a record.)
+                input('Press Enter to continue...') # Pause for review (remove this in production)
 
             for port_info in record['ports']:
                 struct = {
@@ -106,30 +97,26 @@ async def process_data(file_path: str):
                     if 'banner' in port_info['service']:
                         banner = ' '.join(port_info['service']['banner'].split()) # Remove extra whitespace
                         if banner:
-                            match = re.search(r'\(Ref\.Id: (.*?)\)', banner)
-                            if match:
-                                struct['ref_id'] = match.group(1)
-                            else:
-                                struct['banner'] = banner
+                            struct['banner'] = banner
 
-                yield {'_index': default_index, '_source': struct}
- 
-    return None # EOF
+                id = f'{record["ip"]}:{port_info["port"]}' # Store with ip:port as the unique id to allow the record to be reindexed if it exists.
+
+                yield {'_id': id, '_index': default_index, '_source': struct}
 
 
 
 '''
 Example record:
 {
-    "ip": "43.134.51.142",
-    "timestamp": "1705255468", # Convert to ZULU BABY
-    "ports": [ # We will create a record for each port opened
+    "ip"        : "43.134.51.142",
+    "timestamp" : "1705255468", # Convert to ZULU BABY
+    "ports"     : [ # We will create a record for each port opened
         {
-            "port": 22,
-            "proto": "tcp",
-            "service": { # This field is optional
-                "name": "ssh",
-                "banner": "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4"
+            "port"    : 22,
+            "proto"   : "tcp",
+            "service" : { # This field is optional
+                "name"   : "ssh",
+                "banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4"
             }
         }
     ]
@@ -137,12 +124,28 @@ Example record:
 
 Will be indexed as:
 {
-    "ip": "43.134.51.142",
-    "port": 22,
-    "proto": "tcp",
-    "service": "ssh",
-    "banner": "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4",
-    "seen": "2021-10-08T02:04:28Z",
-    "ref_id": "?sKfOvsC4M4a2W8PaC4zF?" # TCP RST Payload, Might be useful..
+    "_id"     : "43.134.51.142:22"
+    "_index"  : "masscan-logs",
+    "_source" : {
+        "ip"      : "43.134.51.142",
+        "port"    : 22,
+        "proto"   : "tcp",
+        "service" : "ssh",
+        "banner"  : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4",
+        "seen"    : "2021-10-08T02:04:28Z"
 }
+'''
+
+
+
+'''
+Notes:
+
+apt-get install iptables masscan libpcap-dev screen
+setcap 'CAP_NET_RAW+eip CAP_NET_ADMIN+eip' /bin/masscan
+/sbin/iptables -A INPUT -p tcp --dport 61010 -j DROP # Not persistent
+printf "0.0.0.0/8\n10.0.0.0/8\n100.64.0.0/10\n127.0.0.0/8\n169.254.0.0/16\n172.16.0.0/12\n192.0.0.0/24\n192.0.2.0/24\n192.31.196.0/24\n192.52.193.0/24\n192.88.99.0/24\n192.168.0.0/16\n192.175.48.0/24\n198.18.0.0/15\n198.51.100.0/24\n203.0.113.0/24\n224.0.0.0/3\n255.255.255.255/32"  > exclude.conf
+screen -S scan
+masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61010 --open-only --rate 30000 --excludefile exclude.conf -oJ output.json
+masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61000-65503 --open-only --rate 30000 --excludefile exclude.conf -oJ output_new.json --shard $i/$TOTAL
 '''
 \ No newline at end of file
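
A side note on the commented-out 'geoip' mapping above: the enrichment is expected to come from an Elasticsearch geoip ingest pipeline rather than from this script. A minimal sketch of such a pipeline, assuming an elasticsearch-py 8.x client and a hypothetical pipeline name of 'geoip-enrich':

# Hypothetical example: create a geoip ingest pipeline that fills in the fields from geoip_mapping.
# The pipeline name, cluster URL, and omitted auth/TLS options are assumptions; adjust to your deployment.
from elasticsearch import Elasticsearch

es = Elasticsearch('https://localhost:9200')

es.ingest.put_pipeline(
    id          = 'geoip-enrich',
    description = 'Enrich masscan records with GeoIP data based on the ip field',
    processors  = [
        { 'geoip': { 'field': 'ip', 'target_field': 'geoip' } } # Populates city_name, country_iso_code, location, etc.
    ]
)

Indexing through that pipeline (pipeline='geoip-enrich' on the bulk request, or set as index.default_pipeline on masscan-logs) is what would populate the geoip properties, which is presumably why the field stays commented out in the mapping here.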