eris

- Elasticsearch Recon Ingestion Scripts (ERIS) 🔎
git clone git://git.acid.vegas/eris.git
Log | Files | Refs | Archive | README | LICENSE

commit ff86784a5370724b19a090e86e0053db3e51f096
parent 16bc2aaa7f37012917509b0b480592f42ed6b535
Author: acidvegas <acid.vegas@acid.vegas>
Date: Wed, 27 Nov 2024 14:52:26 -0500

Improved processing using the hidden -oD output for ndjson

Diffstat:
M ingestors/ingest_masscan.py | 55 +++++++++++++++++++++----------------------------------

1 file changed, 21 insertions(+), 34 deletions(-)

diff --git a/ingestors/ingest_masscan.py b/ingestors/ingest_masscan.py
@@ -71,10 +71,6 @@ async def process_data(input_path: str):
 			if not line or not line.startswith('{'):
 				continue
 
-			# Do we need this? Masscan JSON output seems with seperate records with a comma between lines for some reason...
-			if line.endswith(','):
-				line = line[:-1]
-
 			# Parse the JSON record
 			try:
 				record = json.loads(line)
@@ -86,36 +82,27 @@ async def process_data(input_path: str):
 				input('Press Enter to continue...') # Pause for review & debugging (remove this in production)
 				continue
 
-			# In rare cases, a single record may contain multiple ports, though I have yet to witness this...
-			if len(record['ports']) > 1:
-				logging.warning(f'Multiple ports found for record! ({record})')
-				input('Press Enter to continue...') # Pause for review (remove this in production)
-
-			# Process each port in the record
-			for port_info in record['ports']:
-				struct = {
-					'ip'    : record['ip'],
-					'port'  : port_info['port'],
-					'proto' : port_info['proto'],
-					'seen'  : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(int(record['timestamp'])))
-				}
-
-				# Add the service information if available (this field is optional)
-				if 'service' in port_info:
-
-        			# Add the service name if available
-					if 'name' in port_info['service']:
-						if (service_name := port_info['service']['name']) not in ('unknown',''):
-							struct['service'] = service_name
+			# Process the record
+			struct = {
+				'ip'    : record['ip'],
+				'port'  : record['port'],
+				'proto' : record['proto'],
+				'seen'  : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(int(record['timestamp'])))
+			}
 
-					# Add the service banner if available
-					if 'banner' in port_info['service']:
-						banner = ' '.join(port_info['service']['banner'].split()) # Remove extra whitespace
-						if banner:
-							struct['banner'] = banner
+			# Add the service information if available (this field is optional)
+			if record['rec_type'] == 'banner':
+				data = record['data']
+				if 'service_name' in data:
+					if (service_name := data['service_name']) not in ('unknown', ''):
+						struct['service'] = service_name
+				if 'banner' in data:
+					banner = ' '.join(data['banner'].split()) # Remove extra whitespace
+					if banner:
+						struct['banner'] = banner
 
-				# Yield the record
-				yield {'_index': default_index, '_source': struct}
+			# Yield the record
+			yield {'_index': default_index, '_source': struct}
 
 
 async def test(input_path: str):
@@ -149,8 +136,8 @@ Deploy:
 	/sbin/iptables -A INPUT -p tcp --dport 61010 -j DROP # Not persistent
 	printf "0.0.0.0/8\n10.0.0.0/8\n100.64.0.0/10\n127.0.0.0/8\n169.254.0.0/16\n172.16.0.0/12\n192.0.0.0/24\n192.0.2.0/24\n192.31.196.0/24\n192.52.193.0/24\n192.88.99.0/24\n192.168.0.0/16\n192.175.48.0/24\n198.18.0.0/15\n198.51.100.0/24\n203.0.113.0/24\n224.0.0.0/3\n255.255.255.255/32"  > exclude.conf
 	screen -S scan
-	masscan 0.0.0.0/0 -p18000 --banners --http-user-agent "USER_AGENT" --source-port 61010 --open-only --rate 30000 --excludefile exclude.conf -oJ 18000.json
-	masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61000-65503 --open-only --rate 30000 --excludefile exclude.conf -oJ output_new.json --shard $i/$TOTAL
+	masscan 0.0.0.0/0 -p18000 --banners --http-user-agent "USER_AGENT" --source-port 61010 --open-only --rate 30000 --excludefile exclude.conf -oD 18000.json
+	masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61000-65503 --open-only --rate 30000 --excludefile exclude.conf -oD output_new.json --shard $i/$TOTAL
 
 Output:
 	{