eris

- Elasticsearch Recon Ingestion Scripts (ERIS) 🔎
git clone git://git.acid.vegas/-c.git
Log | Files | Refs | Archive | README | LICENSE

commit 78d7556f09284219f77fe28ffa7e401d61a5a09b
parent 603d005a475e4e26f0d51b7d68d5ed8858450920
Author: acidvegas <acid.vegas@acid.vegas>
Date: Sat, 23 Mar 2024 13:46:38 -0400

Records are now stored as a nested type for better querying; added a source field so we can identify where the zone data was derived from (icann, axfr, breach, etc)

Diffstat:
M ingestors/ingest_zone.py | 15 +++++++++------

1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/ingestors/ingest_zone.py b/ingestors/ingest_zone.py
@@ -12,7 +12,7 @@ except ImportError:
 
 
 # Set a default elasticsearch index if one is not provided
-default_index = 'dns-zones'
+default_index = 'eris-zones'
 
 # Known DNS record types found in zone files
 record_types  = ('a','aaaa','caa','cdnskey','cds','cname','dnskey','ds','mx','naptr','ns','nsec','nsec3','nsec3param','ptr','rrsig','rp','sshfp','soa','srv','txt','type65534')
@@ -29,7 +29,8 @@ def construct_map() -> dict:
 		'mappings': {
 			'properties': {
 				'domain'  : keyword_mapping,
-				'records' : { 'properties': {} },
+				'records' : { 'type': 'nested', 'properties': {} },
+				'source'  : { 'type': 'keyword' },
 				'seen'    : { 'type': 'date' }
 			}
 		}
@@ -39,9 +40,10 @@ def construct_map() -> dict:
 	for record_type in record_types:
 		if record_type in ('a','aaaa'):
 			mapping['mappings']['properties']['records']['properties'][record_type] = {
-				'properties': {
-					'data': { 'type': 'ip' if record_type in ('a','aaaa') else keyword_mapping},
-					'ttl':  { 'type': 'integer' }
+				'type'       : 'nested',
+				'properties' : {
+					'data' : { 'type': 'ip' if record_type in ('a','aaaa') else keyword_mapping },
+					'ttl'  : { 'type': 'integer' }
 				}
 			}
 
@@ -127,7 +129,8 @@ async def process_data(file_path: str):
 				'_index'   : default_index,
 				'_doc'     : {
 					'domain'  : domain,
-					'records' : {record_type: [{'ttl': ttl, 'data': data}]},
+					'records' : {record_type: [{'data': data, 'ttl': ttl}]},
+					'source'  : 'czds',
 					'seen'    : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()) # Zone files do not contain a timestamp, so we use the current time
 				},
 				'doc_as_upsert' : True # This will create the document if it does not exist