diff --git a/ingestors/ingest_httpx.py b/ingestors/ingest_httpx.py
@@ -64,7 +64,7 @@ async def process_data(file_path: str):
:param file_path: Path to the HTTPX log file
'''
- async with aiofiles.open(file_path, mode='r') as input_file:
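+ # Note: mode='r' is already the default for aiofiles.open, mirroring the builtin open()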
+ async with aiofiles.open(file_path) as input_file:
async for line in input_file:
line = line.strip()
diff --git a/ingestors/ingest_masscan.py b/ingestors/ingest_masscan.py
@@ -59,7 +59,7 @@ async def process_data(file_path: str):
:param file_path: Path to the Masscan log file
'''
- async with aiofiles.open(file_path, mode='r') as input_file:
+ async with aiofiles.open(file_path) as input_file:
async for line in input_file:
line = line.strip()
diff --git a/ingestors/ingest_massdns.py b/ingestors/ingest_massdns.py
@@ -39,7 +39,7 @@ async def process_data(file_path: str):
:param file_path: Path to the Massdns log file
'''
- async with aiofiles.open(file_path, mode='r') as input_file:
+ async with aiofiles.open(file_path) as input_file:
last = None
@@ -152,7 +152,7 @@ Output:
Input:
{
"_id" : "47.229.6.0"
- "_index" : "ptr-records",
+ "_index" : "eris-massdns",
"_source" : {
"ip" : "47.229.6.0",
"record" : "047-229-006-000.res.spectrum.com", # This will be a list if there are more than one PTR record
@@ -161,6 +161,6 @@ Input:
}
Notes:
-- Why do some IP addresses return a CNAME from a PTR request
-- What is dns-servfail.net (Frequent CNAME response from PTR requests)
+ Why do some IP addresses return a CNAME from a PTR request?
+ What is dns-servfail.net (frequent CNAME response from PTR requests)?
'''
\ No newline at end of file
diff --git a/ingestors/ingest_zone.py b/ingestors/ingest_zone.py
@@ -2,6 +2,7 @@
# Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
# ingest_zone.py
+import logging
import time
try:
@@ -56,15 +57,15 @@ async def process_data(file_path: str):
:param file_path: Path to the zone file
'''
- domain_records = {}
- last_domain = None
+ async with aiofiles.open(file_path) as input_file:
+
+ last = None
- async with aiofiles.open(file_path, mode='r') as input_file:
async for line in input_file:
line = line.strip()
if line == '~eof': # Sentinel value to indicate the end of a process (Used with --watch with FIFO)
- break
+ if last: # Flush the buffered record before stopping (an async generator cannot return a value)
+ yield last
+ return
if not line or line.startswith(';'):
continue
@@ -72,22 +73,25 @@ async def process_data(file_path: str):
parts = line.split()
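+ # Malformed entries are logged and skipped so a single bad line cannot abort the whole ingest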
if len(parts) < 5:
- raise ValueError(f'Invalid line: {line}')
+ logging.warning(f'Invalid line: {line}')
+ continue
domain, ttl, record_class, record_type, data = parts[0].rstrip('.').lower(), parts[1], parts[2].lower(), parts[3].lower(), ' '.join(parts[4:])
if not ttl.isdigit():
- raise ValueError(f'Invalid TTL: {ttl} with line: {line}')
+ logging.warning(f'Invalid TTL: {ttl} with line: {line}')
+ continue
ttl = int(ttl)
# Anomaly...Doubtful any CHAOS/HESIOD records will be found in zone files
if record_class != 'in':
- raise ValueError(f'Unsupported record class: {record_class} with line: {line}')
+ logging.warning(f'Unsupported record class: {record_class} with line: {line}')
+ continue
# We do not want to collide with our current mapping (Again, this is an anomaly)
if record_type not in record_types:
- raise ValueError(f'Unsupported record type: {record_type} with line: {line}')
+ logging.warning(f'Unsupported record type: {record_type} with line: {line}')
+ continue
# Little tidying up for specific record types (removing trailing dots, etc)
if record_type == 'nsec':
@@ -97,26 +101,27 @@ async def process_data(file_path: str):
elif data.endswith('.'):
data = data.rstrip('.')
- if domain != last_domain:
- if last_domain:
- struct = {
- 'domain' : last_domain,
- 'records' : domain_records[last_domain],
- 'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()) # Zone files do not contain a timestamp, so we use the current time
- }
-
- del domain_records[last_domain]
-
- yield {'_id': domain, '_index': default_index, '_source': struct} # Set the ID to the domain name to allow the record to be reindexed if it exists.
-
- last_domain = domain
-
- domain_records[domain] = {}
-
- if record_type not in domain_records[domain]:
- domain_records[domain][record_type] = []
-
- domain_records[domain][record_type].append({'ttl': ttl, 'data': data})
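+ # Buffer one document at a time in 'last': records are appended while consecutive lines share
+ # a domain, and the finished document is only yielded once a new domain starts (this assumes
+ # the zone file groups each domain's records together, as sorted zone files do)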
+ if last:
+ if domain == last['_doc']['domain']:
+ if record_type in last['_doc']['records']:
+ last['_doc']['records'][record_type].append({'ttl': ttl, 'data': data}) # Do we need to check for duplicate records?
+ else:
+ last['_doc']['records'][record_type] = [{'ttl': ttl, 'data': data}]
+ continue
+ else:
+ yield last
+
+ last = {
+ '_op_type' : 'update',
+ '_id' : domain,
+ '_index' : default_index,
+ '_doc' : {
+ 'domain' : domain,
+ 'records' : {record_type: [{'ttl': ttl, 'data': data}]},
+ 'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()) # Zone files do not contain a timestamp, so we use the current time
+ },
+ 'doc_as_upsert' : True # This will create the document if it does not exist
+ }
+
+ if last: # Flush the final buffered record once the file is exhausted
+ yield last
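+
+ # Rough consumption sketch (an assumption, not code from this repo): the yielded dicts are
+ # standard bulk update actions, so an async bulk helper such as elasticsearch-py's
+ # elasticsearch.helpers.async_bulk should be able to drive them directly, e.g.
+ # await async_bulk(es, process_data('zone_file.txt')), where 'es' is a hypothetical AsyncElasticsearch client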
async def test(input_path: str):
@@ -144,22 +149,22 @@ if __name__ == '__main__':
'''
Output:
- 1001.vegas. 3600 in ns ns11.waterrockdigital.com.
- 1001.vegas. 3600 in ns ns12.waterrockdigital.com.
+ 1001.vegas. 3600 in ns ns11.waterrockdigital.com.
+ 1001.vegas. 3600 in ns ns12.waterrockdigital.com.
Input:
{
- "_id" : "1001.vegas"
- "_index" : "dns-zones",
- "_source" : {
- "domain" : "1001.vegas",
- "records" : {
- "ns": [
- {"ttl": 3600, "data": "ns11.waterrockdigital.com"},
- {"ttl": 3600, "data": "ns12.waterrockdigital.com"}
+ '_id' : '1001.vegas',
+ '_index' : 'dns-zones',
+ '_source' : {
+ 'domain' : '1001.vegas',
+ 'records' : {
+ 'ns': [
+ {'ttl': 3600, 'data': 'ns11.waterrockdigital.com'},
+ {'ttl': 3600, 'data': 'ns12.waterrockdigital.com'}
]
},
- "seen" : "2021-09-01T00:00:00Z"
+ 'seen' : '2021-09-01T00:00:00Z'
}
}