eris

- Elasticsearch Recon Ingestion Scripts (ERIS) 🔎
git clone git://git.acid.vegas/-c.git
Log | Files | Refs | Archive | README | LICENSE

ingest_zone.py (4991B)

      1 #!/usr/bin/env python
      2 # Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
      3 # ingest_zone.py
      4 
      5 import time
      6 
      7 try:
      8     import aiofiles
      9 except ImportError:
     10     raise ImportError('Missing required \'aiofiles\' library. (pip install aiofiles)')
     11 
# Elasticsearch index name placed in the '_index' field of every document yielded by process_data()
default_index = 'dns-zones'
# Lowercase DNS record types that receive an explicit mapping in construct_map();
# process_data() raises ValueError for any zone line whose record type is not listed here
record_types = ('a','aaaa','caa','cdnskey','cds','cname','dnskey','ds','mx','naptr','ns','nsec','nsec3','nsec3param','ptr','rrsig','rp','sshfp','soa','srv','txt','type65534')
     14 
     15 def construct_map() -> dict:
     16     '''Construct the Elasticsearch index mapping for zone file records.'''
     17 
     18     keyword_mapping = { 'type': 'text',  'fields': { 'keyword': { 'type': 'keyword', 'ignore_above': 256 } } }
     19 
     20     mapping = {
     21         'mappings': {
     22             'properties': {
     23                 'domain':  keyword_mapping,
     24                 'records': { 'properties': {} },
     25                 'seen':    {'type': 'date'}
     26             }
     27         }
     28     }
     29 
     30     # Add record types to mapping dynamically to not clutter the code
     31     for item in record_types:
     32         if item in ('a','aaaa'):
     33             mapping['mappings']['properties']['records']['properties'][item] = {
     34                 'properties': {
     35                     'data': { 'type': 'ip' },
     36                     'ttl':  { 'type': 'integer' }
     37                 }
     38             }
     39         else:
     40             mapping['mappings']['properties']['records']['properties'][item] = {
     41                 'properties': {
     42                 'data': keyword_mapping,
     43                 'ttl':  { 'type': 'integer' }
     44                 }
     45             }
     46 
     47     return mapping
     48 
     49 
     50 async def process_data(file_path: str):
     51     '''
     52     Read and process zone file records.
     53 
     54     :param file_path: Path to the zone file
     55     '''
     56 
     57     domain_records = {}
     58     last_domain = None
     59 
     60     async with aiofiles.open(file_path, mode='r') as input_file:
     61         async for line in input_file:
     62             line = line.strip()
     63 
     64             if not line or line.startswith(';'):
     65                 continue
     66 
     67             parts = line.split()
     68 
     69             if len(parts) < 5:
     70                 raise ValueError(f'Invalid line: {line}')
     71 
     72             domain, ttl, record_class, record_type, data = parts[0].rstrip('.').lower(), parts[1], parts[2].lower(), parts[3].lower(), ' '.join(parts[4:])
     73 
     74             if not ttl.isdigit():
     75                 raise ValueError(f'Invalid TTL: {ttl} with line: {line}')
     76             
     77             ttl = int(ttl)
     78 
     79             if record_class != 'in':
     80                 raise ValueError(f'Unsupported record class: {record_class} with line: {line}') # Anomaly (Doubtful any CHAOS/HESIOD records will be found)
     81 
     82             # We do not want to collide with our current mapping (Again, this is an anomaly)
     83             if record_type not in record_types:
     84                 raise ValueError(f'Unsupported record type: {record_type} with line: {line}')
     85 
     86             # Little tidying up for specific record types
     87             if record_type == 'nsec':
     88                 data = ' '.join([data.split()[0].rstrip('.'), *data.split()[1:]])
     89             elif record_type == 'soa':
     90                     data = ' '.join([part.rstrip('.') if '.' in part else part for part in data.split()])
     91             elif data.endswith('.'):
     92                 data = data.rstrip('.')
     93 
     94             if domain != last_domain:
     95                 if last_domain:
     96                     struct = {'domain': last_domain, 'records': domain_records[last_domain], 'seen': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}
     97                     
     98                     del domain_records[last_domain]
     99 
    100                     yield {'_index': default_index, '_source': struct}
    101 
    102                 last_domain = domain
    103 
    104                 domain_records[domain] = {}
    105 
    106             if record_type not in domain_records[domain]:
    107                 domain_records[domain][record_type] = []
    108 
    109             domain_records[domain][record_type].append({'ttl': ttl, 'data': data})
    110 
    111     return None # EOF
    112 
    113 
    114 
    115 '''
    116 Example record:
    117 0so9l9nrl425q3tf7dkv1nmv2r3is6vm.vegas. 3600    in  nsec3   1 1 100 332539EE7F95C32A 10MHUKG4FHIAVEFDOTF6NKU5KFCB2J3A NS DS RRSIG
    118 0so9l9nrl425q3tf7dkv1nmv2r3is6vm.vegas. 3600    in  rrsig   NSEC3 8 2 3600 20240122151947 20240101141947 4125 vegas. hzIvQrZIxBSwRWyiHkb5M2W0R3ikNehv884nilkvTt9DaJSDzDUrCtqwQb3jh6+BesByBqfMQK+L2n9c//ZSmD5/iPqxmTPCuYIB9uBV2qSNSNXxCY7uUt5w7hKUS68SLwOSjaQ8GRME9WQJhY6gck0f8TT24enjXXRnQC8QitY=
    119 1-800-flowers.vegas.    3600    in  ns  dns1.cscdns.net.
    120 1-800-flowers.vegas.    3600    in  ns  dns2.cscdns.net.
    121 100.vegas.  3600    in  ns  ns51.domaincontrol.com.
    122 100.vegas.  3600    in  ns  ns52.domaincontrol.com.
    123 1001.vegas. 3600    in  ns  ns11.waterrockdigital.com.
    124 1001.vegas. 3600    in  ns  ns12.waterrockdigital.com.
    125 
    126 Will be indexed as:
    127 {
    128     "domain": "1001.vegas",
    129     "records": {
    130         "ns": [
    131             {"ttl": 3600, "data": "ns11.waterrockdigital.com"},
    132             {"ttl": 3600, "data": "ns12.waterrockdigital.com"}
    133         ]
    134     },
    135     "seen": "2021-09-01T00:00:00Z" # Zulu time added upon indexing
    136 }
    137 '''