eris

- Elasticsearch Recon Ingestion Scripts (ERIS) 🔎
git clone git://git.acid.vegas/-c.git
Log | Files | Refs | Archive | README | LICENSE

ingest_certs.py (8800B)

      1 #!/usr/bin/env python
      2 # Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
      3 # ingest_certs.py
      4 
      5 import asyncio
      6 import json
      7 import logging
      8 
      9 try:
     10     import websockets
     11 except ImportError:
     12     raise ImportError('Missing required \'websockets\' library. (pip install websockets)')
     13 
     14 default_index = 'cert-stream'
     15 
     16 def construct_map() -> dict:
     17     '''Construct the Elasticsearch index mapping for Certstream records.'''
     18 
     19     keyword_mapping = { 'type': 'text',  'fields': { 'keyword': { 'type': 'keyword', 'ignore_above': 256 } } }
     20 
     21     mapping = {
     22         'mappings': {
     23             'properties': {
     24                 'data': {
     25                     'properties': {
     26                         'cert_index': { 'type': 'integer' },
     27                         'cert_link': { 'type': 'keyword' },
     28                         'leaf_cert': {
     29                             'properties': {
     30                                 'all_domains': { 'type': 'keyword' },
     31                                 'extensions': {
     32                                     'properties': {
     33                                         'authorityInfoAccess'    : { 'type': 'text'    },
     34                                         'authorityKeyIdentifier' : { 'type': 'text'    },
     35                                         'basicConstraints'       : { 'type': 'text'    },
     36                                         'certificatePolicies'    : { 'type': 'text'    },
     37                                         'crlDistributionPoints'  : { 'type': 'text'    },
     38                                         'ctlPoisonByte'          : { 'type': 'boolean' },
     39                                         'extendedKeyUsage'       : { 'type': 'text'    },
     40                                         'keyUsage'               : { 'type': 'text'    },
     41                                         'subjectAltName'         : { 'type': 'text'    },
     42                                         'subjectKeyIdentifier'   : { 'type': 'text'    }
     43                                     }
     44                                 },
     45                                 'fingerprint': { 'type': 'keyword' },
     46                                 'issuer': {
     47                                     'properties': {
     48                                         'C'            : { 'type': 'keyword' },
     49                                         'CN'           : { 'type': 'text'    },
     50                                         'L'            : { 'type': 'text'    },
     51                                         'O'            : { 'type': 'text'    },
     52                                         'OU'           : { 'type': 'text'    },
     53                                         'ST'           : { 'type': 'text'    },
     54                                         'aggregated'   : { 'type': 'text'    },
     55                                         'emailAddress' : { 'type': 'text'    }
     56                                     }
     57                                 },
     58                                 'not_after': { 'type': 'integer' },
     59                                 'not_before': { 'type': 'integer' },
     60                                 'serial_number': { 'type': 'keyword' },
     61                                 'signature_algorithm': { 'type': 'text' },
     62                                 'subject': {
     63                                     'properties': {
     64                                         'C'            : { 'type': 'keyword' },
     65                                         'CN'           : { 'type': 'text'    },
     66                                         'L'            : { 'type': 'text'    },
     67                                         'O'            : { 'type': 'text'    },
     68                                         'OU'           : { 'type': 'text'    },
     69                                         'ST'           : { 'type': 'text'    },
     70                                         'aggregated'   : { 'type': 'text'    },
     71                                         'emailAddress' : { 'type': 'text'    }
     72                                     }
     73                                 }
     74                             }
     75                         },
     76                         'seen': { 'type': 'date', 'format': 'epoch_second' },
     77                         'source': {
     78                             'properties': {
     79                                 'name' : { 'type': 'keyword' },
     80                                 'url'  : { 'type': 'keyword' }
     81                             }
     82                         },
     83                         'update_type': { 'type': 'keyword' }
     84                     }
     85                 },
     86                 'message_type': { 'type': 'keyword' }
     87             }
     88         }
     89     }
     90 
     91     return mapping
     92 
     93 
     94 async def process_data(file_path: str = None):
     95     '''
     96     Read and process Certsream records live from the Websocket stream.
     97     
     98     :param file_path: Path to the Certstream log file (unused, placeholder for consistency with other ingestors)
     99     '''
    100 
    101     while True:
    102         try:
    103             async with websockets.connect('wss://certstream.calidog.io/') as websocket:
    104                 
    105                 while True:
    106                     line = await websocket.recv()
    107 
    108                     try:
    109                         record = json.loads(line)
    110                     except json.decoder.JSONDecodeError:
    111                         logging.error(f'Failed to parse JSON record from Certstream! ({line})')
    112                         input('Press Enter to continue...') # Pause the script to allow the user to read the error message
    113                         continue
    114 
    115                     yield record
    116 
    117         except websockets.ConnectionClosed:
    118             logging.error('Connection to Certstream was closed. Attempting to reconnect...')
    119             await asyncio.sleep(10)
    120 
    121         except Exception as e:
    122             logging.error(f'An error occurred while processing Certstream records! ({e})')
    123             await asyncio.sleep(10)
    124 
    125 
    126 async def strip_struct_empty(data: dict) -> dict:
    127     '''
    128     Recursively remove empty values from a nested dictionary or list.
    129     
    130     :param data: The dictionary or list to clean.
    131     '''
    132 
    133     empties = [None, '', [], {}]
    134 
    135     if isinstance(data, dict):
    136         for key, value in list(data.items()):
    137             if value in empties:
    138                 del data[key]
    139             else:
    140                 cleaned_value = strip_struct_empty(value)
    141                 if cleaned_value in empties:
    142                     del data[key]
    143                 else:
    144                     data[key] = cleaned_value
    145 
    146         return data
    147     
    148     elif isinstance(data, list):
    149         return [strip_struct_empty(item) for item in data if item not in empties and strip_struct_empty(item) not in empties]
    150 
    151     else:
    152         return data
    153 
    154 
    155 
    156 '''
    157 Example record:
    158 {
    159   "data": {
    160     "cert_index": 43061646,
    161     "cert_link": "https://yeti2025.ct.digicert.com/log/ct/v1/get-entries?start=43061646&end=43061646",
    162     "leaf_cert": {
    163       "all_domains": [
    164         "*.d7zdnegbre53n.amplifyapp.com",
    165         "d7zdnegbre53n.amplifyapp.com"
    166       ],
    167       "extensions": {
    168         "authorityInfoAccess": "CA Issuers - URI:http://crt.r2m02.amazontrust.com/r2m02.cer\nOCSP - URI:http://ocsp.r2m02.amazontrust.com\n",
    169         "authorityKeyIdentifier": "keyid:C0:31:52:CD:5A:50:C3:82:7C:74:71:CE:CB:E9:9C:F9:7A:EB:82:E2\n",
    170         "basicConstraints": "CA:FALSE",
    171         "certificatePolicies": "Policy: 2.23.140.1.2.1",
    172         "crlDistributionPoints": "Full Name:\n URI:http://crl.r2m02.amazontrust.com/r2m02.crl",
    173         "ctlPoisonByte": true,
    174         "extendedKeyUsage": "TLS Web server authentication, TLS Web client authentication",
    175         "keyUsage": "Digital Signature, Key Encipherment",
    176         "subjectAltName": "DNS:d7zdnegbre53n.amplifyapp.com, DNS:*.d7zdnegbre53n.amplifyapp.com",
    177         "subjectKeyIdentifier": "59:32:78:2A:11:03:62:55:BB:3B:B9:80:24:76:28:90:2E:D1:A4:56"
    178       },
    179       "fingerprint": "D9:05:A3:D5:AA:F9:68:BC:0C:0A:15:69:C9:5E:11:92:32:67:4F:FA",
    180       "issuer": {
    181         "C": "US",
    182         "CN": "Amazon RSA 2048 M02",
    183         "L": null,
    184         "O": "Amazon",
    185         "OU": null,
    186         "ST": null,
    187         "aggregated": "/C=US/CN=Amazon RSA 2048 M02/O=Amazon",
    188         "emailAddress": null
    189       },
    190       "not_after": 1743811199,
    191       "not_before": 1709596800,
    192       "serial_number": "FDB450C1942E3D30A18737063449E62",
    193       "signature_algorithm": "sha256, rsa",
    194       "subject": {
    195         "C": null,
    196         "CN": "*.d7zdnegbre53n.amplifyapp.com",
    197         "L": null,
    198         "O": null,
    199         "OU": null,
    200         "ST": null,
    201         "aggregated": "/CN=*.d7zdnegbre53n.amplifyapp.com",
    202         "emailAddress": null
    203       }
    204     },
    205     "seen": 1709651773.594684,
    206     "source": {
    207       "name": "DigiCert Yeti2025 Log",
    208       "url": "https://yeti2025.ct.digicert.com/log/"
    209     },
    210     "update_type": "PrecertLogEntry"
    211   },
    212   "message_type": "certificate_update"
    213 }
    214 '''