eris- Elasticsearch Recon Ingestion Scripts (ERIS) 🔎 |
git clone git://git.acid.vegas/-c.git |
Log | Files | Refs | Archive | README | LICENSE |
ingest_httpx.py (3042B)
#!/usr/bin/env python
# Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
# ingest_httpx.py

import json
import re
from datetime import datetime, timedelta

try:
    import aiofiles
except ImportError:
    raise ImportError('Missing required \'aiofiles\' library. (pip install aiofiles)')

default_index = 'httpx-logs'

# HTTPX fields that are dropped from every record before it is yielded.
_DISCARDED_FIELDS = ('failed', 'knowledgebase', 'time')

# <date>T<time>[.<fraction>][Z | +HH:MM | -HH:MM]
_TIMESTAMP_PATTERN = re.compile(r'^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(?:\.\d+)?(Z|[+-]\d{2}:?\d{2})?$')


def construct_map() -> dict:
    '''
    Construct the Elasticsearch index mapping for HTTPX records.

    The field list is taken from the example record at the bottom of this file,
    after the renames performed by process_data (timestamp -> seen, input -> domain).
    Fields that are not listed are left to Elasticsearch's dynamic mapping.

    :return: Index body containing the 'mappings' definition
    '''

    def keyword_mapping() -> dict:
        # Full-text field with an exact-match 'keyword' sub-field.
        # A fresh dict is built per field so the entries never alias each other.
        return { 'type': 'text', 'fields': { 'keyword': { 'type': 'keyword', 'ignore_above': 256 } } }

    mapping = {
        'mappings': {
            'properties': {
                'seen'           : { 'type': 'date' },
                'hash'           : {
                    'properties': {
                        'body_md5'       : { 'type': 'keyword' },
                        'body_mmh3'      : { 'type': 'keyword' },
                        'body_sha256'    : { 'type': 'keyword' },
                        'body_simhash'   : { 'type': 'keyword' },
                        'header_md5'     : { 'type': 'keyword' },
                        'header_mmh3'    : { 'type': 'keyword' },
                        'header_sha256'  : { 'type': 'keyword' },
                        'header_simhash' : { 'type': 'keyword' }
                    }
                },
                'port'           : { 'type': 'integer' }, # HTTPX emits the port as a string; relies on numeric coercion
                'url'            : keyword_mapping(),
                'domain'         : keyword_mapping(),
                'title'          : keyword_mapping(),
                'scheme'         : { 'type': 'keyword' },
                'webserver'      : { 'type': 'keyword' },
                'body_preview'   : keyword_mapping(),
                'content_type'   : { 'type': 'keyword' },
                'method'         : { 'type': 'keyword' },
                'host'           : { 'type': 'ip' },
                'path'           : keyword_mapping(),
                'favicon'        : { 'type': 'keyword' },
                'favicon_path'   : keyword_mapping(),
                'a'              : { 'type': 'ip' },
                'tech'           : keyword_mapping(),
                'words'          : { 'type': 'integer' },
                'lines'          : { 'type': 'integer' },
                'status_code'    : { 'type': 'integer' },
                'content_length' : { 'type': 'integer' }
            }
        }
    }

    return mapping


def _normalize_timestamp(timestamp: str) -> str:
    '''
    Convert an HTTPX timestamp to second-precision UTC in ISO 8601 form.

    Example: '2024-01-14T13:08:15.117348474-05:00' -> '2024-01-14T18:08:15Z'

    :param timestamp: Timestamp string as written by HTTPX
    :return: Timestamp formatted as YYYY-MM-DDTHH:MM:SSZ
    '''

    match = _TIMESTAMP_PATTERN.match(timestamp)

    if match is None:
        # Unrecognised layout: keep the previous best-effort behaviour instead of failing the whole run
        return timestamp.split('.')[0] + 'Z'

    wall_clock, offset = match.groups()
    moment = datetime.strptime(wall_clock, '%Y-%m-%dT%H:%M:%S')

    if offset and offset != 'Z':
        digits = offset[1:].replace(':', '')
        shift = timedelta(hours=int(digits[:2]), minutes=int(digits[2:]))
        # The trailing 'Z' asserts UTC, so the offset has to be applied rather than just discarded
        moment = moment - shift if offset[0] == '+' else moment + shift

    return moment.isoformat() + 'Z'


def _normalize_record(record: dict) -> dict:
    '''
    Rename and prune the fields of one decoded HTTPX record (modified in place).

    :param record: Decoded JSON object for a single HTTPX result
    :return: The same dict, ready to be used as a document source
    '''

    # Records that lack an optional field must not abort the run, hence the guards and pop defaults
    if 'timestamp' in record:
        record['seen'] = _normalize_timestamp(record.pop('timestamp'))

    if 'input' in record:
        record['domain'] = record.pop('input')

    for field in _DISCARDED_FIELDS:
        record.pop(field, None)

    return record


async def process_data(file_path: str):
    '''
    Read and process HTTPX records from the log file.

    This is an async generator: each non-empty line is decoded as JSON, normalized,
    and yielded as a dict holding the target '_index' and the document '_source'.
    A line that is not valid JSON raises json.JSONDecodeError.

    :param file_path: Path to the HTTPX log file (one JSON object per line)
    '''

    async with aiofiles.open(file_path, mode='r') as input_file:
        async for line in input_file:
            line = line.strip()

            if not line:
                continue

            record = _normalize_record(json.loads(line))

            yield {'_index': default_index, '_source': record}

    # No explicit 'return None' here: a return with a value is a SyntaxError inside an async generator (PEP 525)



'''
Example record:
{
    "timestamp":"2024-01-14T13:08:15.117348474-05:00", # Renamed to seen, converted to UTC, fractional seconds dropped
    "hash": { # Do we need all of these ?
        "body_md5":"4ae9394eb98233b482508cbda3b33a66",
        "body_mmh3":"-4111954",
        "body_sha256":"89e06e8374353469c65adb227b158b265641b424fba7ddb2c67eef0c4c1280d3",
        "body_simhash":"9814303593401624250",
        "header_md5":"980366deb2b2fb5df2ad861fc63e79ce",
        "header_mmh3":"-813072798",
        "header_sha256":"39aea75ad548e38b635421861641ad1919ed3b103b17a33c41e7ad46516f736d",
        "header_simhash":"10962523587435277678"
    },
    "port":"443",
    "url":"https://supernets.org", # Remove this and only use the input field as "domain" maybe
    "input":"supernets.org", # rename to domain
    "title":"SuperNETs",
    "scheme":"https",
    "webserver":"nginx",
    "body_preview":"SUPERNETS Home About Contact Donate Docs Network IRC Git Invidious Jitsi LibreX Mastodon Matrix Sup",
    "content_type":"text/html",
    "method":"GET", # Do we need this ?
    "host":"51.89.151.158",
    "path":"/",
    "favicon":"-674048714",
    "favicon_path":"/i/favicon.png",
    "time":"592.907689ms", # Do we need this ?
    "a":[
        "6.150.220.23"
    ],
    "tech":[
        "Bootstrap:4.0.0",
        "HSTS",
        "Nginx"
    ],
    "words":436, # Do we need this ?
    "lines":79, # Do we need this ?
    "status_code":200,
    "content_length":4597,
    "failed":false, # Do we need this ?
    "knowledgebase":{ # Do we need this ?
        "PageType":"nonerror",
        "pHash":0
    }
}
'''