eris- Elasticsearch Recon Ingestion Scripts (ERIS) 🔎 |
git clone git://git.acid.vegas/-c.git |
Log | Files | Refs | Archive | README | LICENSE |
ingest_httpx.py (2827B)
#!/usr/bin/env python
# Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
# ingest_httpx.py

import json
import re
from datetime import datetime, timedelta
from typing import Iterator

default_index = 'httpx-logs'

# Keys removed from every record before it is yielded (absent keys are ignored).
_DROPPED_FIELDS = ('failed', 'knowledgebase', 'time')

# Splits a timestamp into whole seconds, an optional fractional part of any
# length (discarded), and an optional UTC offset ("Z", "+HH:MM" or "+HHMM").
_TIMESTAMP_RE = re.compile(
    r'^(?P<base>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})'
    r'(?:\.\d+)?'
    r'(?P<offset>Z|[+-]\d{2}:?\d{2})?$'
)


def construct_map() -> dict:
    '''
    Construct the Elasticsearch index mapping for HTTPX records.

    Field names follow the records yielded by process_file(), i.e. after
    "timestamp" is renamed to "seen" and "input" to "domain". Field types are
    inferred from the example record at the bottom of this file.
    NOTE(review): confirm the types against real HTTPX output before relying on them.

    :return: Dictionary usable as the body of an index-creation request
    '''

    # Match on exact value or full text search
    keyword_mapping = { 'type': 'text', 'fields': { 'keyword': { 'type': 'keyword', 'ignore_above': 256 } } }

    mapping = {
        'mappings': {
            'properties': {
                'seen'           : { 'type': 'date' },
                'hash'           : {
                    'properties': {
                        'body_md5'       : { 'type': 'keyword' },
                        'body_mmh3'      : { 'type': 'keyword' },
                        'body_sha256'    : { 'type': 'keyword' },
                        'body_simhash'   : { 'type': 'keyword' },
                        'header_md5'     : { 'type': 'keyword' },
                        'header_mmh3'    : { 'type': 'keyword' },
                        'header_sha256'  : { 'type': 'keyword' },
                        'header_simhash' : { 'type': 'keyword' }
                    }
                },
                # The example record carries the port as a string ("443"); this
                # presumably relies on Elasticsearch's numeric coercion - TODO confirm.
                'port'           : { 'type': 'integer' },
                'url'            : keyword_mapping,
                'domain'         : keyword_mapping,
                'title'          : keyword_mapping,
                'scheme'         : { 'type': 'keyword' },
                'webserver'      : { 'type': 'keyword' },
                'body_preview'   : keyword_mapping,
                'content_type'   : { 'type': 'keyword' },
                'method'         : { 'type': 'keyword' },
                # "host" and "a" hold IP addresses in the example record - TODO confirm
                # they are never hostnames in other HTTPX versions.
                'host'           : { 'type': 'ip' },
                'path'           : keyword_mapping,
                'favicon'        : { 'type': 'keyword' },
                'favicon_path'   : keyword_mapping,
                'a'              : { 'type': 'ip' },
                'tech'           : keyword_mapping,
                'words'          : { 'type': 'integer' },
                'lines'          : { 'type': 'integer' },
                'status_code'    : { 'type': 'integer' },
                'content_length' : { 'type': 'integer' }
            }
        }
    }

    return mapping


def _normalize_timestamp(timestamp: str) -> str:
    '''
    Convert an HTTPX timestamp to second-precision UTC in ISO 8601 form.

    The fractional part is dropped and any UTC offset is applied, so that the
    trailing "Z" is truthful. A timestamp without an offset is taken as UTC.

    :param timestamp: Timestamp such as "2024-01-14T13:08:15.117348474-05:00"
    :return: Timestamp such as "2024-01-14T18:08:15Z"
    '''

    # Previous behaviour, kept as a best-effort fallback for unexpected formats.
    legacy = timestamp.split('.')[0] + 'Z'

    match = _TIMESTAMP_RE.match(timestamp)

    if match is None:
        return legacy

    try:
        moment = datetime.strptime(match.group('base'), '%Y-%m-%dT%H:%M:%S')
    except ValueError: # Well-formed digits but not a real date/time
        return legacy

    offset = match.group('offset')

    if offset and offset != 'Z':
        digits = offset[1:].replace(':', '')
        delta  = timedelta(hours=int(digits[:2]), minutes=int(digits[2:]))

        # Local time = UTC + offset, so UTC = local time - offset
        moment = moment + delta if offset[0] == '-' else moment - delta

    return moment.strftime('%Y-%m-%dT%H:%M:%SZ')


def process_file(file_path: str) -> Iterator[dict]:
    '''
    Read and process HTTPX records from the log file.

    Each non-blank line is parsed as one JSON object. The "timestamp" field is
    renamed to "seen" (normalized to UTC without fractional seconds), "input"
    is renamed to "domain", and the "failed", "knowledgebase" and "time" fields
    are removed. Fields that are missing from a record are simply skipped.

    :param file_path: Path to the HTTPX log file
    :return: Generator yielding one processed record (dict) per line
    :raises json.JSONDecodeError: If a non-blank line is not valid JSON
    '''

    # JSON is UTF-8 by specification; do not depend on the platform default encoding
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()

            if not line:
                continue

            record = json.loads(line)

            timestamp = record.pop('timestamp', None)
            if isinstance(timestamp, str):
                record['seen'] = _normalize_timestamp(timestamp)

            if 'input' in record:
                record['domain'] = record.pop('input')

            for key in _DROPPED_FIELDS:
                record.pop(key, None)

            yield record



'''
Example record:
{
    "timestamp":"2024-01-14T13:08:15.117348474-05:00", # Rename to seen and remove milliseconds and offset
    "hash": { # Do we need all of these ?
        "body_md5":"4ae9394eb98233b482508cbda3b33a66",
        "body_mmh3":"-4111954",
        "body_sha256":"89e06e8374353469c65adb227b158b265641b424fba7ddb2c67eef0c4c1280d3",
        "body_simhash":"9814303593401624250",
        "header_md5":"980366deb2b2fb5df2ad861fc63e79ce",
        "header_mmh3":"-813072798",
        "header_sha256":"39aea75ad548e38b635421861641ad1919ed3b103b17a33c41e7ad46516f736d",
        "header_simhash":"10962523587435277678"
    },
    "port":"443",
    "url":"https://supernets.org", # Remove this and only use the input field as "domain" maybe
    "input":"supernets.org", # rename to domain
    "title":"SuperNETs",
    "scheme":"https",
    "webserver":"nginx",
    "body_preview":"SUPERNETS Home About Contact Donate Docs Network IRC Git Invidious Jitsi LibreX Mastodon Matrix Sup",
    "content_type":"text/html",
    "method":"GET", # Do we need this ?
    "host":"51.89.151.158",
    "path":"/",
    "favicon":"-674048714",
    "favicon_path":"/i/favicon.png",
    "time":"592.907689ms", # Do we need this ?
    "a":[
        "6.150.220.23"
    ],
    "tech":[
        "Bootstrap:4.0.0",
        "HSTS",
        "Nginx"
    ],
    "words":436, # Do we need this ?
    "lines":79, # Do we need this ?
    "status_code":200,
    "content_length":4597,
    "failed":false, # Do we need this ?
    "knowledgebase":{ # Do we need this ?
        "PageType":"nonerror",
        "pHash":0
    }
}
'''