diff --git a/README.md b/README.md
@@ -8,6 +8,7 @@ The is a suite of tools to aid in the ingestion of recon data from various sourc
- [elasticsearch](https://pypi.org/project/elasticsearch/) *(`pip install elasticsearch`)*
- [aiofiles](https://pypi.org/project/aiofiles) *(`pip install aiofiles`)*
- [aiohttp](https://pypi.org/projects/aiohttp) *(`pip install aiohttp`)*
+ - [websockets](https://pypi.org/project/websockets/) *(`pip install websockets`) (only required for `--certs` ingestion)*
## Usage
```shell
diff --git a/ingestors/ingest_certs.py b/ingestors/ingest_certs.py
@@ -91,11 +91,11 @@ def construct_map() -> dict:
return mapping
-async def process_data(file_path: str = None):
+async def process_data(place_holder: str = None):
'''
Read and process Certsream records live from the Websocket stream.
- :param file_path: Path to the Certstream log file (unused, placeholder for consistency with other ingestors)
+ :param place_holder: Placeholder parameter to match the process_data function signature of other ingestors.
'''
while True:
@@ -154,63 +154,82 @@ async def strip_struct_empty(data: dict) -> dict:
return data
+async def test(input_path: str = None):
+    '''Test the Certstream ingestion process (input_path is an unused placeholder; records stream live from the websocket)'''
+
+ async for document in process_data():
+ print(document)
+
+
+
+if __name__ == '__main__':
+ import argparse
+ import asyncio
+
+ parser = argparse.ArgumentParser(description='Certstream Ingestor for ERIS')
+ parser.add_argument('input_path', help='Path to the input file or directory')
+ args = parser.parse_args()
+
+ asyncio.run(test(args.input_path))
+
+
'''
-Example record:
-{
- "data": {
- "cert_index": 43061646,
- "cert_link": "https://yeti2025.ct.digicert.com/log/ct/v1/get-entries?start=43061646&end=43061646",
- "leaf_cert": {
- "all_domains": [
- "*.d7zdnegbre53n.amplifyapp.com",
- "d7zdnegbre53n.amplifyapp.com"
- ],
- "extensions": {
- "authorityInfoAccess": "CA Issuers - URI:http://crt.r2m02.amazontrust.com/r2m02.cer\nOCSP - URI:http://ocsp.r2m02.amazontrust.com\n",
- "authorityKeyIdentifier": "keyid:C0:31:52:CD:5A:50:C3:82:7C:74:71:CE:CB:E9:9C:F9:7A:EB:82:E2\n",
- "basicConstraints": "CA:FALSE",
- "certificatePolicies": "Policy: 2.23.140.1.2.1",
- "crlDistributionPoints": "Full Name:\n URI:http://crl.r2m02.amazontrust.com/r2m02.crl",
- "ctlPoisonByte": true,
- "extendedKeyUsage": "TLS Web server authentication, TLS Web client authentication",
- "keyUsage": "Digital Signature, Key Encipherment",
- "subjectAltName": "DNS:d7zdnegbre53n.amplifyapp.com, DNS:*.d7zdnegbre53n.amplifyapp.com",
- "subjectKeyIdentifier": "59:32:78:2A:11:03:62:55:BB:3B:B9:80:24:76:28:90:2E:D1:A4:56"
- },
- "fingerprint": "D9:05:A3:D5:AA:F9:68:BC:0C:0A:15:69:C9:5E:11:92:32:67:4F:FA",
- "issuer": {
- "C": "US",
- "CN": "Amazon RSA 2048 M02",
- "L": null,
- "O": "Amazon",
- "OU": null,
- "ST": null,
- "aggregated": "/C=US/CN=Amazon RSA 2048 M02/O=Amazon",
- "emailAddress": null
- },
- "not_after": 1743811199,
- "not_before": 1709596800,
- "serial_number": "FDB450C1942E3D30A18737063449E62",
- "signature_algorithm": "sha256, rsa",
- "subject": {
- "C": null,
- "CN": "*.d7zdnegbre53n.amplifyapp.com",
- "L": null,
- "O": null,
- "OU": null,
- "ST": null,
- "aggregated": "/CN=*.d7zdnegbre53n.amplifyapp.com",
- "emailAddress": null
- }
- },
- "seen": 1709651773.594684,
- "source": {
- "name": "DigiCert Yeti2025 Log",
- "url": "https://yeti2025.ct.digicert.com/log/"
- },
- "update_type": "PrecertLogEntry"
- },
- "message_type": "certificate_update"
-}
+Output:
+ {
+ "data": {
+ "cert_index": 43061646,
+ "cert_link": "https://yeti2025.ct.digicert.com/log/ct/v1/get-entries?start=43061646&end=43061646",
+ "leaf_cert": {
+ "all_domains": [
+ "*.d7zdnegbre53n.amplifyapp.com",
+ "d7zdnegbre53n.amplifyapp.com"
+ ],
+ "extensions": {
+ "authorityInfoAccess" : "CA Issuers - URI:http://crt.r2m02.amazontrust.com/r2m02.cer\nOCSP - URI:http://ocsp.r2m02.amazontrust.com\n",
+ "authorityKeyIdentifier" : "keyid:C0:31:52:CD:5A:50:C3:82:7C:74:71:CE:CB:E9:9C:F9:7A:EB:82:E2\n",
+ "basicConstraints" : "CA:FALSE",
+ "certificatePolicies" : "Policy: 2.23.140.1.2.1",
+ "crlDistributionPoints" : "Full Name:\n URI:http://crl.r2m02.amazontrust.com/r2m02.crl",
+ "ctlPoisonByte" : true,
+ "extendedKeyUsage" : "TLS Web server authentication, TLS Web client authentication",
+ "keyUsage" : "Digital Signature, Key Encipherment",
+ "subjectAltName" : "DNS:d7zdnegbre53n.amplifyapp.com, DNS:*.d7zdnegbre53n.amplifyapp.com",
+ "subjectKeyIdentifier" : "59:32:78:2A:11:03:62:55:BB:3B:B9:80:24:76:28:90:2E:D1:A4:56"
+ },
+ "fingerprint": "D9:05:A3:D5:AA:F9:68:BC:0C:0A:15:69:C9:5E:11:92:32:67:4F:FA",
+ "issuer": {
+ "C" : "US",
+ "CN" : "Amazon RSA 2048 M02",
+ "L" : null,
+ "O" : "Amazon",
+ "OU" : null,
+ "ST" : null,
+ "aggregated" : "/C=US/CN=Amazon RSA 2048 M02/O=Amazon",
+ "emailAddress" : null
+ },
+ "not_after" : 1743811199,
+ "not_before" : 1709596800,
+ "serial_number" : "FDB450C1942E3D30A18737063449E62",
+ "signature_algorithm" : "sha256, rsa",
+ "subject": {
+ "C" : null,
+ "CN" : "*.d7zdnegbre53n.amplifyapp.com",
+ "L" : null,
+ "O" : null,
+ "OU" : null,
+ "ST" : null,
+ "aggregated" : "/CN=*.d7zdnegbre53n.amplifyapp.com",
+ "emailAddress" : null
+ }
+ },
+ "seen": 1709651773.594684,
+ "source": {
+ "name" : "DigiCert Yeti2025 Log",
+ "url" : "https://yeti2025.ct.digicert.com/log/"
+ },
+ "update_type": "PrecertLogEntry"
+ },
+ "message_type": "certificate_update"
+ }
'''
\ No newline at end of file
diff --git a/ingestors/ingest_httpx.py b/ingestors/ingest_httpx.py
@@ -82,45 +82,72 @@ async def process_data(file_path: str):
yield {'_id': record['domain'], '_index': default_index, '_source': record}
+async def test(input_path: str):
+ '''
+ Test the HTTPX ingestion process
+
+ :param input_path: Path to the HTTPX log file
+ '''
+ async for document in process_data(input_path):
+ print(document)
+
+
+
+if __name__ == '__main__':
+ import argparse
+ import asyncio
+
+ parser = argparse.ArgumentParser(description='HTTPX Ingestor for ERIS')
+ parser.add_argument('input_path', help='Path to the input file or directory')
+ args = parser.parse_args()
+
+ asyncio.run(test(args.input_path))
+
+
'''
-Example record:
-{
- "timestamp":"2024-01-14T13:08:15.117348474-05:00", # Rename to seen and remove milliseconds and offset
- "hash": { # Do we need all of these ?
- "body_md5" : "4ae9394eb98233b482508cbda3b33a66",
- "body_mmh3" : "-4111954",
- "body_sha256" : "89e06e8374353469c65adb227b158b265641b424fba7ddb2c67eef0c4c1280d3",
- "body_simhash" : "9814303593401624250",
- "header_md5" : "980366deb2b2fb5df2ad861fc63e79ce",
- "header_mmh3" : "-813072798",
- "header_sha256" : "39aea75ad548e38b635421861641ad1919ed3b103b17a33c41e7ad46516f736d",
- "header_simhash" : "10962523587435277678"
- },
- "port" : "443",
- "url" : "https://supernets.org", # Remove this and only use the input field as "domain" maybe
- "input" : "supernets.org", # rename to domain
- "title" : "SuperNETs",
- "scheme" : "https",
- "webserver" : "nginx",
- "body_preview" : "SUPERNETS Home About Contact Donate Docs Network IRC Git Invidious Jitsi LibreX Mastodon Matrix Sup",
- "content_type" : "text/html",
- "method" : "GET", # Remove this
- "host" : "51.89.151.158",
- "path" : "/",
- "favicon" : "-674048714",
- "favicon_path" : "/i/favicon.png",
- "time" : "592.907689ms", # Do we need this ?
- "a" : ["6.150.220.23"],
- "tech" : ["Bootstrap:4.0.0", "HSTS", "Nginx"],
- "words" : 436, # Do we need this ?
- "lines" : 79, # Do we need this ?
- "status_code" : 200,
- "content_length" : 4597,
- "failed" : false, # Do we need this ?
- "knowledgebase" : { # Do we need this ?
- "PageType" : "nonerror",
- "pHash" : 0
+Deploy:
+ go install -v github.com/projectdiscovery/httpx/cmd/httpx@latest
+ curl -s https://public-dns.info/nameservers.txt -o nameservers.txt
+ httpx -l zone.txt -t 200 -sc -location -favicon -title -bp -td -ip -cname -mc 200,201,301,302,303,307,308 -fr -r nameservers.txt -retries 2 -stream -sd -j -o httpx.json -v
+
+Output:
+ {
+ "timestamp":"2024-01-14T13:08:15.117348474-05:00", # Rename to seen and remove milliseconds and offset
+ "hash": { # Do we need all of these ?
+ "body_md5" : "4ae9394eb98233b482508cbda3b33a66",
+ "body_mmh3" : "-4111954",
+ "body_sha256" : "89e06e8374353469c65adb227b158b265641b424fba7ddb2c67eef0c4c1280d3",
+ "body_simhash" : "9814303593401624250",
+ "header_md5" : "980366deb2b2fb5df2ad861fc63e79ce",
+ "header_mmh3" : "-813072798",
+ "header_sha256" : "39aea75ad548e38b635421861641ad1919ed3b103b17a33c41e7ad46516f736d",
+ "header_simhash" : "10962523587435277678"
+ },
+ "port" : "443",
+ "url" : "https://supernets.org", # Remove this and only use the input field as "domain" maybe
+ "input" : "supernets.org", # rename to domain
+ "title" : "SuperNETs",
+ "scheme" : "https",
+ "webserver" : "nginx",
+ "body_preview" : "SUPERNETS Home About Contact Donate Docs Network IRC Git Invidious Jitsi LibreX Mastodon Matrix Sup",
+ "content_type" : "text/html",
+ "method" : "GET", # Remove this
+ "host" : "51.89.151.158",
+ "path" : "/",
+ "favicon" : "-674048714",
+ "favicon_path" : "/i/favicon.png",
+ "time" : "592.907689ms", # Do we need this ?
+ "a" : ["6.150.220.23"],
+ "tech" : ["Bootstrap:4.0.0", "HSTS", "Nginx"],
+ "words" : 436, # Do we need this ?
+ "lines" : 79, # Do we need this ?
+ "status_code" : 200,
+ "content_length" : 4597,
+ "failed" : false, # Do we need this ?
+ "knowledgebase" : { # Do we need this ?
+ "PageType" : "nonerror",
+ "pHash" : 0
+ }
}
-}
'''
\ No newline at end of file
diff --git a/ingestors/ingest_masscan.py b/ingestors/ingest_masscan.py
@@ -113,48 +113,65 @@ async def process_data(file_path: str):
yield {'_id': id, '_index': default_index, '_source': struct}
+async def test(input_path: str):
+ '''
+ Test the Masscan ingestion process
+
+    :param input_path: Path to the Masscan log file
+ '''
+ async for document in process_data(input_path):
+ print(document)
+
-'''
-Example record:
-{
- "ip" : "43.134.51.142",
- "timestamp" : "1705255468", # Convert to ZULU BABY
- "ports" : [ # We will create a record for each port opened
- {
- "port" : 22,
- "proto" : "tcp",
- "service" : { # This field is optional
- "name" : "ssh",
- "banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4"
- }
- }
- ]
-}
-
-Will be indexed as:
-{
- "_id" : "43.134.51.142:22"
- "_index" : "masscan-logs",
- "_source" : {
- "ip" : "43.134.51.142",
- "port" : 22,
- "proto" : "tcp",
- "service" : "ssh",
- "banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4",
- "seen" : "2021-10-08T02:04:28Z"
-}
-'''
+
+if __name__ == '__main__':
+ import argparse
+ import asyncio
+
+ parser = argparse.ArgumentParser(description='Masscan Ingestor for ERIS')
+ parser.add_argument('input_path', help='Path to the input file or directory')
+ args = parser.parse_args()
+
+ asyncio.run(test(args.input_path))
'''
-Notes:
-
-apt-get install iptables masscan libpcap-dev screen
-setcap 'CAP_NET_RAW+eip CAP_NET_ADMIN+eip' /bin/masscan
-/sbin/iptables -A INPUT -p tcp --dport 61010 -j DROP # Not persistent
-printf "0.0.0.0/8\n10.0.0.0/8\n100.64.0.0/10\n127.0.0.0/8\n169.254.0.0/16\n172.16.0.0/12\n192.0.0.0/24\n192.0.2.0/24\n192.31.196.0/24\n192.52.193.0/24\n192.88.99.0/24\n192.168.0.0/16\n192.175.48.0/24\n198.18.0.0/15\n198.51.100.0/24\n203.0.113.0/24\n224.0.0.0/3\n255.255.255.255/32" > exclude.conf
-screen -S scan
-masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61010 --open-only --rate 30000 --excludefile exclude.conf -oJ output.json
-masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61000-65503 --open-only --rate 30000 --excludefile exclude.conf -oJ output_new.json --shard $i/$TOTAL
+Deploy:
+ apt-get install iptables masscan libpcap-dev screen
+ setcap 'CAP_NET_RAW+eip CAP_NET_ADMIN+eip' /bin/masscan
+ /sbin/iptables -A INPUT -p tcp --dport 61010 -j DROP # Not persistent
+ printf "0.0.0.0/8\n10.0.0.0/8\n100.64.0.0/10\n127.0.0.0/8\n169.254.0.0/16\n172.16.0.0/12\n192.0.0.0/24\n192.0.2.0/24\n192.31.196.0/24\n192.52.193.0/24\n192.88.99.0/24\n192.168.0.0/16\n192.175.48.0/24\n198.18.0.0/15\n198.51.100.0/24\n203.0.113.0/24\n224.0.0.0/3\n255.255.255.255/32" > exclude.conf
+ screen -S scan
+ masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61010 --open-only --rate 30000 --excludefile exclude.conf -oJ output.json
+ masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61000-65503 --open-only --rate 30000 --excludefile exclude.conf -oJ output_new.json --shard $i/$TOTAL
+
+Output:
+ {
+ "ip" : "43.134.51.142",
+ "timestamp" : "1705255468",
+ "ports" : [
+ {
+ "port" : 22, # We will create a record for each port opened
+ "proto" : "tcp",
+ "service" : {
+ "name" : "ssh",
+ "banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4"
+ }
+ }
+ ]
+ }
+
+Input:
+ {
+ "_id" : "43.134.51.142:22"
+ "_index" : "masscan-logs",
+ "_source" : {
+ "ip" : "43.134.51.142",
+ "port" : 22,
+ "proto" : "tcp",
+ "service" : "ssh",
+ "banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4",
+ "seen" : "2021-10-08T02:04:28Z"
+ }
'''
\ No newline at end of file
diff --git a/ingestors/ingest_massdns.py b/ingestors/ingest_massdns.py
@@ -2,35 +2,6 @@
# Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
# ingest_massdns.py
-'''
-Deployment:
- git clone https://github.com/blechschmidt/massdns.git $HOME/massdns && cd $HOME/massdns && make
- curl -s https://public-dns.info/nameservers.txt | grep -v ':' > $HOME/massdns/nameservers.txt
- pythons ./scripts/ptr.py | ./bin/massdns -r $HOME/massdns/nameservers.txt -t PTR --filter NOERROR-s 1000 -o S -w $HOME/massdns/fifo.json
- or...
- while true; do python ./scripts/ptr.py | ./bin/massdns -r $HOME/massdns/nameservers.txt -t PTR --filter NOERROR -s 1000 -o S -w $HOME/massdns/fifo.json; done
-
-Output:
- 0.6.229.47.in-addr.arpa. PTR 047-229-006-000.res.spectrum.com.
- 0.6.228.75.in-addr.arpa. PTR 0.sub-75-228-6.myvzw.com.
- 0.6.207.73.in-addr.arpa. PTR c-73-207-6-0.hsd1.ga.comcast.net.
-
-Input:
- {
- "_id" : "47.229.6.0"
- "_index" : "ptr-records",
- "_source" : {
- "ip" : "47.229.6.0",
- "record" : "047-229-006-000.res.spectrum.com", # This will be a list if there are more than one PTR record
- "seen" : "2021-06-30T18:31:00Z"
- }
- }
-
-Notes:
-- Why do some IP addresses return a CNAME from a PTR request
-- What is dns-servfail.net (Frequent CNAME response from PTR requests)
-'''
-
import logging
import time
@@ -161,4 +132,35 @@ if __name__ == '__main__':
parser.add_argument('input_path', help='Path to the input file or directory')
args = parser.parse_args()
- asyncio.run(test(args.input_path))
\ No newline at end of file
+ asyncio.run(test(args.input_path))
+
+
+
+'''
+Deployment:
+ git clone --depth 1 https://github.com/blechschmidt/massdns.git $HOME/massdns && cd $HOME/massdns && make
+ curl -s https://public-dns.info/nameservers.txt | grep -v ':' > $HOME/massdns/nameservers.txt
+    python ./scripts/ptr.py | ./bin/massdns -r $HOME/massdns/nameservers.txt -t PTR --filter NOERROR -s 1000 -o S -w $HOME/massdns/fifo.json
+ or...
+ while true; do python ./scripts/ptr.py | ./bin/massdns -r $HOME/massdns/nameservers.txt -t PTR --filter NOERROR -s 1000 -o S -w $HOME/massdns/fifo.json; done
+
+Output:
+ 0.6.229.47.in-addr.arpa. PTR 047-229-006-000.res.spectrum.com.
+ 0.6.228.75.in-addr.arpa. PTR 0.sub-75-228-6.myvzw.com.
+ 0.6.207.73.in-addr.arpa. PTR c-73-207-6-0.hsd1.ga.comcast.net.
+
+Input:
+ {
+ "_id" : "47.229.6.0"
+ "_index" : "ptr-records",
+ "_source" : {
+ "ip" : "47.229.6.0",
+ "record" : "047-229-006-000.res.spectrum.com", # This will be a list if there are more than one PTR record
+ "seen" : "2021-06-30T18:31:00Z"
+ }
+ }
+
+Notes:
+- Why do some IP addresses return a CNAME from a PTR request
+- What is dns-servfail.net (Frequent CNAME response from PTR requests)
+'''
\ No newline at end of file
diff --git a/ingestors/ingest_zone.py b/ingestors/ingest_zone.py
@@ -119,36 +119,50 @@ async def process_data(file_path: str):
domain_records[domain][record_type].append({'ttl': ttl, 'data': data})
+async def test(input_path: str):
+ '''
+ Test the Zone file ingestion process
+
+    :param input_path: Path to the zone file
+ '''
+ async for document in process_data(input_path):
+ print(document)
+
+
+
+if __name__ == '__main__':
+ import argparse
+ import asyncio
+
+ parser = argparse.ArgumentParser(description='Zone file Ingestor for ERIS')
+ parser.add_argument('input_path', help='Path to the input file or directory')
+ args = parser.parse_args()
+
+ asyncio.run(test(args.input_path))
+
+
'''
-Example record:
-0so9l9nrl425q3tf7dkv1nmv2r3is6vm.vegas. 3600 in nsec3 1 1 100 332539EE7F95C32A 10MHUKG4FHIAVEFDOTF6NKU5KFCB2J3A NS DS RRSIG
-0so9l9nrl425q3tf7dkv1nmv2r3is6vm.vegas. 3600 in rrsig NSEC3 8 2 3600 20240122151947 20240101141947 4125 vegas. hzIvQrZIxBSwRWyiHkb5M2W0R3ikNehv884nilkvTt9DaJSDzDUrCtqwQb3jh6+BesByBqfMQK+L2n9c//ZSmD5/iPqxmTPCuYIB9uBV2qSNSNXxCY7uUt5w7hKUS68SLwOSjaQ8GRME9WQJhY6gck0f8TT24enjXXRnQC8QitY=
-1-800-flowers.vegas. 3600 in ns dns1.cscdns.net.
-1-800-flowers.vegas. 3600 in ns dns2.cscdns.net.
-100.vegas. 3600 in ns ns51.domaincontrol.com.
-100.vegas. 3600 in ns ns52.domaincontrol.com.
-1001.vegas. 3600 in ns ns11.waterrockdigital.com.
-1001.vegas. 3600 in ns ns12.waterrockdigital.com.
-
-Will be indexed as:
-{
- "_id" : "1001.vegas"
- "_index" : "dns-zones",
- "_source" : {
- "domain" : "1001.vegas",
- "records" : { # All records are stored in a single dictionary
- "ns": [
- {"ttl": 3600, "data": "ns11.waterrockdigital.com"},
- {"ttl": 3600, "data": "ns12.waterrockdigital.com"}
- ]
- },
- "seen" : "2021-09-01T00:00:00Z" # Zulu time added upon indexing
+Output:
+ 1001.vegas. 3600 in ns ns11.waterrockdigital.com.
+ 1001.vegas. 3600 in ns ns12.waterrockdigital.com.
+
+Input:
+ {
+ "_id" : "1001.vegas"
+ "_index" : "dns-zones",
+ "_source" : {
+ "domain" : "1001.vegas",
+ "records" : {
+ "ns": [
+ {"ttl": 3600, "data": "ns11.waterrockdigital.com"},
+ {"ttl": 3600, "data": "ns12.waterrockdigital.com"}
+ ]
+ },
+ "seen" : "2021-09-01T00:00:00Z"
+ }
}
-}
-'''
-'''
Notes:
-- How do we want to handle hashed NSEC3 records? Do we ignest them as they are, or crack the NSEC3 hashes first and ingest?
+ How do we want to handle hashed NSEC3 records? Do we ingest them as they are, or crack the NSEC3 hashes first and ingest?
'''
\ No newline at end of file
| | | | | |