diff --git a/README.md b/README.md
@@ -92,14 +92,29 @@ This allows efficient distribution of large scans across multiple machines.
### Python Library
```python
import asyncio
-import aiohttp
-import aioboto3
+import urllib.request
from httpz_scanner import HTTPZScanner
-async def scan_domains():
+async def scan_from_list() -> list:
+ with urllib.request.urlopen('https://example.com/domains.txt') as response:
+ content = response.read().decode()
+ return [line.strip() for line in content.splitlines() if line.strip()][:20]
+
+async def scan_from_url():
+ with urllib.request.urlopen('https://example.com/domains.txt') as response:
+ for line in response:
+ if line := line.strip():
+ yield line.decode().strip()
+
+async def scan_from_file():
+ with open('domains.txt', 'r') as file:
+ for line in file:
+ if line := line.strip():
+ yield line
+
+async def main():
# Initialize scanner with all possible options (showing defaults)
scanner = HTTPZScanner(
- # Core settings
concurrent_limit=100, # Number of concurrent requests
timeout=5, # Request timeout in seconds
follow_redirects=False, # Follow redirects (max 10)
@@ -131,61 +146,33 @@ async def scan_domains():
exclude_codes={404,500,503} # Exclude these status codes
)
- # Initialize resolvers (required before scanning)
- await scanner.init()
+ # Example 1: Process file
+ print('\nProcessing file:')
+ async for result in scanner.scan(scan_from_file()):
+ print(f"{result['domain']}: {result['status']}")
- # Example 1: Stream from S3/MinIO using aioboto3
- async with aioboto3.Session().client('s3',
- endpoint_url='http://minio.example.com:9000',
- aws_access_key_id='access_key',
- aws_secret_access_key='secret_key') as s3:
-
- response = await s3.get_object(Bucket='my-bucket', Key='huge-domains.txt')
- async with response['Body'] as stream:
- async def s3_generator():
- while True:
- line = await stream.readline()
- if not line:
- break
- yield line.decode().strip()
-
- await scanner.scan(s3_generator())
-
- # Example 2: Stream from URL using aiohttp
- async with aiohttp.ClientSession() as session:
- # For large files - stream line by line
- async with session.get('https://example.com/huge-domains.txt') as resp:
- async def url_generator():
- async for line in resp.content:
- yield line.decode().strip()
-
- await scanner.scan(url_generator())
-
- # For small files - read all at once
- async with session.get('https://example.com/small-domains.txt') as resp:
- content = await resp.text()
- await scanner.scan(content) # Library handles splitting into lines
-
- # Example 3: Simple list of domains
- domains = [
- 'example1.com',
- 'example2.com',
- 'example3.com'
- ]
- await scanner.scan(domains)
+ # Example 2: Stream URLs
+ print('\nStreaming URLs:')
+ async for result in scanner.scan(scan_from_url()):
+ print(f"{result['domain']}: {result['status']}")
+
+ # Example 3: Process list
+ print('\nProcessing list:')
+ domains = await scan_from_list()
+ async for result in scanner.scan(domains):
+ print(f"{result['domain']}: {result['status']}")
if __name__ == '__main__':
- asyncio.run(scan_domains())
+ asyncio.run(main())
```
The scanner accepts various input types:
-- Async/sync generators that yield domains
-- String content with newlines
+- File paths (string)
- Lists/tuples of domains
-- File paths
- stdin (using '-')
+- Async generators that yield domains
-All inputs support sharding for distributed scanning.
+All inputs support sharding for distributed scanning using the `shard` parameter.
## Arguments
diff --git a/httpz_scanner/__init__.py b/httpz_scanner/__init__.py
@@ -6,4 +6,4 @@ from .scanner import HTTPZScanner
from .colors import Colors
-__version__ = '2.0.0'
-\ No newline at end of file
+__version__ = '2.0.1'
+\ No newline at end of file
diff --git a/httpz_scanner/parsers.py b/httpz_scanner/parsers.py
@@ -8,9 +8,9 @@ except ImportError:
raise ImportError('missing bs4 module (pip install beautifulsoup4)')
try:
- from cryptography import x509
+ from cryptography import x509
from cryptography.hazmat.primitives import hashes
- from cryptography.x509.oid import NameOID
+ from cryptography.x509.oid import NameOID
except ImportError:
raise ImportError('missing cryptography module (pip install cryptography)')
@@ -28,8 +28,8 @@ def parse_domain_url(domain: str) -> tuple:
Parse domain string into base domain, port, and protocol list
:param domain: Raw domain string to parse
- :return: Tuple of (base_domain, port, protocols)
'''
+
port = None
base_domain = domain.rstrip('/')
@@ -58,6 +58,7 @@ def parse_domain_url(domain: str) -> tuple:
return base_domain, port, protocols
+
async def get_cert_info(ssl_object, url: str) -> dict:
'''
Get SSL certificate information for a domain
@@ -65,6 +66,7 @@ async def get_cert_info(ssl_object, url: str) -> dict:
:param ssl_object: SSL object to get certificate info from
:param url: URL to get certificate info from
'''
+
try:
if not ssl_object or not (cert_der := ssl_object.getpeercert(binary_form=True)):
return None
@@ -101,6 +103,7 @@ async def get_cert_info(ssl_object, url: str) -> dict:
error(f'Error getting cert info for {url}: {str(e)}')
return None
+
async def get_favicon_hash(session, base_url: str, html: str) -> str:
'''
Get favicon hash from a webpage
@@ -141,6 +144,7 @@ async def get_favicon_hash(session, base_url: str, html: str) -> str:
return None
+
def parse_status_codes(codes_str: str) -> set:
'''
Parse comma-separated status codes and ranges into a set of integers
@@ -174,4 +178,26 @@ def parse_shard(shard_str: str) -> tuple:
raise ValueError
return shard_index - 1, total_shards # Convert to 0-based index
except (ValueError, TypeError):
- raise argparse.ArgumentTypeError('Shard must be in format INDEX/TOTAL where INDEX <= TOTAL')
-\ No newline at end of file
+ raise argparse.ArgumentTypeError('Shard must be in format INDEX/TOTAL where INDEX <= TOTAL')
+
+
+def parse_title(html: str, content_type: str = None) -> str:
+    '''
+    Parse the <title> text from HTML content (None if missing or unparsable)
+
+    :param html: HTML content of the page
+    :param content_type: Content-Type header value
+    '''
+
+    # Only parse title for non-empty HTML content
+    if not html or (content_type and not any(x in content_type.lower() for x in ['text/html', 'application/xhtml'])):
+        return None
+
+    try:
+        # A single parser: 'html.parser' positionally plus features='lxml' raised TypeError
+        soup = bs4.BeautifulSoup(html, 'html.parser')
+        if soup.title and soup.title.string:
+            return soup.title.string.strip()
+    except Exception: # Best-effort: a malformed page must not abort the scan
+        pass
+    return None
+\ No newline at end of file
diff --git a/httpz_scanner/scanner.py b/httpz_scanner/scanner.py
@@ -5,7 +5,6 @@
import asyncio
import json
import random
-import sys
try:
import aiohttp
@@ -20,7 +19,7 @@ except ImportError:
from .dns import resolve_all_dns, load_resolvers
from .formatters import format_console_output
from .colors import Colors
-from .parsers import parse_domain_url, get_cert_info, get_favicon_hash
+from .parsers import parse_domain_url, get_cert_info, get_favicon_hash, parse_title
from .utils import debug, info, USER_AGENTS, input_generator
@@ -154,12 +153,13 @@ class HTTPZScanner:
except AttributeError:
debug(f'Failed to get SSL info for {url}')
- html = (await response.text())[:1024*1024]
- soup = bs4.BeautifulSoup(html, 'html.parser')
+ content_type = response.headers.get('Content-Type', '')
+ html = await response.text() if any(x in content_type.lower() for x in ['text/html', 'application/xhtml']) else '' # '' rather than None: BeautifulSoup cannot parse None
# Only add title if it exists
- if soup.title and soup.title.string:
- result['title'] = ' '.join(soup.title.string.strip().split()).rstrip('.')[:300]
+ if soup := bs4.BeautifulSoup(html, 'html.parser'):
+ if soup.title and soup.title.string:
+ result['title'] = ' '.join(soup.title.string.strip().split()).rstrip('.')[:300]
# Only add body if it exists
if body_text := soup.get_text():
@@ -210,32 +210,81 @@ class HTTPZScanner:
async def scan(self, input_source):
'''
- Scan domains from a file or stdin
+ Scan domains from a file, stdin, or async generator
- :param input_source: Path to file or '-' for stdin
+ :param input_source: Can be:
+ - Path to file (str)
+ - stdin ('-')
+ - List/tuple of domains
+ - Async generator yielding domains
+ :yields: Result dictionary for each domain scanned
'''
+
if not self.resolvers:
await self.init()
async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
tasks = set()
- # Pass shard info to input_generator
- for domain in input_generator(input_source, self.shard):
- if len(tasks) >= self.concurrent_limit:
- done, tasks = await asyncio.wait(
- tasks, return_when=asyncio.FIRST_COMPLETED
- )
- for task in done:
- result = await task
- await self.process_result(result)
+ # Handle different input types
+ if isinstance(input_source, str):
+ # File or stdin input (input_generator is an async generator, so it needs async for)
+ domain_iter = input_generator(input_source, self.shard)
+ async for domain in domain_iter:
+ if len(tasks) >= self.concurrent_limit:
+ done, tasks = await asyncio.wait(
+ tasks, return_when=asyncio.FIRST_COMPLETED
+ )
+ for task in done:
+ result = await task
+ await self.process_result(result)
+ yield result
+
+ task = asyncio.create_task(self.check_domain(session, domain))
+ tasks.add(task)
+ elif isinstance(input_source, (list, tuple)):
+ # List/tuple input
+ for line_num, domain in enumerate(input_source):
+ if domain := str(domain).strip():
+ if self.shard is None or line_num % self.shard[1] == self.shard[0]:
+ if len(tasks) >= self.concurrent_limit:
+ done, tasks = await asyncio.wait(
+ tasks, return_when=asyncio.FIRST_COMPLETED
+ )
+ for task in done:
+ result = await task
+ await self.process_result(result)
+ yield result
+
+ task = asyncio.create_task(self.check_domain(session, domain))
+ tasks.add(task)
+ else:
+ # Async generator input
+ line_num = 0
+ async for domain in input_source:
+ if isinstance(domain, bytes):
+ domain = domain.decode()
+ domain = domain.strip()
+
+ if domain:
+ if self.shard is None or line_num % self.shard[1] == self.shard[0]:
+ if len(tasks) >= self.concurrent_limit:
+ done, tasks = await asyncio.wait(
+ tasks, return_when=asyncio.FIRST_COMPLETED
+ )
+ for task in done:
+ result = await task
+ await self.process_result(result)
+ yield result
- task = asyncio.create_task(self.check_domain(session, domain))
- tasks.add(task)
+ task = asyncio.create_task(self.check_domain(session, domain))
+ tasks.add(task)
+ line_num += 1
# Process remaining tasks
if tasks:
done, _ = await asyncio.wait(tasks)
for task in done:
result = await task
- await self.process_result(result)
-\ No newline at end of file
+ await self.process_result(result)
+ yield result
+\ No newline at end of file
diff --git a/httpz_scanner/utils.py b/httpz_scanner/utils.py
@@ -5,6 +5,7 @@
import logging
import os
import sys
+import asyncio
# Global for silent mode
@@ -12,58 +13,58 @@ SILENT_MODE = False
# List of user agents to randomize requests
USER_AGENTS = [
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0",
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.6.5 Chrome/124.0.6367.243 Electron/30.1.2 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 OPR/116.0.0.0",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:134.0) Gecko/20100101 Firefox/134.0",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.8.3 Chrome/130.0.6723.191 Electron/33.3.2 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.6613.137 Safari/537.36",
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
- "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1.1 Safari/605.1.15",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
- "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:135.0) Gecko/20100101 Firefox/135.0",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.5.12 Chrome/120.0.6099.283 Electron/28.2.3 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0",
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
- "Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 OPR/114.0.0.0",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.7.7 Chrome/128.0.6613.186 Electron/32.2.5 Safari/537.36"
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0',
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.6.5 Chrome/124.0.6367.243 Electron/30.1.2 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 OPR/116.0.0.0',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:134.0) Gecko/20100101 Firefox/134.0',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.8.3 Chrome/130.0.6723.191 Electron/33.3.2 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.6613.137 Safari/537.36',
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1.1 Safari/605.1.15',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:135.0) Gecko/20100101 Firefox/135.0',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.5.12 Chrome/120.0.6099.283 Electron/28.2.3 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0',
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
+ 'Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 OPR/114.0.0.0',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.7.7 Chrome/128.0.6613.186 Electron/32.2.5 Safari/537.36'
]
@@ -98,9 +99,9 @@ def human_size(size_bytes: int) -> str:
return f'{size:.1f}{units[unit_index]}'
-def input_generator(input_source, shard: tuple = None):
+async def input_generator(input_source, shard: tuple = None):
'''
- Generator function to yield domains from various input sources with optional sharding
+ Async generator function to yield domains from various input sources with optional sharding
:param input_source: Can be:
- string path to local file
@@ -116,6 +117,7 @@ def input_generator(input_source, shard: tuple = None):
# Handle stdin
if input_source == '-' or input_source is None:
for line in sys.stdin:
+ await asyncio.sleep(0) # Yield control
if line := line.strip():
if shard is None or line_num % shard[1] == shard[0]:
yield line
@@ -125,6 +127,7 @@ def input_generator(input_source, shard: tuple = None):
elif isinstance(input_source, str) and os.path.exists(input_source):
with open(input_source, 'r') as f:
for line in f:
+ await asyncio.sleep(0) # Yield control
if line := line.strip():
if shard is None or line_num % shard[1] == shard[0]:
yield line
@@ -133,6 +136,7 @@ def input_generator(input_source, shard: tuple = None):
# Handle iterables (generators, lists, etc)
elif hasattr(input_source, '__iter__') and not isinstance(input_source, (str, bytes)):
for line in input_source:
+ await asyncio.sleep(0) # Yield control
if isinstance(line, bytes):
line = line.decode()
if line := line.strip():
@@ -145,6 +149,7 @@ def input_generator(input_source, shard: tuple = None):
if isinstance(input_source, bytes):
input_source = input_source.decode()
for line in input_source.splitlines():
+ await asyncio.sleep(0) # Yield control
if line := line.strip():
if shard is None or line_num % shard[1] == shard[0]:
yield line
diff --git a/setup.py b/setup.py
@@ -4,12 +4,13 @@
from setuptools import setup, find_packages
+
with open('README.md', 'r', encoding='utf-8') as f:
long_description = f.read()
setup(
name='httpz_scanner',
- version='2.0.0',
+ version='2.0.1',
author='acidvegas',
author_email='acid.vegas@acid.vegas',
description='Hyper-fast HTTP Scraping Tool',
| | | | | |