httpz - Hyper-fast HTTP Scraping Tool
git clone git://git.acid.vegas/httpz.git
httpz.py (10586B)
#!/usr/bin/env python
# HTTPZ Crawler - Developed by acidvegas in Python (https://git.acid.vegas/httpz)

'''
BCUZ FUCK HTTPX PYTHON STILL GO HARD
'''

import argparse
import asyncio
import json
import logging
import random
import re
import ssl
import urllib.parse
import urllib.request

try:
    import aiodns
except ImportError:
    print('Missing required module \'aiodns\'. (pip install aiodns)')
    exit(1)

try:
    import aiohttp
except ImportError:
    print('Missing required module \'aiohttp\'. (pip install aiohttp)')
    exit(1)

# ANSI escape codes for colors
BLUE      = '\033[34m'
CYAN      = '\033[36m'
RED       = '\033[91m'
GREEN     = '\033[92m'
DARK_GREY = '\033[90m'
YELLOW    = '\033[93m'
RESET     = '\033[0m'

# Globals
DNS_SERVERS = None
args        = None # Global args variable


def vlog(msg: str):
    '''
    Verbose logging only if enabled

    :param msg: Message to print to console
    '''
    if args.verbose:
        logging.info(msg)


def create_session(user_agent: str, timeout: int) -> dict:
    '''
    Create the keyword arguments for a custom aiohttp session

    :param user_agent: User agent to use for HTTP requests
    :param timeout: Timeout for HTTP requests
    '''
    # Certificate verification is disabled on purpose: we still want results
    # from hosts with broken, expired, or self-signed TLS
    ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE

    headers   = {'User-Agent': user_agent}
    connector = aiohttp.TCPConnector(ssl=ssl_context)

    session_params = {
        'connector': connector,
        'headers': headers,
        'timeout': aiohttp.ClientTimeout(total=timeout)
    }

    return session_params


def get_dns_servers() -> dict:
    '''Fetch a list of public DNS servers to use for lookups.'''
    with urllib.request.urlopen('https://public-dns.info/nameservers.txt') as source:
        results = source.read().decode().split('\n')

    # Drop empty lines, then split the servers into IPv4 and IPv6 pools
    results    = [server.strip() for server in results if server.strip()]
    v4_servers = [server for server in results if ':' not in server]
    v6_servers = [server for server in results if ':' in server]

    return {'4': v4_servers, '6': v6_servers}


async def dns_lookup(domain: str, record_type: str, timeout: int, retry: int) -> list:
    '''
    Resolve DNS information from a domain

    :param domain: Domain name to resolve
    :param record_type: DNS record type to resolve
    :param timeout: Timeout for DNS request
    :param retry: Number of times to retry failed requests
    '''
    for i in range(retry):
        nameserver = None
        try:
            # A records go to an IPv4 resolver, AAAA records to an IPv6 one, anything else to either
            version    = '4' if record_type == 'A' else '6' if record_type == 'AAAA' else random.choice(['4','6'])
            nameserver = random.choice(DNS_SERVERS[version])
            resolver   = aiodns.DNSResolver(nameservers=[nameserver], timeout=timeout)
            records    = await resolver.query(domain, record_type)
            return records.cname if record_type == 'CNAME' else [record.host for record in records]
        except Exception as e:
            vlog(f'{RED}[ERROR]{RESET} {domain} - Failed to resolve {record_type} record using {nameserver} {DARK_GREY}({str(e)}){RESET}')
    return []


async def get_body(source: str, preview: int) -> str:
    '''
    Get the body of a webpage

    :param source: HTML source of the webpage
    :param preview: Number of bytes to preview
    '''
    # Only the first 5,000 bytes are searched for a <body> tag; if none is
    # found there, fall back to stripping tags from the whole document
    body_content      = re.search(r'<body.*?>(.*?)</body>', source[:5000], re.DOTALL | re.IGNORECASE)
    processed_content = body_content.group(1) if body_content else source
    clean_content     = re.sub(r'<[^>]+>', '', processed_content)
    return clean_content[:preview]
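
# A quick sketch of what get_body() produces (hypothetical input, not part of
# the crawler itself):
#
#   >>> asyncio.run(get_body('<html><body><b>hello</b> world</body></html>', 50))
#   'hello world'
#
# The tag stripping is regex-based, so text inside <script> or <style> blocks
# survives into the preview; that is good enough for a rough content hint.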

async def get_title(session: aiohttp.ClientSession, domain: str, max_redirects: int = 10):
    '''
    Get the title of a webpage and its status code

    :param session: aiohttp session
    :param domain: URL to get the title of
    :param max_redirects: Maximum number of redirects to follow
    '''
    title       = None
    body        = None
    status_code = None

    try:
        # aiohttp applies proxies per-request, so this is where --proxy takes effect
        async with session.get(domain, timeout=args.timeout, allow_redirects=False, proxy=args.proxy) as response:
            status_code = response.status
            if status_code in (200, 201):
                html_content = await response.text()
                match = re.search(r'<title>(.*?)</title>', html_content, re.IGNORECASE | re.DOTALL)
                title = match.group(1).strip() if match else None
                title = bytes(title, 'utf-8').decode('unicode_escape') if title else None
                title = re.sub(r'[\r\n]+', ' ', title)[:300] if title else None # Unescape, collapse newlines, cap at 300 chars
                body  = await get_body(html_content, args.preview)
                body  = re.sub(r'\s+', ' ', body).strip() if body else None
            elif status_code in (301, 302, 303, 307, 308):
                if max_redirects <= 0:
                    vlog(f'{RED}[ERROR]{RESET} {domain} - Too many redirects')
                else:
                    redirect_url = response.headers.get('Location')
                    if redirect_url:
                        # Location may be relative, so resolve it against the current URL
                        redirect_url = urllib.parse.urljoin(domain, redirect_url)
                        vlog(f'{YELLOW}[WARN]{RESET} {domain} -> {redirect_url} {DARK_GREY}({status_code}){RESET}')
                        return await get_title(session, redirect_url, max_redirects - 1)
                    else:
                        vlog(f'{RED}[ERROR]{RESET} No redirect URL found for {domain} {DARK_GREY}({status_code}){RESET}')
            else:
                vlog(f'{RED}[ERROR]{RESET} {domain} - Invalid status code {DARK_GREY}{status_code}{RESET}')
    except asyncio.TimeoutError:
        vlog(f'{RED}[ERROR]{RESET} {domain} - HTTP request timed out')
    except Exception as e:
        vlog(f'{RED}[ERROR]{RESET} Failed to get title for {domain} {DARK_GREY}({e}){RESET}')

    return title, body, status_code


async def check_url(session: aiohttp.ClientSession, domain: str):
    '''
    Process a domain name

    :param session: aiohttp session
    :param domain: Domain to check
    '''
    dns_records = {}

    for record_type in ('A', 'AAAA'):
        records = await dns_lookup(domain, record_type, args.timeout, args.retry)
        if records:
            dns_records[record_type] = records

    # No A/AAAA records? Fall back to a CNAME lookup and follow it
    if not dns_records:
        cname_record = await dns_lookup(domain, 'CNAME', args.timeout, args.retry)
        if cname_record:
            dns_records['CNAME'] = cname_record
            domain = cname_record
        else:
            vlog(f'{RED}[ERROR]{RESET} No DNS records found for {domain}')
            return domain, None, None, None, None, None

    # Try HTTPS first, then fall back to plain HTTP
    protocol = 'https'
    title, body, status_code = await get_title(session, f'https://{domain}')
    if not title and not body:
        protocol = 'http'
        title, body, status_code = await get_title(session, f'http://{domain}')

    if title or body:
        if status_code in (200, 201):
            colored_status = f'[{GREEN}{status_code}{RESET}]'
        elif status_code in (301, 302, 303, 307, 308):
            colored_status = f'[{YELLOW}{status_code}{RESET}]'
        else:
            colored_status = f'[{status_code}]'
        logging.info(f'{domain} {colored_status} [{CYAN}{title}{RESET}] - [{BLUE}{body}{RESET}]')
        # Return the raw status code so the ANSI-colored version never leaks into the JSON output
        return domain, protocol, title, body, dns_records, status_code
    else:
        vlog(f'{RED}[ERROR]{RESET} {domain} - Failed to retrieve title')

    return domain, None, None, None, None, status_code
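
# For reference, a successful check_url() result is a 6-tuple along these
# lines (values are illustrative, not real output):
#
#   ('example.com', 'https', 'Example Domain', 'Example Domain This domain is...',
#    {'A': ['93.184.216.34']}, 200)
#
# When DNS or both HTTP attempts fail, everything except the domain (and
# possibly the status code) comes back as None.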

async def process_file():
    '''Process a list of domains from file'''

    session_params = create_session(args.user_agent, args.timeout)

    async with aiohttp.ClientSession(**session_params) as session:
        tasks = set()
        with open(args.file, 'r') as file:
            for line in file:
                domain = line.strip()
                if domain:
                    tasks.add(asyncio.create_task(check_url(session, domain)))

                    # Cap the number of in-flight tasks at the concurrency limit
                    if len(tasks) >= args.concurrency:
                        done, tasks = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
                        for task in done:
                            domain, protocol, title, body, dns_records, status_code = task.result()
                            if title or body or dns_records:
                                write_result_to_file(domain, protocol, title, body, dns_records, status_code)

        # Drain whatever tasks are still pending
        if tasks:
            done, _ = await asyncio.wait(tasks)
            for task in done:
                domain, protocol, title, body, dns_records, status_code = task.result()
                if title or body or dns_records:
                    write_result_to_file(domain, protocol, title, body, dns_records, status_code)


def write_result_to_file(domain, protocol, title, body, dns_records, status_code):
    '''
    Write a single domain result to file as one JSON object per line

    :param domain: Domain name
    :param protocol: Protocol used (http or https)
    :param title: Title of the domain
    :param body: Body preview of the domain
    :param dns_records: DNS records of the domain
    :param status_code: HTTP status code
    '''
    result = {
        'domain': domain,
        'protocol': protocol,
        'status_code': status_code,
        'title': title,
        'body': body,
        'dns_records': dns_records
    }
    with open(args.output, 'a') as f:
        json.dump(result, f)
        f.write('\n')


def main():
    global DNS_SERVERS, args

    parser = argparse.ArgumentParser(description='Check URLs from a file asynchronously, perform DNS lookups and store results in JSON.')
    parser.add_argument('file', help='File containing list of domains')
    parser.add_argument('-c', '--concurrency', type=int, default=10, help='Number of concurrent requests')
    parser.add_argument('-m', '--memory_limit', type=int, default=1000, help='Number of results to store in memory before syncing to file (currently unused)')
    parser.add_argument('-o', '--output', default='results.json', help='Output file')
    parser.add_argument('-t', '--timeout', type=int, default=10, help='Timeout for HTTP requests')
    parser.add_argument('-u', '--user_agent', default='Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', help='User agent to use for HTTP requests')
    parser.add_argument('-x', '--proxy', type=str, help='Proxy to use for HTTP requests')
    parser.add_argument('-r', '--retry', type=int, default=2, help='Number of times to retry failed requests')
    parser.add_argument('-v', '--verbose', action='store_true', help='Increase output verbosity')
    parser.add_argument('-p', '--preview', type=int, default=500, help='Preview size in bytes for body & title (default: 500)')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format=f'{DARK_GREY}%(asctime)s{RESET} %(message)s', datefmt='%H:%M:%S')

    logging.info('Loading DNS servers...')
    DNS_SERVERS = get_dns_servers()
    if not DNS_SERVERS:
        logging.fatal('Failed to get DNS servers.')
        exit(1)
    logging.info(f'Found {len(DNS_SERVERS["4"])} IPv4 and {len(DNS_SERVERS["6"])} IPv6 DNS servers.')

    asyncio.run(process_file())


if __name__ == '__main__':
    main()
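
A typical invocation, using the flags defined in main() above (domains.txt here stands for any newline-delimited list of domains):

    python httpz.py domains.txt -c 50 -t 5 -o results.json -v

Each result is appended to the output file as a single JSON object per line, so the results can be read back with a sketch like:

    import json

    with open('results.json') as f:
        results = [json.loads(line) for line in f if line.strip()]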