proxytools

- collection of scripts for harvesting & testing proxies
git clone git://git.acid.vegas/proxytools.git
Log | Files | Refs | Archive | README | LICENSE

commit faa18045476416d1b0556820e4a940d5fd37553a
Author: acidvegas <acid.vegas@acid.vegas>
Date: Fri, 28 Jun 2019 01:40:22 -0400

Initial commit

Diffstat:
A LICENSE | 16 ++++++++++++++++
A README.md | 9 +++++++++
A proxytools/cleansocks.py | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A proxytools/floodbl.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A proxytools/sockspot.py | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

5 files changed, 273 insertions(+), 0 deletions(-)

diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,15 @@
+ISC License
+
+Copyright (c) 2019, acidvegas <acid.vegas@acid.vegas>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+\ No newline at end of file
diff --git a/README.md b/README.md
@@ -0,0 +1,9 @@
+###### Requirements
+* [Python](https://www.python.org/downloads/) *(**Note:** This script was developed to be used with the latest version of Python)*
+* [PySocks](https://pypi.python.org/pypi/PySocks) *(Required for cleansocks.py)*
+
+###### Mirrors
+- [acid.vegas](https://acid.vegas/proxytools) *(main)*
+- [SuperNETs](https://git.supernets.org/acidvegas/proxytools)
+- [GitHub](https://github.com/acidvegas/proxytools)
+- [GitLab](https://gitlab.com/acidvegas/proxytools)
diff --git a/proxytools/cleansocks.py b/proxytools/cleansocks.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+# CleanSocks - Developed by acidvegas in Python (https://acid.vegas/proxytools)
+
+'''
+Requirements:
+	PySocks (https://pypi.python.org/pypi/pysocks)
+
+This script will clean a list of proxies by removing duplicates, checking for valid formats (IP:PORT), and testing if the proxies are working
+'''
+
+import argparse
+import concurrent.futures
+import os
+import re
+import sys
+
+sys.dont_write_bytecode = True
+
+# IPv4:PORT pattern - each octet is limited to 0-255 and the port to 0-65535 (port 0 is accepted).
+# Written as a raw string so '\.' reaches the regex engine instead of being an invalid string escape,
+# and compiled once because the pattern is checked for every line of the input file.
+_PROXY_PATTERN = re.compile(r'^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]):(?:6553[0-5]|655[0-2][0-9]|65[0-4][0-9]{2}|6[0-4][0-9]{3}|[1-5][0-9]{4}|[1-9][0-9]{1,3}|[0-9])$')
+
+def is_proxy(proxy):
+	'''
+	Check whether a string is a well-formed IPv4:PORT address.
+
+	A single trailing newline is tolerated because '$' also matches just before it.
+
+	Returns the match object when the string is valid, otherwise None.
+	'''
+	return _PROXY_PATTERN.match(proxy)
+
+def test_proxy(proxy):
+	'''
+	Test one proxy by opening a SOCKS5 connection through it to www.google.com:80.
+
+	Prints a GOOD or BAD line for the proxy and appends working proxies to the
+	module-level `good` list. The socket timeout comes from the --timeout argument.
+	'''
+	ip, port = proxy.split(':')
+	sock = None # Stays None when the socket cannot be created, so the cleanup below never touches an unbound name.
+	try:
+		sock = socks.socksocket()
+		sock.set_proxy(socks.SOCKS5, ip, int(port))
+		sock.settimeout(args.timeout)
+		sock.connect(('www.google.com', 80))
+	except Exception: # Narrower than a bare except, so SystemExit and KeyboardInterrupt are not swallowed.
+		print('BAD  | ' + proxy)
+	else:
+		print('GOOD | ' + proxy)
+		good.append(proxy)
+	finally:
+		if sock is not None:
+			sock.close()
+
+parser = argparse.ArgumentParser(usage='%(prog)s <input> <output> [options]')
+parser.add_argument('input',           help='file to scan')
+parser.add_argument('output',          help='file to output')
+parser.add_argument('-t', '--threads', help='number of threads      (default: 100)', default=100, type=int)
+parser.add_argument('-x', '--timeout', help='socket timeout seconds (default: 15)',  default=15,  type=int)
+args = parser.parse_args()
+# PySocks is imported only after the arguments are parsed, so --help works even when it is missing.
+try:
+	import socks
+except ImportError:
+	raise SystemExit('missing pysocks module (https://pypi.python.org/pypi/pysocks)')
+if not os.path.isfile(args.input):
+	raise SystemExit('no such input file')
+# Read through a context manager so the input handle is closed, and strip each line
+# before validating it so CRLF line endings or padding do not reject a valid proxy.
+with open(args.input) as input_file:
+	candidates = (line.strip() for line in input_file)
+	proxies = {candidate for candidate in candidates if is_proxy(candidate)}
+if not proxies:
+	raise SystemExit('no proxies found from input file')
+# Keep a single proxy per IP address; a set keeps the "already seen" check constant-time.
+deduped, ips = list(), set()
+for proxy in proxies:
+	ip = proxy.split(':')[0]
+	if ip not in ips:
+		ips.add(ip)
+		deduped.append(proxy)
+deduped.sort()
+good = list()
+# Leaving the with-block waits for every submitted check to finish.
+with concurrent.futures.ThreadPoolExecutor(max_workers=args.threads) as executor:
+	for proxy in deduped:
+		executor.submit(test_proxy, proxy)
+good.sort()
+with open(args.output, 'w') as output_file:
+	output_file.write('\n'.join(good))
+# NOTE: "Total" counts unique IP:PORT entries, so "Bad" also includes same-IP entries that were never tested.
+print('Total : ' + format(len(proxies),           ',d'))
+print('Good  : ' + format(len(good),              ',d'))
+print('Bad   : ' + format(len(proxies)-len(good), ',d'))
+\ No newline at end of file
diff --git a/proxytools/floodbl.py b/proxytools/floodbl.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+# FloodBL - Developed by acidvegas in Python (https://acid.vegas/proxytools)
+
+'''
+This script will test proxies against a set of Domain Name System-based Blackhole Lists (DNSBL) or Real-time Blackhole Lists (RBL)
+'''
+
+import argparse
+import concurrent.futures
+import os
+import re
+import socket
+
+dnsbls = ('dnsbl.dronebl.org','rbl.efnetrbl.org','torexit.dan.me.uk')
+
+def dnsbl_check(proxy):
+	'''
+	Look up a proxy's IP address on every blackhole list in `dnsbls`.
+
+	The IP octets are reversed and prepended to each list's domain. A lookup that
+	resolves means the IP is listed: the proxy is reported as BAD and the remaining
+	lists are skipped. If every lookup fails with socket.gaierror, the proxy is
+	appended to the module-level `good` list and reported as GOOD.
+	'''
+	ip = proxy.split(':')[0]
+	reversed_ip = '.'.join(reversed(ip.split('.')))
+	for dnsbl in dnsbls:
+		try:
+			socket.gethostbyname(f'{reversed_ip}.{dnsbl}')
+		except socket.gaierror:
+			continue # No record on this list, try the next one.
+		print('BAD  | ' + ip)
+		return
+	good.append(proxy)
+	print('GOOD | ' + ip)
+
+# Main
+parser = argparse.ArgumentParser(usage='%(prog)s <input> <output> [options]')
+parser.add_argument('input',           help='file to scan')
+parser.add_argument('output',          help='file to output')
+parser.add_argument('-t', '--threads', help='number of threads (default: 100)', default=100, type=int)
+args = parser.parse_args()
+if not os.path.isfile(args.input):
+	raise SystemExit('no such input file')
+# Read through a context manager so the input handle is closed; the pattern is a raw
+# string so '\.' is not treated as an (invalid) string escape. The pattern has no
+# anchors, so re.MULTILINE had no effect and is not passed.
+with open(args.input) as input_file:
+	proxies = re.findall(r'[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:[0-9]+', input_file.read())
+if not proxies:
+	raise SystemExit('no proxies found from input file')
+good = list()
+# Leaving the with-block waits for every submitted check to finish.
+with concurrent.futures.ThreadPoolExecutor(max_workers=args.threads) as executor:
+	for proxy in proxies:
+		executor.submit(dnsbl_check, proxy)
+good.sort()
+with open(args.output, 'w') as output_file:
+	output_file.write('\n'.join(good))
+print('Total : ' + format(len(proxies),           ',d'))
+print('Good  : ' + format(len(good),              ',d'))
+print('Bad   : ' + format(len(proxies)-len(good), ',d'))
diff --git a/proxytools/sockspot.py b/proxytools/sockspot.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+# SockSpot - Developed by acidvegas in Python (https://acid.vegas/proxytools)
+
+import datetime
+import json
+import base64
+import os
+import re
+import threading
+import time
+import urllib.request
+
+# Blogspot URLs (each host listed once, so the "Scanning N URLs" count matches the number of sites scraped)
+blogspot_list = (
+	'live-socks.net',
+	'newfreshproxies-24.blogspot.sg',
+	'proxyserverlist-24.blogspot.sg',
+	'socks24.org',
+	'sock5us.blogspot.com',
+	'sockproxy.blogspot.com',
+	'socksproxylist24.blogspot.com',
+	'newsocks.info',
+	'socksecurelist.ca',
+	'canada-socks247.com',
+	'sslproxies24.blogspot.com',
+	'vip-socks24.blogspot.com'
+)
+
+# Settings
+max_results = 100 # Maximum number of results per-page.
+post_depth  = 1   # How many days back from the current date to pull posts from. (1 = Today Only)
+timeout     = 30  # Timeout for HTTP requests.
+
+# Globals
+proxy_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'proxies.txt')
+proxy_list = list()
+threads    = dict()
+
+def debug(msg):
+	''' Print an informational message prefixed with the current time. '''
+	timestamp = get_time()
+	print(f'{timestamp} | [~] - {msg}')
+
+def error(msg, reason):
+	''' Print an error message prefixed with the current time, followed by the reason in parentheses. '''
+	timestamp = get_time()
+	print(f'{timestamp} | [!] - {msg} ({reason})')
+
+def get_time():
+	''' Return the current local time as HH:MM:SS on a 12-hour clock (no AM/PM marker). '''
+	now = time.localtime()
+	return time.strftime('%I:%M:%S', now)
+
+def get_date():
+	''' Return today's local date as YEAR-MM-DD, with zero-padded month and day. '''
+	today = datetime.datetime.today()
+	return f'{today.year}-{today.month:02d}-{today.day:02d}'
+
+def get_date_range():
+	'''
+	Return the date `post_depth` days before today as YEAR-MM-DD, with zero-padded month and day.
+
+	NOTE(review): with post_depth = 1 this is yesterday's date, while the comment on the
+	post_depth setting says "1 = Today Only" - confirm which of the two is intended.
+	'''
+	oldest = datetime.datetime.today() - datetime.timedelta(days=post_depth)
+	return f'{oldest.year}-{oldest.month:02d}-{oldest.day:02d}'
+
+def get_source(url):
+	'''
+	Download a URL and return the response body as text.
+
+	The request is sent with a Googlebot User-Agent header and the module-level
+	`timeout` setting. The body is decoded with the charset declared in the response
+	headers, or as UTF-8 when none is declared. Errors raised by urllib or by
+	decoding propagate to the caller.
+	'''
+	req = urllib.request.Request(url)
+	req.add_header('User-Agent', 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)')
+	# The response is used as a context manager so it is closed even when reading or decoding fails.
+	with urllib.request.urlopen(req, timeout=timeout) as source:
+		charset = source.headers.get_content_charset()
+		return source.read().decode(charset or 'utf-8')
+
+def parse_blogspot(url):
+	'''
+	Fetch the JSON post feed of one site and collect every IP:PORT string found in its posts.
+
+	The feed is limited to `max_results` posts updated between get_date_range() and
+	get_date(). Matches are appended to the module-level `proxy_list` post by post, so
+	results gathered before a failure are kept. Any failure (network, JSON, unexpected
+	feed layout) is reported through error() instead of being silently dropped.
+	'''
+	try:
+		source = json.loads(get_source(f'http://{url}/feeds/posts/default?max-results={max_results}&alt=json&updated-min={get_date_range()}T00:00:00&updated-max={get_date()}T23:59:59&orderby=updated'))
+		entries = source['feed'].get('entry')
+		if not entries:
+			error('No posts found on page!', url)
+			return
+		found = []
+		for item in entries:
+			# Raw string so '\.' is a regex escape rather than an invalid string escape.
+			proxies = re.findall(r'[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:[0-9]+', item['content']['$t'])
+			found.extend(proxies)
+			proxy_list.extend(proxies)
+		debug('Found {0} proxies on {1}'.format(format(len(found), ',d'), url))
+	except Exception as ex:
+		# One broken source must not stop the others, but the failure should be visible.
+		error(f'Failed to scrape {url}', ex)
+
+def scan_blogspots():
+	'''
+	Scrape every site in `blogspot_list` and write the collected proxies to `proxy_file`.
+
+	One parse_blogspot thread per URL is stored in the module-level `threads` dict. The
+	threads are started with a 10 second pause after each start, then all are joined
+	before the total is reported and the proxies are written one per line.
+	'''
+	for url in blogspot_list:
+		threads[url] = threading.Thread(target=parse_blogspot, args=(url,))
+	for worker in threads.values():
+		worker.start()
+		time.sleep(10)
+	for worker in threads.values():
+		worker.join()
+	total = format(len(proxy_list), ',d')
+	debug('Found {0} total proxies!'.format(total))
+	with open(proxy_file, 'w') as proxy__file:
+		proxy__file.writelines(proxy + '\n' for proxy in proxy_list)
+
+# Main
+print('#'*56)
+print('#{0}#'.format(''.center(54)))
+print('#{0}#'.format('SockSpot Proxy Scraper'.center(54)))
+print('#{0}#'.format('Developed by acidvegas in Python'.center(54)))
+print('#{0}#'.format('https://acid.vegas/proxytools'.center(54)))
+print('#{0}#'.format(''.center(54)))
+print('#'*56)
+debug(f'Scanning {len(blogspot_list)} URLs from list...')
+# scan_blogspots() already starts and joins the scraper threads, reports the total and writes
+# the proxy file, so it is called once here. Running it in a background thread while repeating
+# the same steps inline would race on the shared `threads` dict and on the output file.
+scan_blogspots()
+if not proxy_list:
+	error('no socks found', 'all sources were empty or unreachable')