dickserv

- IRC bot with many useful commands
git clone git://git.acid.vegas/dickserv.git

httplib.py (1744B)

#!/usr/bin/env python
# DickServ IRC Bot - Developed by acidvegas in Python (https://acid.vegas/dickserv)
# httplib.py

import json
import os
import re
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup

def clean_url(url):
    # Strip the scheme and 'www.' prefix, plus any trailing slash, for display purposes.
    for prefix in ('https://', 'http://', 'www.'):
        if url.startswith(prefix):
            url = url[len(prefix):]
    if url[-1:] == '/':
        url = url[:-1]
    return url

def data_quote(data):
    # Percent-encode a single value for safe use in a URL.
    return urllib.parse.quote(data)

def data_encode(data):
    # Encode a dict of parameters into a URL query string.
    return urllib.parse.urlencode(data)

def get_file(url):
    # Return the filename portion of a URL.
    return os.path.basename(url)

def get_json(url):
    # Fetch a URL and parse the response body as JSON.
    return json.loads(get_source(url))

def get_size(url):
    # Return the Content-Length of a URL as a human-readable size.
    content_length = int(get_url(url).getheader('content-length'))
    for unit in ('B','KB','MB','GB','TB','PB','EB','ZB'):
        if abs(content_length) < 1024.0:
            return '{0:.2f}'.format(content_length) + unit
        content_length /= 1024.0
    return '{0:.2f}'.format(content_length) + 'YB'

def get_source(url):
    # Fetch a URL and decode the body using the charset from the response headers, if any.
    source  = get_url(url)
    charset = source.headers.get_content_charset()
    if charset:
        return source.read().decode(charset)
    else:
        return source.read().decode()

def get_title(url):
    # Return the contents of the page's <title> tag with whitespace collapsed.
    source = get_source(url)
    soup   = BeautifulSoup(source, 'html.parser')
    return ' '.join(soup.title.string.split())

def get_type(url):
    # Return the Content-Type of a URL.
    return get_url(url).info().get_content_type()

def get_url(url):
    # Open a URL with a custom User-Agent and a 10 second timeout.
    req = urllib.request.Request(url)
    req.add_header('User-Agent', 'DickServ/1.0')
    return urllib.request.urlopen(req, timeout=10)

def parse_urls(data):
    # Find all http(s):// and www. URLs in a string.
    return re.compile(r'(?:https?://|www\.)(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*(),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', re.IGNORECASE).findall(data)