2010-02-16 04:09:07 +00:00
|
|
|
# Copyright (C) 2010 Aldo Cortesi
|
2011-07-27 05:47:08 +00:00
|
|
|
#
|
2010-02-16 04:09:07 +00:00
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
2011-07-27 05:47:08 +00:00
|
|
|
#
|
2010-02-16 04:09:07 +00:00
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
2011-07-27 05:47:08 +00:00
|
|
|
#
|
2010-02-16 04:09:07 +00:00
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2012-02-29 00:20:53 +00:00
|
|
|
import re, os, datetime, urlparse, string, urllib
|
|
|
|
import time, functools, cgi, textwrap
|
2011-06-30 01:27:27 +00:00
|
|
|
import json
|
2011-03-07 00:46:02 +00:00
|
|
|
|
|
|
|
def timestamp():
    """
        Returns the current time as produced by time.time(): a plain
        float number of seconds since the epoch, which is trivially
        serializable.
    """
    now = time.time()
    return now
|
2011-03-07 00:46:02 +00:00
|
|
|
|
2010-02-16 04:09:07 +00:00
|
|
|
|
2011-02-03 00:30:47 +00:00
|
|
|
def format_timestamp(s):
    """
        Formats a timestamp (seconds since the epoch) as a local-time
        string of the form "YYYY-MM-DD HH:MM:SS".
    """
    # Round-trip through a local struct_time so that the datetime is built
    # from the value mktime() reports for that local time.
    local_struct = time.localtime(s)
    epoch_seconds = time.mktime(local_struct)
    moment = datetime.datetime.fromtimestamp(epoch_seconds)
    return moment.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
|
|
|
|
|
2010-02-16 04:09:07 +00:00
|
|
|
def isBin(s):
    """
        Does this string contain any character that is neither printable
        ASCII (codes 32-126) nor one of the whitespace controls (codes
        9-13)?
    """
    for char in s:
        code = ord(char)
        is_printable = 32 <= code <= 126
        is_whitespace_control = 9 <= code <= 13
        if not (is_printable or is_whitespace_control):
            return True
    return False
|
|
|
|
|
|
|
|
|
2011-06-27 03:59:17 +00:00
|
|
|
def isXML(s):
    """
        Does this string look like XML-ish markup?

        Leading whitespace (newline, carriage return, space, tab) is
        skipped; the answer is True if the first remaining character is
        "<", and False otherwise. Empty and all-whitespace strings yield
        False.
    """
    for char in s:
        # "\r" is skipped too, so documents with CRLF line endings before
        # the first tag are still recognised.
        if char in "\n\r \t":
            continue
        return char == "<"
    # Nothing but whitespace (or nothing at all): explicitly not XML.
    return False
|
|
|
|
|
|
|
|
|
2010-02-16 04:09:07 +00:00
|
|
|
def cleanBin(s):
    """
        Returns a copy of s that is safe to display: printable ASCII
        characters (codes 32-126) are kept, newlines, carriage returns and
        tabs are removed, and every other character becomes ".".
    """
    def render(char):
        if 32 <= ord(char) <= 126:
            return char
        if char in "\n\r\t":
            # These three are dropped from the output entirely.
            return ""
        return "."

    return "".join(render(char) for char in s)
|
2011-07-27 05:47:08 +00:00
|
|
|
|
2010-02-16 04:09:07 +00:00
|
|
|
|
2011-02-06 01:17:30 +00:00
|
|
|
# Regular expression matching a single XML/HTML-style tag. It is written for
# re.VERBOSE mode, so whitespace outside character classes is ignored.
#
# Named groups:
#   close    - the "/" that marks a closing tag, e.g. </p>
#   name     - the tag name
#   selfcont - the trailing "/" of a self-contained tag, e.g. <br/>
#
# The negative lookahead rejects constructs whose "<" is followed by "!" or a
# double quote. The unnamed repeated group consumes the attribute portion of
# the tag: bare tokens, double-quoted strings, single-quoted strings and
# whitespace.
#
# NOTE(review): the repeated group contains a "+"-quantified alternative,
# which looks prone to heavy backtracking on long unterminated tags - confirm
# before running this against untrusted input.
TAG = r"""
<\s*
(?!\s*[!"])
(?P<close>\s*\/)?
(?P<name>\w+)
(
[^'"\t >]+ |
"[^\"]*"['\"]* |
'[^']*'['\"]* |
\s+
)*
(?P<selfcont>\s*\/\s*)?
\s*>
"""
# Tag names that pretty_xmlish treats as never enclosing content: an opening
# tag with one of these names does not increase the indentation level.
UNI = set(["br", "hr", "img", "input", "area", "link"])
# The whitespace used for one level of indentation.
INDENT = " "*4
|
|
|
|
def pretty_xmlish(s):
    """
        A forgiving pretty-printer for XML-ish data.

        The input is first passed through cleanBin, then scanned for tags
        using the TAG expression. Each tag is emitted on its own line and
        any text between tags is emitted line by line, all indented
        according to the current nesting depth.

        Returns a list of lines.
    """
    s = cleanBin(s)
    lines = []
    pos = 0
    depth = 0
    last_name = None
    for match in re.finditer(TAG, s, re.VERBOSE | re.MULTILINE):
        start, end = match.span()
        name = match.group("name")
        closing = match.group("close")
        standalone = name in UNI

        # Text sitting between the previous tag and this one.
        if start > pos:
            pad = depth * INDENT
            between = textwrap.dedent(s[pos:start])
            lines.extend(
                pad + chunk for chunk in between.split("\n") if chunk.strip()
            )

        # A closing tag steps back out one level, unless it merely closes
        # a standalone tag that was opened immediately before it.
        if closing and not (standalone and name == last_name):
            depth = max(depth - 1, 0)

        lines.append(depth * INDENT + match.group().strip())
        pos = end

        # Only a plain opening tag increases the depth.
        if not (closing or match.group("selfcont") or standalone):
            depth += 1
        last_name = name

    # Whatever follows the final tag is appended verbatim.
    trail = s[pos:]
    if trail.strip():
        lines.append(trail)
    return lines
|
2011-01-27 04:26:01 +00:00
|
|
|
|
|
|
|
|
2011-06-30 01:27:27 +00:00
|
|
|
def pretty_json(s):
    """
        Re-serializes a JSON document with sorted keys and a four-space
        indent.

        Returns a list of lines, or None if s cannot be parsed as JSON.
    """
    try:
        parsed = json.loads(s)
    except ValueError:
        return None
    else:
        rendered = json.dumps(parsed, sort_keys=True, indent=4)
        return rendered.split("\n")
|
|
|
|
|
|
|
|
|
2011-07-15 04:16:43 +00:00
|
|
|
def urldecode(s):
    """
        Takes a urlencoded string and returns a list of (key, value) tuples.

        Fields are returned in the order they appear in s. The parser's
        defaults are used, so fields with blank values are omitted.
    """
    # urlparse.parse_qsl is the maintained home of this parser; the copy in
    # the cgi module is only a deprecated alias for it.
    return urlparse.parse_qsl(s)
|
|
|
|
|
|
|
|
|
2012-02-09 03:40:31 +00:00
|
|
|
def urlencode(s):
    """
        Takes a sequence of (key, value) pairs and returns the matching
        urlencoded string.

        Each pair may be any two-item iterable (for instance a list); it
        is converted to a tuple before encoding.
    """
    pairs = list(map(tuple, s))
    return urllib.urlencode(pairs, False)
|
|
|
|
|
|
|
|
|
2010-02-16 04:09:07 +00:00
|
|
|
def hexdump(s):
    """
        Returns a list of tuples, one per 16-character row of s:

            (offset, hex, str)

        offset is the row's starting index as a zero-padded, ten-digit
        hexadecimal string; hex is the space-separated two-digit hex value
        of every character in the row; str is the row's text as rendered
        by cleanBin.

        The hex column always has the width of a full row, so a short
        final row lines up with the rows above it. An empty s yields an
        empty list.
    """
    row_length = 16
    # Two hex digits per character plus a single separating space.
    hex_width = row_length * 3 - 1
    rows = []
    for start in range(0, len(s), row_length):
        chunk = s[start:start + row_length]
        hexed = " ".join("%.2x" % ord(char) for char in chunk)
        # Pad a short final row out to the width of a complete one.
        rows.append(("%.10x" % start, hexed.ljust(hex_width), cleanBin(chunk)))
    return rows
|
|
|
|
|
|
|
|
|
2011-08-02 04:52:47 +00:00
|
|
|
def del_all(dict, keys):
    """
        Removes every key listed in keys from dict, in place. Keys that
        are not present are silently skipped.
    """
    for key in keys:
        if key not in dict:
            continue
        del dict[key]
|
2010-11-17 11:03:42 +00:00
|
|
|
|
|
|
|
|
2010-02-16 04:09:07 +00:00
|
|
|
def pretty_size(size):
    """
        Returns a short human-readable rendering of a size in bytes,
        e.g. "100B", "1.5kB", "2M" or "1G".

        The value is expressed in the largest unit it reaches, rounded to
        two decimal places, with a whole-number result printed without a
        fractional part. Sizes beyond the largest unit are still
        expressed in that unit rather than being left unformatted.
    """
    units = [
        ("B", 1),
        ("kB", 2**10),
        ("M", 2**20),
        ("G", 2**30),
    ]
    suffix, divisor = units[0]
    for candidate_suffix, candidate_divisor in units[1:]:
        if size < candidate_divisor:
            break
        suffix, divisor = candidate_suffix, candidate_divisor
    value = round(size / float(divisor), 2)
    # Show "1kB" rather than "1.0kB".
    if value == int(value):
        value = int(value)
    return str(value) + suffix
|
|
|
|
|
|
|
|
|
|
|
|
class Data:
    """
        Locates data files that live alongside an importable module.

        The directory containing the module's file is resolved once, at
        construction time, and stored as an absolute path in self.dirname.
    """
    def __init__(self, name):
        # Note that __import__ hands back the top-level package when it is
        # given a dotted name, so dirname is then that package's directory.
        m = __import__(name)
        dirname, _ = os.path.split(m.__file__)
        self.dirname = os.path.abspath(dirname)

    def path(self, path):
        """
            Returns a path to the package data housed at 'path' under this
            module. Path can be a path to a file, or to a directory.

            This function will raise ValueError if the path does not exist.
        """
        fullpath = os.path.join(self.dirname, path)
        if not os.path.exists(fullpath):
            # Call-style raise, matching the rest of this file.
            raise ValueError("dataPath: %s does not exist." % fullpath)
        return fullpath
|
2011-08-02 04:14:33 +00:00
|
|
|
# Module-level Data instance, built from this module's own import name, for
# looking up data files relative to the directory that Data resolves.
pkg_data = Data(__name__)
|
2010-02-16 04:09:07 +00:00
|
|
|
|
|
|
|
|
2011-03-15 00:05:33 +00:00
|
|
|
class LRUCache:
    """
        A decorator that implements a self-expiring LRU cache for class
        methods (not functions!).

        Cache data is tracked as attributes on the object itself. There is
        therefore a separate cache for each object instance.

        Results are keyed on the tuple of positional arguments, which must
        therefore be hashable. The wrapped method accepts positional
        arguments only. At most 'size' results are retained per instance;
        once that is exceeded the least recently used entry is discarded.
    """
    def __init__(self, size=100):
        self.size = size

    def __call__(self, f):
        # Per-method attribute names, so that several cached methods on the
        # same object do not share storage.
        cacheName = "_cached_%s" % f.__name__
        cacheListName = "_cachelist_%s" % f.__name__
        size = self.size

        @functools.wraps(f)
        def wrap(self, *args):
            # Create the storage lazily on first use for this instance.
            if not hasattr(self, cacheName):
                setattr(self, cacheName, {})
                setattr(self, cacheListName, [])
            cache = getattr(self, cacheName)
            # cacheList orders keys from most to least recently used.
            cacheList = getattr(self, cacheListName)
            if args in cache:
                # Hit: move the key to the front of the recency list.
                cacheList.remove(args)
                cacheList.insert(0, args)
                return cache[args]
            else:
                ret = f(self, *args)
                cacheList.insert(0, args)
                cache[args] = ret
                if len(cacheList) > size:
                    # Evict the entry at the tail: the least recently used.
                    d = cacheList.pop()
                    cache.pop(d)
                return ret
        return wrap
|
2011-08-03 10:38:23 +00:00
|
|
|
|
|
|
|
|
|
|
|
def parse_url(url):
    """
        Returns a (scheme, host, port, path) tuple, or None on error.

        None is returned when the URL has no scheme, or when the text
        after the last ":" of the network location is not an integer.
        Without an explicit port, 443 is used for "https" and 80 for every
        other scheme. The returned path also carries any parameters, query
        string and fragment, and always starts with "/".
    """
    scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
    if not scheme:
        return None
    if ':' in netloc:
        # Only the last colon separates host from port. Network locations
        # holding other colons (user:password@ prefixes, bracketed IPv6
        # literals) get no special treatment here.
        host, port = netloc.rsplit(':', 1)
        try:
            port = int(port)
        except ValueError:
            return None
    else:
        host = netloc
        port = 443 if scheme == "https" else 80
    # Reassemble everything after the network location into one string.
    path = urlparse.urlunparse(('', '', path, params, query, fragment))
    if not path.startswith("/"):
        path = "/" + path
    return scheme, host, port, path
|
|
|
|
|
|
|
|
|
2012-02-18 01:45:22 +00:00
|
|
|
def parse_proxy_spec(url):
    """
        Parses a proxy specification given as a URL.

        Returns the first three items of parse_url's result, or None if
        the URL cannot be parsed or has an empty host.
    """
    parsed = parse_url(url)
    if parsed and parsed[1]:
        return parsed[:3]
    return None
|
|
|
|
|
|
|
|
|
2012-02-18 03:27:09 +00:00
|
|
|
def hostport(scheme, host, port):
    """
        Returns the host component, with a port specification if needed.

        The port is left out only for port 80 with "http" and port 443
        with "https".
    """
    is_default_port = (
        (port == 80 and scheme == "http") or
        (port == 443 and scheme == "https")
    )
    if is_default_port:
        return host
    return "%s:%s" % (host, port)
|
|
|
|
|
|
|
|
|
|
|
|
def unparse_url(scheme, host, port, path=""):
    """
        Returns a URL string, constructed from the specified components.

        The host-and-port part is produced by hostport.
    """
    location = hostport(scheme, host, port)
    return "%s://%s%s" % (scheme, location, path)
|
|
|
|
|
|
|
|
|
2012-02-08 05:25:00 +00:00
|
|
|
def clean_hanging_newline(t):
    """
        Many editors will silently add a newline to the final line of a
        document (I'm looking at you, Vim). This function fixes this common
        problem at the risk of removing a hanging newline in the rare cases
        where the user actually intends it.

        At most one trailing newline is removed. An empty string is
        returned unchanged.
    """
    # Check for emptiness first: indexing an empty string would raise.
    if t and t[-1] == "\n":
        return t[:-1]
    return t
|
|
|
|
|
|
|
|
|
2011-09-09 03:27:31 +00:00
|
|
|
def parse_size(s):
    """
        Parses a size specification. Valid specifications are:

            123: bytes
            123k: kilobytes
            123m: megabytes
            123g: gigabytes

        Suffixes are case-insensitive and units are powers of 1024.

        Returns the size in bytes, or None if s is empty. Raises
        ValueError if the numeric part is not an integer.
    """
    if not s:
        return None
    multipliers = {
        "k": 1024**1,
        "m": 1024**2,
        "g": 1024**3,
    }
    mult = multipliers.get(s[-1].lower())
    if mult:
        digits = s[:-1]
    else:
        digits = s
        mult = 1
    try:
        return int(digits) * mult
    except ValueError:
        # Report the specification exactly as given, suffix included.
        raise ValueError("Invalid size specification: %s" % s)
|