# -*- coding: utf-8 -*-

from six import binary_type, string_types, StringIO
from slugify import slugify

import base64
import csv
import errno
import json
import os
import re
import requests
import xmltodict
import toml
import yaml

try:
    # python 3
    from urllib.parse import unquote
    from urllib.parse import unquote_plus
    from urllib.parse import urlencode
    from urllib.parse import parse_qs
except ImportError:
    # python 2
    from urllib import unquote
    from urllib import unquote_plus
    from urllib import urlencode
    from urlparse import parse_qs


def decode(s, format, **kwargs):
    decode_func = _get_format_decoder(format)
    if decode_func:
        decode_opts = kwargs.copy()
        data = decode_func(s.strip(), **decode_opts)
        return data
    else:
        raise ValueError('Invalid format: {}.'.format(format))


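# Usage sketch (illustrative): 'format' is normalized with slugify, so 'json',
# 'JSON' and 'Json' all select the same decoder; input is stripped first.
#
#   decode('{"a": 1}', 'json')   # -> {'a': 1}
#   decode('a: 1', 'yaml')       # -> {'a': 1}
#   decode('...', 'unknown')     # -> ValueError: Invalid format: unknown.

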
def decode_base64(s, **kwargs):
    # fix urlencoded chars
    s = unquote(s)
    # fix padding
    m = len(s) % 4
    if m != 0:
        s += '=' * (4 - m)
    data = base64.b64decode(s)
    subformat = kwargs.pop('subformat', None)
    encoding = kwargs.pop('encoding', 'utf-8' if subformat else None)
    if encoding:
        data = data.decode(encoding)
    if subformat:
        decode_func = _get_format_decoder(subformat)
        if decode_func:
            data = decode_func(data, **kwargs)
    return data


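# Usage sketch (illustrative): missing '=' padding is restored automatically,
# and 'subformat' chains a second decoder over the decoded text. Without
# 'subformat' or an explicit 'encoding', raw bytes are returned.
#
#   decode_base64('aGVsbG8')                         # -> b'hello'
#   decode_base64('aGVsbG8', encoding='utf-8')       # -> 'hello'
#   decode_base64('eyJhIjogMX0', subformat='json')   # -> {'a': 1}

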
def decode_csv(s, **kwargs):
    # kwargs.setdefault('delimiter', ',')
    if kwargs.pop('quote', False):
        kwargs.setdefault('quoting', csv.QUOTE_ALL)
    columns = kwargs.pop('columns', None)
    # note: when columns_row=False, pass 'columns' explicitly,
    # otherwise rows cannot be mapped to dict keys
    columns_row = kwargs.pop('columns_row', True)
    f = StringIO(s)
    r = csv.reader(f, **kwargs)
    ln = 0
    data = []
    for row in r:
        if ln == 0 and columns_row:
            if not columns:
                columns = row
            ln += 1
            continue
        d = dict(zip(columns, row))
        data.append(d)
        ln += 1
    return data


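# Usage sketch (illustrative): the first row provides the dict keys unless
# 'columns' is passed or 'columns_row' is disabled.
#
#   decode_csv('name,age\nalice,30\nbob,25')
#   # -> [{'name': 'alice', 'age': '30'}, {'name': 'bob', 'age': '25'}]

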
def decode_json(s, **kwargs):
    data = json.loads(s, **kwargs)
    return data


def decode_query_string(s, **kwargs):
    flat = kwargs.pop('flat', True)
    qs_re = r'^(([\w\-\%\+]+\=[\w\-\%\+]*)+([\&]{1})?)+'
    qs_pattern = re.compile(qs_re)
    if qs_pattern.match(s):
        data = parse_qs(s)
        if flat:
            data = {key: value[0] for key, value in data.items()}
        return data
    else:
        raise ValueError('Invalid query string: {}'.format(s))


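# Usage sketch (illustrative): parse_qs returns a list of values per key; with
# the default flat=True only the first value of each key is kept.
#
#   decode_query_string('a=1&b=2')              # -> {'a': '1', 'b': '2'}
#   decode_query_string('a=1&a=2', flat=False)  # -> {'a': ['1', '2']}

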
def decode_xml(s, **kwargs):
    kwargs.setdefault('dict_constructor', dict)
    data = xmltodict.parse(s, **kwargs)
    return data


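# Usage sketch (illustrative): 'dict_constructor' defaults to plain dict, so
# nested elements come back as regular dicts instead of OrderedDicts.
#
#   decode_xml('<a><b>1</b></a>')   # -> {'a': {'b': '1'}}

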
def decode_toml(s, **kwargs):
    data = toml.loads(s, **kwargs)
    return data


def decode_yaml(s, **kwargs):
    # note: yaml.Loader can construct arbitrary Python objects;
    # pass Loader=yaml.SafeLoader when decoding untrusted input
    kwargs.setdefault('Loader', yaml.Loader)
    data = yaml.load(s, **kwargs)
    return data


def encode(d, format, **kwargs):
    encode_func = _get_format_encoder(format)
    if encode_func:
        s = encode_func(d, **kwargs)
        return s
    else:
        raise ValueError('Invalid format: {}.'.format(format))


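# Usage sketch (illustrative): the counterpart of decode, dispatching to the
# matching encoder from the _formats registry below.
#
#   encode({'a': 1}, 'json')   # -> '{"a": 1}'
#   encode({'a': 1}, 'toml')   # -> 'a = 1\n'

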
def encode_base64(d, **kwargs):
    data = d
    subformat = kwargs.pop('subformat', None)
    encoding = kwargs.pop('encoding', 'utf-8' if subformat else None)
    if not isinstance(data, string_types) and subformat:
        encode_func = _get_format_encoder(subformat)
        if encode_func:
            data = encode_func(data, **kwargs)
    if isinstance(data, string_types) and encoding:
        data = data.encode(encoding)
    data = base64.b64encode(data)
    if isinstance(data, binary_type) and encoding:
        data = data.decode(encoding)
    return data


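# Usage sketch (illustrative): non-string values are serialized with the
# 'subformat' encoder first, then base64-encoded; this round-trips with
# decode_base64.
#
#   encode_base64({'a': 1}, subformat='json')        # -> 'eyJhIjogMX0='
#   decode_base64('eyJhIjogMX0=', subformat='json')  # -> {'a': 1}

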
def encode_csv(l, **kwargs):
    # kwargs.setdefault('delimiter', ',')
    if kwargs.pop('quote', False):
        kwargs.setdefault('quoting', csv.QUOTE_ALL)
    kwargs.setdefault('lineterminator', '\n')
    columns = kwargs.pop('columns', None)
    columns_row = kwargs.pop('columns_row', True)
    if not columns and len(l) and isinstance(l[0], dict):
        keys = [str(key) for key in l[0].keys()]
        columns = list(sorted(keys))
    f = StringIO()
    w = csv.writer(f, **kwargs)
    if columns_row and columns:
        w.writerow(columns)
    for item in l:
        if isinstance(item, dict):
            row = [item.get(key, '') for key in columns]
        elif isinstance(item, (list, tuple, set)):
            row = item
        else:
            row = [item]
        w.writerow(row)
    data = f.getvalue()
    return data


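# Usage sketch (illustrative): for a list of dicts the column names are taken
# from the first item's keys, sorted alphabetically.
#
#   encode_csv([{'name': 'alice', 'age': '30'}])
#   # -> 'age,name\n30,alice\n'

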
def encode_json(d, **kwargs):
    data = json.dumps(d, **kwargs)
    return data


def encode_query_string(d, **kwargs):
    data = urlencode(d, **kwargs)
    return data


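# Usage sketch (illustrative): values are url-encoded by urlencode, so spaces
# become '+'.
#
#   encode_query_string({'a': '1', 'b': 'two words'})   # -> 'a=1&b=two+words'

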
def encode_toml(d, **kwargs):
    data = toml.dumps(d, **kwargs)
    return data


def encode_xml(d, **kwargs):
    data = xmltodict.unparse(d, **kwargs)
    return data


def encode_yaml(d, **kwargs):
    data = yaml.dump(d, **kwargs)
    return data


def read_content(s):
    # s -> filepath or url or data
    if s.startswith('http://') or s.startswith('https://'):
        content = read_url(s)
    elif os.path.isfile(s):
        content = read_file(s)
    else:
        content = s
    return content


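# Usage sketch (illustrative): the argument is treated as a url, an existing
# file path, or raw data, in that order.
#
#   read_content('https://example.com/data.json')   # fetched via read_url
#   read_content('./data.json')                     # read from disk, if the file exists
#   read_content('{"a": 1}')                        # returned as-is

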
def read_file(filepath):
    with open(filepath, 'r') as handler:
        content = handler.read()
    return content


def read_url(url, *args, **kwargs):
    response = requests.get(url, *args, **kwargs)
    if response.status_code == requests.codes.ok:
        content = response.text
        return content
    else:
        raise ValueError(
            'Invalid url response status code: {}.'.format(
                response.status_code))


def write_file(filepath, content):
    # https://stackoverflow.com/questions/12517451/automatically-creating-directories-with-file-output
    filedir = os.path.dirname(filepath)
    # skip makedirs when filepath has no directory component,
    # otherwise os.makedirs('') would raise
    if filedir and not os.path.exists(filedir):
        try:
            os.makedirs(filedir)
        except OSError as e:
            # guard against race condition
            if e.errno != errno.EEXIST:
                raise
    with open(filepath, 'w+') as handler:
        handler.write(content)
    return True


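# Usage sketch (illustrative): intermediate directories are created on demand
# ('/tmp/new/dir' here is just an example path).
#
#   write_file('/tmp/new/dir/data.json', encode({'a': 1}, 'json'))   # -> True

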
_formats = {
    'b64': {
        'decoder': decode_base64,
        'encoder': encode_base64,
    },
    'base64': {
        'decoder': decode_base64,
        'encoder': encode_base64,
    },
    'csv': {
        'decoder': decode_csv,
        'encoder': encode_csv,
    },
    'json': {
        'decoder': decode_json,
        'encoder': encode_json,
    },
    'qs': {
        'decoder': decode_query_string,
        'encoder': encode_query_string,
    },
    'query_string': {
        'decoder': decode_query_string,
        'encoder': encode_query_string,
    },
    'toml': {
        'decoder': decode_toml,
        'encoder': encode_toml,
    },
    'yaml': {
        'decoder': decode_yaml,
        'encoder': encode_yaml,
    },
    'yml': {
        'decoder': decode_yaml,
        'encoder': encode_yaml,
    },
    'xml': {
        'decoder': decode_xml,
        'encoder': encode_xml,
    },
}


def _get_format(format):
    return _formats.get(
        slugify(format, separator='_'), {})


def _get_format_decoder(format):
    return _get_format(format).get('decoder', None)


def _get_format_encoder(format):
    return _get_format(format).get('encoder', None)
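

# Usage sketch (illustrative): slugify normalizes the format key, so case and
# separators are irrelevant when looking up a codec.
#
#   _get_format_decoder('query-string') is decode_query_string   # -> True
#   _get_format_encoder('YAML') is encode_yaml                   # -> True
#   _get_format_decoder('unknown')                               # -> None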