python-benedict/benedict/utils/io_util.py

293 lines
7.1 KiB
Python
Raw Normal View History

2019-09-10 14:58:26 +00:00
# -*- coding: utf-8 -*-
from six import binary_type, string_types, StringIO
from slugify import slugify
2019-10-03 16:45:22 +00:00
import base64
import csv
2019-09-10 14:58:26 +00:00
import errno
import json
import os
2019-10-14 12:47:11 +00:00
import re
2019-09-10 14:58:26 +00:00
import requests
2019-09-23 12:13:52 +00:00
import xmltodict
2019-09-20 14:21:04 +00:00
import toml
2019-09-17 09:50:06 +00:00
import yaml
2019-09-10 14:58:26 +00:00
try:
# python 3
from urllib.parse import unquote
from urllib.parse import unquote_plus
2019-10-14 12:47:11 +00:00
from urllib.parse import urlencode
from urllib.parse import parse_qs
except ImportError:
# python 2
from urllib import unquote
from urllib import unquote_plus
2019-10-14 12:47:11 +00:00
from urllib import urlencode
from urlparse import parse_qs
def decode(s, format, **kwargs):
    """Decode the string ``s`` using the decoder registered for ``format``.

    Any extra keyword arguments are forwarded to the decoder.
    Raises ValueError if no decoder is registered for ``format``.
    """
    decode_func = _get_format_decoder(format)
    if not decode_func:
        raise ValueError('Invalid format: {}.'.format(format))
    options = kwargs.copy()
    return decode_func(s.strip(), **options)
2019-10-03 16:45:22 +00:00
def decode_base64(s, **kwargs):
    """Decode a base64 string, optionally running the decoded text
    through a nested ``subformat`` decoder (e.g. 'json', 'yaml').

    ``encoding`` controls bytes->str decoding of the payload; it
    defaults to 'utf-8' when a subformat is requested, else None
    (raw bytes are returned).
    """
    # restore any url-encoded characters before decoding
    s = unquote(s)
    # re-add stripped '=' padding so b64decode accepts the input
    remainder = len(s) % 4
    if remainder:
        s += '=' * (4 - remainder)
    data = base64.b64decode(s)
    subformat = kwargs.pop('subformat', None)
    encoding = kwargs.pop('encoding', 'utf-8' if subformat else None)
    if encoding:
        data = data.decode(encoding)
    if subformat:
        decode_func = _get_format_decoder(subformat)
        if decode_func:
            # remaining kwargs belong to the nested decoder
            data = decode_func(data, **kwargs)
    return data
2019-10-03 16:45:22 +00:00
2019-09-10 14:58:26 +00:00
def decode_csv(s, **kwargs):
    """Parse a CSV string into a list of dicts, one per data row.

    Options: ``columns`` (explicit header list), ``columns_row``
    (default True: treat the first row as the header), ``quote``
    (shortcut for quoting=csv.QUOTE_ALL). Remaining kwargs go to
    ``csv.reader``.
    """
    if kwargs.pop('quote', False):
        kwargs.setdefault('quoting', csv.QUOTE_ALL)
    columns = kwargs.pop('columns', None)
    columns_row = kwargs.pop('columns_row', True)
    reader = csv.reader(StringIO(s), **kwargs)
    data = []
    for index, row in enumerate(reader):
        if index == 0 and columns_row:
            # header row: use it as columns unless columns were given
            if not columns:
                columns = row
            continue
        data.append(dict(zip(columns, row)))
    return data
2019-09-10 14:58:26 +00:00
def decode_json(s, **kwargs):
    """Deserialize a JSON string; kwargs are forwarded to json.loads."""
    return json.loads(s, **kwargs)
2019-10-14 12:47:11 +00:00
def decode_query_string(s, **kwargs):
    """Parse a url query string into a dict.

    With ``flat=True`` (default) each key maps to its first value,
    otherwise to the full list of values.
    Raises ValueError when ``s`` is not a valid query string.
    """
    flat = kwargs.pop('flat', True)
    qs_pattern = re.compile(r'^(([\w\-\%\+]+\=[\w\-\%\+]*)+([\&]{1})?)+')
    if not qs_pattern.match(s):
        raise ValueError('Invalid query string: {}'.format(s))
    data = parse_qs(s)
    if flat:
        # keep only the first value for each key
        return {key: values[0] for key, values in data.items()}
    return data
2019-09-23 12:13:52 +00:00
def decode_xml(s, **kwargs):
    """Parse an XML string into plain dicts (not OrderedDict) via xmltodict."""
    kwargs.setdefault('dict_constructor', dict)
    return xmltodict.parse(s, **kwargs)
2019-09-20 14:21:04 +00:00
def decode_toml(s, **kwargs):
    """Deserialize a TOML string; kwargs are forwarded to toml.loads."""
    return toml.loads(s, **kwargs)
2019-09-17 09:50:06 +00:00
def decode_yaml(s, **kwargs):
    """Deserialize a YAML string.

    NOTE(review): the default yaml.Loader can construct arbitrary
    Python objects — unsafe for untrusted input; callers can pass
    Loader=yaml.SafeLoader to override.
    """
    kwargs.setdefault('Loader', yaml.Loader)
    return yaml.load(s, **kwargs)
2019-09-10 14:58:26 +00:00
def encode(d, format, **kwargs):
    """Encode ``d`` using the encoder registered for ``format``.

    Any extra keyword arguments are forwarded to the encoder.
    Raises ValueError if no encoder is registered for ``format``.
    """
    encode_func = _get_format_encoder(format)
    if not encode_func:
        raise ValueError('Invalid format: {}.'.format(format))
    return encode_func(d, **kwargs)
2019-10-03 16:45:22 +00:00
def encode_base64(d, **kwargs):
    """Base64-encode ``d``, optionally serializing it first with a
    nested ``subformat`` encoder (e.g. 'json', 'yaml').

    ``encoding`` controls the str<->bytes conversions around
    b64encode; it defaults to 'utf-8' when a subformat is given,
    else None (the raw b64encode result is returned).
    """
    data = d
    subformat = kwargs.pop('subformat', None)
    encoding = kwargs.pop('encoding', 'utf-8' if subformat else None)
    # serialize non-string payloads with the nested encoder first
    if subformat and not isinstance(data, string_types):
        encode_func = _get_format_encoder(subformat)
        if encode_func:
            data = encode_func(data, **kwargs)
    # b64encode needs bytes
    if encoding and isinstance(data, string_types):
        data = data.encode(encoding)
    data = base64.b64encode(data)
    # give the caller back text when an encoding was requested
    if encoding and isinstance(data, binary_type):
        data = data.decode(encoding)
    return data
def encode_csv(l, **kwargs):
    """Serialize a list of dicts (or row sequences) to a CSV string.

    Options: ``columns`` (explicit column order), ``columns_row``
    (default True: emit a header row), ``quote`` (shortcut for
    quoting=csv.QUOTE_ALL). Remaining kwargs go to ``csv.writer``.
    """
    if kwargs.pop('quote', False):
        kwargs.setdefault('quoting', csv.QUOTE_ALL)
    kwargs.setdefault('lineterminator', '\n')
    columns = kwargs.pop('columns', None)
    columns_row = kwargs.pop('columns_row', True)
    # derive the column order from the first dict item when not given
    if not columns and len(l) and isinstance(l[0], dict):
        columns = sorted(str(key) for key in l[0].keys())
    buffer = StringIO()
    writer = csv.writer(buffer, **kwargs)
    if columns_row and columns:
        writer.writerow(columns)
    for item in l:
        if isinstance(item, dict):
            # missing keys become empty cells
            writer.writerow([item.get(key, '') for key in columns])
        elif isinstance(item, (list, tuple, set, )):
            writer.writerow(item)
        else:
            writer.writerow([item])
    return buffer.getvalue()
2019-09-10 14:58:26 +00:00
def encode_json(d, **kwargs):
    """Serialize ``d`` to a JSON string; kwargs go to json.dumps."""
    return json.dumps(d, **kwargs)
2019-10-14 12:47:11 +00:00
def encode_query_string(d, **kwargs):
    """Serialize a dict to a url-encoded query string."""
    return urlencode(d, **kwargs)
2019-09-20 14:21:04 +00:00
def encode_toml(d, **kwargs):
    """Serialize ``d`` to a TOML string; kwargs go to toml.dumps."""
    return toml.dumps(d, **kwargs)
2019-09-23 12:13:52 +00:00
def encode_xml(d, **kwargs):
    """Serialize ``d`` to an XML string via xmltodict."""
    return xmltodict.unparse(d, **kwargs)
2019-09-17 09:50:06 +00:00
def encode_yaml(d, **kwargs):
    """Serialize ``d`` to a YAML string; kwargs go to yaml.dump."""
    return yaml.dump(d, **kwargs)
2019-09-10 14:58:26 +00:00
2019-10-03 16:42:44 +00:00
def read_content(s):
    """Resolve ``s`` to content: fetch it if it's an http(s) url, read
    it from disk if it's an existing file path, otherwise return ``s``
    itself as raw data.
    """
    if s.startswith('http://') or s.startswith('https://'):
        return read_url(s)
    if os.path.isfile(s):
        return read_file(s)
    return s
2019-09-10 14:58:26 +00:00
def read_file(filepath):
    """Read and return the whole text content of ``filepath``.

    Uses a context manager so the file handle is released even when
    ``read()`` raises (the original leaked it on error).
    """
    with open(filepath, 'r') as handler:
        content = handler.read()
    return content
def read_url(url, *args, **kwargs):
    """GET ``url`` and return the response body as text.

    Extra args/kwargs are forwarded to ``requests.get``.
    Raises ValueError on any non-ok status code.
    """
    response = requests.get(url, *args, **kwargs)
    if response.status_code != requests.codes.ok:
        raise ValueError(
            'Invalid url response status code: {}.'.format(
                response.status_code))
    return response.text
2019-09-10 14:58:26 +00:00
def write_file(filepath, content):
    """Write ``content`` to ``filepath``, creating any missing parent
    directories first. Returns True on success.

    Fixes vs. original: a bare filename yields filedir == '' and the
    original called os.makedirs('') which raises; the file handle also
    leaked if write() raised — now closed via a context manager.

    # https://stackoverflow.com/questions/12517451/automatically-creating-directories-with-file-output
    """
    filedir = os.path.dirname(filepath)
    if filedir and not os.path.exists(filedir):
        try:
            os.makedirs(filedir)
        except OSError as e:
            # guard against a concurrent-creation race condition
            if e.errno != errno.EEXIST:
                raise e
    with open(filepath, 'w+') as handler:
        handler.write(content)
    return True
# Registry mapping each supported format name (and its aliases:
# b64/base64, qs/query_string, yaml/yml) to its decoder/encoder pair.
# Keys are matched after slugify normalization in _get_format().
_formats = {
    'b64': {
        'decoder': decode_base64,
        'encoder': encode_base64,
    },
    'base64': {
        'decoder': decode_base64,
        'encoder': encode_base64,
    },
    'csv': {
        'decoder': decode_csv,
        'encoder': encode_csv,
    },
    'json': {
        'decoder': decode_json,
        'encoder': encode_json,
    },
    'qs': {
        'decoder': decode_query_string,
        'encoder': encode_query_string,
    },
    'query_string': {
        'decoder': decode_query_string,
        'encoder': encode_query_string,
    },
    'toml': {
        'decoder': decode_toml,
        'encoder': encode_toml,
    },
    'yaml': {
        'decoder': decode_yaml,
        'encoder': encode_yaml,
    },
    'yml': {
        'decoder': decode_yaml,
        'encoder': encode_yaml,
    },
    'xml': {
        'decoder': decode_xml,
        'encoder': encode_xml,
    },
}
def _get_format(format):
    """Return the codecs dict registered for ``format`` (normalized
    via slugify, e.g. 'query-string' -> 'query_string'), or an empty
    dict for unknown formats.
    """
    key = slugify(format, separator='_')
    return _formats.get(key, {})
def _get_format_decoder(format):
    """Return the decoder callable for ``format``, or None if unknown."""
    return _get_format(format).get('decoder', None)
def _get_format_encoder(format):
    """Return the encoder callable for ``format``, or None if unknown."""
    return _get_format(format).get('encoder', None)