From d0dfee06813f31effc7e32244e403e8583b5f2e6 Mon Sep 17 00:00:00 2001 From: Fabio Caccamo Date: Fri, 13 Mar 2020 13:54:53 +0100 Subject: [PATCH] Added data format auto-detection when creating instance with data from filepath or url. --- benedict/dicts/io/io_dict.py | 40 ++++++++++----------- benedict/dicts/io/io_util.py | 10 +++++- benedict/serializers/__init__.py | 8 +++++ tests/dicts/io/test_io_dict_base64.py | 6 ++++ tests/dicts/io/test_io_dict_json.py | 6 ++++ tests/dicts/io/test_io_dict_pickle.py | 6 ++++ tests/dicts/io/test_io_dict_query_string.py | 6 ++++ tests/dicts/io/test_io_dict_toml.py | 6 ++++ tests/dicts/io/test_io_dict_xml.py | 6 ++++ tests/dicts/io/test_io_dict_yaml.py | 6 ++++ tests/dicts/io/test_io_util.py | 24 +++++++++++++ 11 files changed, 103 insertions(+), 21 deletions(-) diff --git a/benedict/dicts/io/io_dict.py b/benedict/dicts/io/io_dict.py index 4d6e77d..1223e3c 100644 --- a/benedict/dicts/io/io_dict.py +++ b/benedict/dicts/io/io_dict.py @@ -13,15 +13,15 @@ class IODict(dict): # if first argument is data-string, url or filepath try to decode it. # use 'format' kwarg to specify the decoder to use, default 'json'. if len(args) and type_util.is_string(args[0]): - d = IODict._decode_init(args[0], *args, **kwargs) + d = IODict._decode_init(args[0], **kwargs) super(IODict, self).__init__(d) return super(IODict, self).__init__(*args, **kwargs) @staticmethod - def _decode_init(s, *args, **kwargs): - # TODO: auto-detect format value from file extension, fallback to json. - format = kwargs.pop('format', 'json').lower() + def _decode_init(s, **kwargs): + default_format = io_util.autodetect_format(s, default='json') + format = kwargs.pop('format', default_format).lower() if format in ['b64', 'base64']: kwargs.setdefault('subformat', 'json') # decode data-string and initialize with dict data. @@ -69,7 +69,7 @@ class IODict(dict): """ kwargs['subformat'] = subformat kwargs['encoding'] = encoding - return cls(IODict._decode(s, 'base64', **kwargs)) + return cls(s, format='base64', **kwargs) @classmethod def from_csv(cls, s, columns=None, columns_row=True, **kwargs): @@ -81,7 +81,7 @@ class IODict(dict): """ kwargs['columns'] = columns kwargs['columns_row'] = columns_row - return cls(IODict._decode(s, 'csv', **kwargs)) + return cls(s, format='csv', **kwargs) @classmethod def from_pickle(cls, s, **kwargs): @@ -91,7 +91,7 @@ class IODict(dict): https://docs.python.org/3/library/pickle.html Return a new dict instance. A ValueError is raised in case of failure. """ - return cls(IODict._decode(s, 'pickle', **kwargs)) + return cls(s, format='pickle', **kwargs) @classmethod def from_json(cls, s, **kwargs): @@ -101,7 +101,7 @@ class IODict(dict): https://docs.python.org/3/library/json.html Return a new dict instance. A ValueError is raised in case of failure. """ - return cls(IODict._decode(s, 'json', **kwargs)) + return cls(s, format='json', **kwargs) @classmethod def from_query_string(cls, s, **kwargs): @@ -109,7 +109,7 @@ class IODict(dict): Load and decode query-string from url, filepath or data-string. Return a new dict instance. A ValueError is raised in case of failure. """ - return cls(IODict._decode(s, 'query_string', **kwargs)) + return cls(s, format='query_string', **kwargs) @classmethod def from_toml(cls, s, **kwargs): @@ -119,7 +119,7 @@ class IODict(dict): https://pypi.org/project/toml/ Return a new dict instance. A ValueError is raised in case of failure. """ - return cls(IODict._decode(s, 'toml', **kwargs)) + return cls(s, format='toml', **kwargs) @classmethod def from_xml(cls, s, **kwargs): @@ -129,7 +129,7 @@ class IODict(dict): https://github.com/martinblech/xmltodict Return a new dict instance. A ValueError is raised in case of failure. """ - return cls(IODict._decode(s, 'xml', **kwargs)) + return cls(s, format='xml', **kwargs) @classmethod def from_yaml(cls, s, **kwargs): @@ -139,7 +139,7 @@ class IODict(dict): https://pyyaml.org/wiki/PyYAMLDocumentation Return a new dict instance. A ValueError is raised in case of failure. """ - return cls(IODict._decode(s, 'yaml', **kwargs)) + return cls(s, format='yaml', **kwargs) def to_base64(self, subformat='json', encoding='utf-8', **kwargs): """ @@ -151,7 +151,7 @@ class IODict(dict): """ kwargs['subformat'] = subformat kwargs['encoding'] = encoding - return IODict._encode(self, 'base64', **kwargs) + return self._encode(self, 'base64', **kwargs) def to_csv(self, key='values', columns=None, columns_row=True, **kwargs): """ @@ -163,7 +163,7 @@ class IODict(dict): """ kwargs['columns'] = columns kwargs['columns_row'] = columns_row - return IODict._encode(self[key], 'csv', **kwargs) + return self._encode(self[key], 'csv', **kwargs) def to_pickle(self, **kwargs): """ @@ -174,7 +174,7 @@ class IODict(dict): Return the encoded string and optionally save it at 'filepath'. A ValueError is raised in case of failure. """ - return IODict._encode(self, 'pickle', **kwargs) + return self._encode(self, 'pickle', **kwargs) def to_json(self, **kwargs): """ @@ -184,7 +184,7 @@ class IODict(dict): Return the encoded string and optionally save it at 'filepath'. A ValueError is raised in case of failure. """ - return IODict._encode(self, 'json', **kwargs) + return self._encode(self, 'json', **kwargs) def to_query_string(self, **kwargs): """ @@ -192,7 +192,7 @@ class IODict(dict): Return the encoded string and optionally save it at 'filepath'. A ValueError is raised in case of failure. """ - return IODict._encode(self, 'query_string', **kwargs) + return self._encode(self, 'query_string', **kwargs) def to_toml(self, **kwargs): """ @@ -202,7 +202,7 @@ class IODict(dict): Return the encoded string and optionally save it at 'filepath'. A ValueError is raised in case of failure. """ - return IODict._encode(self, 'toml', **kwargs) + return self._encode(self, 'toml', **kwargs) def to_xml(self, **kwargs): """ @@ -212,7 +212,7 @@ class IODict(dict): Return the encoded string and optionally save it at 'filepath'. A ValueError is raised in case of failure. """ - return IODict._encode(self, 'xml', **kwargs) + return self._encode(self, 'xml', **kwargs) def to_yaml(self, **kwargs): """ @@ -222,4 +222,4 @@ class IODict(dict): Return the encoded string and optionally save it at 'filepath'. A ValueError is raised in case of failure. """ - return IODict._encode(self, 'yaml', **kwargs) + return self._encode(self, 'yaml', **kwargs) diff --git a/benedict/dicts/io/io_util.py b/benedict/dicts/io/io_util.py index b687414..e936620 100644 --- a/benedict/dicts/io/io_util.py +++ b/benedict/dicts/io/io_util.py @@ -1,13 +1,21 @@ # -*- coding: utf-8 -*- from benedict.serializers import ( - get_serializer_by_format, get_serializers_extensions, ) + get_format_by_path, get_serializer_by_format, get_serializers_extensions, ) import errno import os import requests +def autodetect_format(s, default=None): + if is_data(s): + return default + elif is_url(s) or is_filepath(s): + return get_format_by_path(s) + return default + + def decode(s, format, **kwargs): serializer = get_serializer_by_format(format) if not serializer: diff --git a/benedict/serializers/__init__.py b/benedict/serializers/__init__.py index 5021bbf..2993050 100644 --- a/benedict/serializers/__init__.py +++ b/benedict/serializers/__init__.py @@ -39,6 +39,14 @@ _SERIALIZERS_EXTENSIONS = [ '.{}'.format(extension) for extension in _SERIALIZERS.keys()] +def get_format_by_path(path): + path = path.lower() + for extension in _SERIALIZERS_EXTENSIONS: + if path.endswith(extension): + return extension[1:] + return None + + def get_serializer_by_format(format): return _SERIALIZERS.get((format or '').lower().replace(' ', '_')) diff --git a/tests/dicts/io/test_io_dict_base64.py b/tests/dicts/io/test_io_dict_base64.py index 847dd94..bbe42d8 100644 --- a/tests/dicts/io/test_io_dict_base64.py +++ b/tests/dicts/io/test_io_dict_base64.py @@ -53,6 +53,9 @@ class io_dict_base64_test_case(io_dict_test_case): # constructor d = IODict(filepath, format='base64') self.assertTrue(isinstance(d, dict)) + # constructor with format autodetection + d = IODict(filepath) + self.assertTrue(isinstance(d, dict)) def test_from_base64_with_valid_file_valid_content_invalid_format(self): filepath = self.input_path('valid-content.json') @@ -97,6 +100,9 @@ class io_dict_base64_test_case(io_dict_test_case): # constructor d = IODict(url, format='base64') self.assertTrue(isinstance(d, dict)) + # constructor with format autodetection + d = IODict(url) + self.assertTrue(isinstance(d, dict)) def test_from_base64_with_valid_url_invalid_content(self): url = 'https://github.com/fabiocaccamo/python-benedict' diff --git a/tests/dicts/io/test_io_dict_json.py b/tests/dicts/io/test_io_dict_json.py index 6d492bf..43bb30c 100644 --- a/tests/dicts/io/test_io_dict_json.py +++ b/tests/dicts/io/test_io_dict_json.py @@ -78,6 +78,9 @@ class io_dict_json_test_case(io_dict_test_case): # constructor d = IODict(filepath, format='json') self.assertTrue(isinstance(d, dict)) + # constructor with format autodetection + d = IODict(filepath) + self.assertTrue(isinstance(d, dict)) def test_from_json_with_valid_file_valid_content_invalid_format(self): filepath = self.input_path('valid-content.base64') @@ -122,6 +125,9 @@ class io_dict_json_test_case(io_dict_test_case): # constructor d = IODict(url, format='json') self.assertTrue(isinstance(d, dict)) + # constructor with format autodetection + d = IODict(url) + self.assertTrue(isinstance(d, dict)) def test_from_json_with_valid_url_invalid_content(self): url = 'https://github.com/fabiocaccamo/python-benedict' diff --git a/tests/dicts/io/test_io_dict_pickle.py b/tests/dicts/io/test_io_dict_pickle.py index 824890f..0bdef19 100644 --- a/tests/dicts/io/test_io_dict_pickle.py +++ b/tests/dicts/io/test_io_dict_pickle.py @@ -48,6 +48,9 @@ class io_dict_pickle_test_case(io_dict_test_case): # constructor d = IODict(filepath, format='pickle') self.assertTrue(isinstance(d, dict)) + # constructor with format autodetection + d = IODict(filepath) + self.assertTrue(isinstance(d, dict)) def test_from_pickle_with_valid_file_valid_content_invalid_format(self): filepath = self.input_path('valid-content.json') @@ -92,6 +95,9 @@ class io_dict_pickle_test_case(io_dict_test_case): # constructor d = IODict(url, format='pickle') self.assertTrue(isinstance(d, dict)) + # constructor with format autodetection + d = IODict(url) + self.assertTrue(isinstance(d, dict)) def test_from_pickle_with_valid_url_invalid_content(self): url = 'https://github.com/fabiocaccamo/python-benedict' diff --git a/tests/dicts/io/test_io_dict_query_string.py b/tests/dicts/io/test_io_dict_query_string.py index 3c5089d..a1500f6 100644 --- a/tests/dicts/io/test_io_dict_query_string.py +++ b/tests/dicts/io/test_io_dict_query_string.py @@ -36,6 +36,9 @@ class io_dict_query_string_test_case(io_dict_test_case): # constructor d = IODict(filepath, format='query_string') self.assertTrue(isinstance(d, dict)) + # constructor with format autodetection + d = IODict(filepath) + self.assertTrue(isinstance(d, dict)) def test_from_query_string_with_valid_file_valid_content_invalid_format(self): filepath = self.input_path('valid-content.base64') @@ -80,6 +83,9 @@ class io_dict_query_string_test_case(io_dict_test_case): # constructor d = IODict(url, format='query_string') self.assertTrue(isinstance(d, dict)) + # constructor with format autodetection + d = IODict(url) + self.assertTrue(isinstance(d, dict)) def test_from_query_string_with_valid_url_invalid_content(self): url = 'https://github.com/fabiocaccamo/python-benedict' diff --git a/tests/dicts/io/test_io_dict_toml.py b/tests/dicts/io/test_io_dict_toml.py index 772c2b0..08feb6f 100644 --- a/tests/dicts/io/test_io_dict_toml.py +++ b/tests/dicts/io/test_io_dict_toml.py @@ -41,6 +41,9 @@ d = 4 # constructor d = IODict(filepath, format='toml') self.assertTrue(isinstance(d, dict)) + # constructor with format autodetection + d = IODict(filepath) + self.assertTrue(isinstance(d, dict)) def test_from_toml_with_valid_file_valid_content_invalid_format(self): # filepath = self.input_path('valid-content.base64') @@ -85,6 +88,9 @@ d = 4 # constructor d = IODict(url, format='toml') self.assertTrue(isinstance(d, dict)) + # constructor with format autodetection + d = IODict(url) + self.assertTrue(isinstance(d, dict)) def test_from_toml_with_valid_url_invalid_content(self): url = 'https://github.com/fabiocaccamo/python-benedict' diff --git a/tests/dicts/io/test_io_dict_xml.py b/tests/dicts/io/test_io_dict_xml.py index e6b019c..3e311dc 100644 --- a/tests/dicts/io/test_io_dict_xml.py +++ b/tests/dicts/io/test_io_dict_xml.py @@ -44,6 +44,9 @@ class io_dict_xml_test_case(io_dict_test_case): # constructor d = IODict(filepath, format='xml') self.assertTrue(isinstance(d, dict)) + # constructor with format autodetection + d = IODict(filepath) + self.assertTrue(isinstance(d, dict)) def test_from_xml_with_valid_file_valid_content_invalid_format(self): filepath = self.input_path('valid-content.base64') @@ -88,6 +91,9 @@ class io_dict_xml_test_case(io_dict_test_case): # constructor d = IODict(url, format='xml') self.assertTrue(isinstance(d, dict)) + # constructor with format autodetection + d = IODict(url) + self.assertTrue(isinstance(d, dict)) def test_from_xml_with_valid_url_invalid_content(self): url = 'https://github.com/fabiocaccamo/python-benedict' diff --git a/tests/dicts/io/test_io_dict_yaml.py b/tests/dicts/io/test_io_dict_yaml.py index 95e3f61..ae75487 100644 --- a/tests/dicts/io/test_io_dict_yaml.py +++ b/tests/dicts/io/test_io_dict_yaml.py @@ -40,6 +40,9 @@ b: # constructor d = IODict(filepath, format='yaml') self.assertTrue(isinstance(d, dict)) + # constructor with format autodetection + d = IODict(filepath) + self.assertTrue(isinstance(d, dict)) def test_from_yaml_with_valid_file_valid_content_invalid_format(self): filepath = self.input_path('valid-content.base64') @@ -84,6 +87,9 @@ b: # constructor d = IODict(url, format='yaml') self.assertTrue(isinstance(d, dict)) + # constructor with format autodetection + d = IODict(url) + self.assertTrue(isinstance(d, dict)) def test_from_yaml_with_valid_url_invalid_content(self): url = 'https://github.com/fabiocaccamo/python-benedict' diff --git a/tests/dicts/io/test_io_util.py b/tests/dicts/io/test_io_util.py index 5ed9644..71e929c 100644 --- a/tests/dicts/io/test_io_util.py +++ b/tests/dicts/io/test_io_util.py @@ -11,6 +11,30 @@ class io_util_test_case(unittest.TestCase): # TODO pass + def test_autodetect_format_by_data(self): + s = '{"a": 1, "b": 2, "c": 3}' + self.assertEqual(io_util.autodetect_format(s), None) + + def test_autodetect_format_by_data_with_default(self): + s = '{"a": 1, "b": 2, "c": 3}' + self.assertEqual(io_util.autodetect_format(s, default='json'), 'json') + + def test_autodetect_format_by_path(self): + s = 'path-to/data.xml' + self.assertEqual(io_util.autodetect_format(s), 'xml') + + def test_autodetect_format_by_path_with_unsupported_format(self): + s = 'path-to/data.jpg' + self.assertEqual(io_util.autodetect_format(s), None) + + def test_autodetect_format_by_url(self): + s = 'https://github.com/fabiocaccamo/python-benedict.xml' + self.assertEqual(io_util.autodetect_format(s), 'xml') + + def test_autodetect_format_by_url_with_unsupported_format(self): + s = 'https://github.com/fabiocaccamo/python-benedict.jpg' + self.assertEqual(io_util.autodetect_format(s), None) + def test_decode_with_invalid_format(self): with self.assertRaises(ValueError): io_util.decode('', format='xxx')