Add possibility to restrict input data sources. #287
This commit is contained in:
parent
c5308777f3
commit
db2b06bfa4
20
README.md
20
README.md
|
@ -218,7 +218,7 @@ For simplifying I/O operations, `benedict` supports a variety of input/output me
|
|||
|
||||
#### Input via constructor
|
||||
|
||||
It is possible to create a `benedict` instance directly from data-source (`filepath`, `url`, `s3` or `data-string`) by passing the data source and the data format (optional, default "json") in the constructor.
|
||||
It is possible to create a `benedict` instance directly from data-source (`filepath`, `url`, `s3` or `data` string) by passing the data source and the data format (optional, default "json") in the constructor.
|
||||
|
||||
```python
|
||||
# filepath
|
||||
|
@ -230,14 +230,28 @@ d = benedict("https://localhost:8000/data.xml", format="xml")
|
|||
# s3
|
||||
d = benedict("s3://my-bucket/data.xml", s3_options={"aws_access_key_id": "...", "aws_secret_access_key": "..."})
|
||||
|
||||
# data-string
|
||||
# data
|
||||
d = benedict('{"a": 1, "b": 2, "c": 3, "x": 7, "y": 8, "z": 9}')
|
||||
```
|
||||
|
||||
#### Input methods
|
||||
|
||||
- All *input* methods can be accessed as class methods and are prefixed by `from_*` followed by the format name.
|
||||
- In all *input* methods, the first argument can represent: **url**, **filepath** or **data-string**.
|
||||
- In all *input* methods, the first argument can represent a source: **file** path, **url**, **s3** url, or **data** string.
|
||||
|
||||
#### Input sources
|
||||
|
||||
All supported sources (**file**, **url**, **s3**, **data**) are allowed by default, but in certain situations, when the input data comes from **untrusted sources**, it may be useful to restrict the allowed sources using the `sources` argument:
|
||||
|
||||
```python
|
||||
# url
|
||||
d = benedict("https://localhost:8000/data.json", sources=["url"]) # -> ok
|
||||
d = benedict.from_json("https://localhost:8000/data.json", sources=["url"]) # -> ok
|
||||
|
||||
# s3
|
||||
d = benedict("s3://my-bucket/data.json", sources=["url"]) # -> raise ValueError
|
||||
d = benedict.from_json("s3://my-bucket/data.json", sources=["url"]) # -> raise ValueError
|
||||
```
|
||||
|
||||
#### Output methods
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@ import fsutil
|
|||
|
||||
from benedict.extras import require_s3
|
||||
from benedict.serializers import get_format_by_path, get_serializer_by_format
|
||||
from benedict.utils import type_util
|
||||
|
||||
|
||||
def autodetect_format(s):
|
||||
|
@ -23,6 +24,24 @@ def autodetect_format(s):
|
|||
return None
|
||||
|
||||
|
||||
def check_source(source, allowed_sources):
    """
    Ensure that *source* is permitted by *allowed_sources*.

    :param source: the name of the detected source kind to validate.
    :param allowed_sources: the permitted source names; a single string,
        a list/tuple of strings, or a falsy value meaning "no restriction".
    :raises ValueError: if *source* is not among the allowed sources and
        no "all sources" marker ("*", "all", "auto") is present.
    """
    # nothing specified -> every source is allowed
    if not allowed_sources:
        return
    # normalize the argument to a list where possible
    if type_util.is_string(allowed_sources):
        permitted = [allowed_sources]
    elif type_util.is_list_or_tuple(allowed_sources):
        permitted = list(allowed_sources)
    else:
        # any other value is used as-is for the membership checks below
        permitted = allowed_sources
    # a wildcard marker allows every source
    wildcard_markers = ("*", "all", "auto")
    if any(marker in permitted for marker in wildcard_markers):
        return
    if source in permitted:
        return
    raise ValueError(f"Invalid source: '{source}' (source not allowed).")
|
||||
|
||||
|
||||
def decode(s, format, **kwargs):
|
||||
s = str(s)
|
||||
serializer = get_serializer_by_format(format)
|
||||
|
@ -92,16 +111,21 @@ def read_content(s, format=None, options=None):
|
|||
# s -> filepath or url or data
|
||||
# options.setdefault("format", format)
|
||||
options = options or {}
|
||||
sources = options.pop("sources", None)
|
||||
s = s.strip()
|
||||
if is_data(s):
|
||||
check_source("data", allowed_sources=sources)
|
||||
return s
|
||||
elif is_url(s):
|
||||
check_source("url", allowed_sources=sources)
|
||||
requests_options = options.pop("requests_options", None) or {}
|
||||
return read_content_from_url(s, requests_options, format)
|
||||
elif is_s3(s):
|
||||
check_source("s3", allowed_sources=sources)
|
||||
s3_options = options.pop("s3_options", None) or {}
|
||||
return read_content_from_s3(s, s3_options, format)
|
||||
elif is_filepath(s):
|
||||
check_source("file", allowed_sources=sources)
|
||||
return read_content_from_file(s, format)
|
||||
# one-line data?!
|
||||
return s
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
from benedict import benedict
|
||||
from tests.dicts.io.test_io_dict import io_dict_test_case
|
||||
|
||||
|
||||
class github_issue_0287_test_case(io_dict_test_case):
    """
    Test case for github issue 0287 (restricting input data sources).
    https://github.com/fabiocaccamo/python-benedict/issues/287

    To run this specific test:
    - Run python -m unittest tests.github.test_issue_0287
    """

    def test_sources_argument_with_all_list(self):
        # every "all sources" marker, passed inside a list, must be accepted
        filepath = self.input_path("valid-content.json")
        for marker in ("*", "all", "auto"):
            benedict(filepath, sources=[marker])
            benedict.from_json(filepath, sources=[marker])

    def test_sources_argument_with_all_string(self):
        # every "all sources" marker, passed as a plain string, must be accepted
        filepath = self.input_path("valid-content.json")
        for marker in ("*", "all", "auto"):
            benedict(filepath, sources=marker)
            benedict.from_json(filepath, sources=marker)

    def test_sources_argument_with_list(self):
        # a file path is accepted only when "file" is the allowed source
        filepath = self.input_path("valid-content.json")
        for loader in (benedict, benedict.from_json):
            loader(filepath, sources=["file"])
            for blocked in ("url", "s3", "data"):
                with self.assertRaises(ValueError):
                    loader(filepath, sources=[blocked])

    def test_sources_argument_with_string(self):
        # same checks as the list variant, with the source given as a string
        filepath = self.input_path("valid-content.json")
        for loader in (benedict, benedict.from_json):
            loader(filepath, sources="file")
            for blocked in ("url", "s3", "data"):
                with self.assertRaises(ValueError):
                    loader(filepath, sources=blocked)
|
Loading…
Reference in New Issue