Add possibility to restrict input data sources. #287

This commit is contained in:
Fabio Caccamo 2023-11-08 19:04:29 +01:00
parent c5308777f3
commit db2b06bfa4
3 changed files with 113 additions and 3 deletions

View File

@ -218,7 +218,7 @@ For simplifying I/O operations, `benedict` supports a variety of input/output me
#### Input via constructor
It is possible to create a `benedict` instance directly from data-source (`filepath`, `url`, `s3` or `data-string`) by passing the data source and the data format (optional, default "json") in the constructor.
It is possible to create a `benedict` instance directly from data-source (`filepath`, `url`, `s3` or `data` string) by passing the data source and the data format (optional, default "json") in the constructor.
```python
# filepath
@ -230,14 +230,28 @@ d = benedict("https://localhost:8000/data.xml", format="xml")
# s3
d = benedict("s3://my-bucket/data.xml", s3_options={"aws_access_key_id": "...", "aws_secret_access_key": "..."})
# data-string
# data
d = benedict('{"a": 1, "b": 2, "c": 3, "x": 7, "y": 8, "z": 9}')
```
#### Input methods
- All *input* methods can be accessed as class methods and are prefixed by `from_*` followed by the format name.
- In all *input* methods, the first argument can represent: **url**, **filepath** or **data-string**.
- In all *input* methods, the first argument can represent a source: **file** path, **url**, **s3** url, or **data** string.
#### Input sources
All supported sources (**file**, **url**, **s3**, **data**) are allowed by default, but in certain situations, when the input data comes from **untrusted sources**, it may be useful to restrict the allowed sources using the `sources` argument:
```python
# url
d = benedict("https://localhost:8000/data.json", sources=["url"]) # -> ok
d = benedict.from_json("https://localhost:8000/data.json", sources=["url"]) # -> ok
# s3
d = benedict("s3://my-bucket/data.json", sources=["url"]) # -> raise ValueError
d = benedict.from_json("s3://my-bucket/data.json", sources=["url"]) # -> raise ValueError
```
#### Output methods

View File

@ -14,6 +14,7 @@ import fsutil
from benedict.extras import require_s3
from benedict.serializers import get_format_by_path, get_serializer_by_format
from benedict.utils import type_util
def autodetect_format(s):
@ -23,6 +24,24 @@ def autodetect_format(s):
return None
def check_source(source, allowed_sources):
    """
    Ensure that `source` is permitted by `allowed_sources`.

    `allowed_sources` may be a single string or a list/tuple of strings.
    A falsy value (None, empty string, empty list, ...) is treated as
    "everything allowed", and so is any collection containing one of the
    wildcard markers "*", "all" or "auto".

    Returns None when the source is permitted.
    Raises ValueError when the source is not permitted.
    """
    # normalize the allowed sources to a list where possible
    if not allowed_sources:
        permitted = ["*"]
    elif type_util.is_string(allowed_sources):
        permitted = [allowed_sources]
    elif type_util.is_list_or_tuple(allowed_sources):
        permitted = list(allowed_sources)
    else:
        # any other kind of value is used as-is for the membership checks
        permitted = allowed_sources

    # a wildcard marker disables the restriction entirely
    wildcard_markers = ("*", "all", "auto")
    if any(marker in permitted for marker in wildcard_markers):
        return

    if source in permitted:
        return
    raise ValueError(f"Invalid source: '{source}' (source not allowed).")
def decode(s, format, **kwargs):
s = str(s)
serializer = get_serializer_by_format(format)
@ -92,16 +111,21 @@ def read_content(s, format=None, options=None):
# s -> filepath or url or data
# options.setdefault("format", format)
options = options or {}
sources = options.pop("sources", None)
s = s.strip()
if is_data(s):
check_source("data", allowed_sources=sources)
return s
elif is_url(s):
check_source("url", allowed_sources=sources)
requests_options = options.pop("requests_options", None) or {}
return read_content_from_url(s, requests_options, format)
elif is_s3(s):
check_source("s3", allowed_sources=sources)
s3_options = options.pop("s3_options", None) or {}
return read_content_from_s3(s, s3_options, format)
elif is_filepath(s):
check_source("file", allowed_sources=sources)
return read_content_from_file(s, format)
# one-line data?!
return s

View File

@ -0,0 +1,72 @@
from benedict import benedict
from tests.dicts.io.test_io_dict import io_dict_test_case
class github_issue_0287_test_case(io_dict_test_case):
    """
    Test case for github issue 0287 (the `sources` argument).
    https://github.com/fabiocaccamo/python-benedict/issues/287

    To run this specific test:
    - Run python -m unittest tests.github.test_issue_0287
    """

    def _assert_all_marker_accepted(self, as_sources):
        # every "all sources" marker must let a file path through,
        # both via the constructor and via the from_json class method
        filepath = self.input_path("valid-content.json")
        for marker in ("*", "all", "auto"):
            for load in (benedict, benedict.from_json):
                _ = load(filepath, sources=as_sources(marker))

    def _assert_only_file_accepted(self, as_sources):
        # a file path is accepted only when "file" is the allowed source,
        # any other single source must be rejected with a ValueError
        filepath = self.input_path("valid-content.json")
        for load in (benedict, benedict.from_json):
            _ = load(filepath, sources=as_sources("file"))
            for rejected in ("url", "s3", "data"):
                with self.assertRaises(ValueError):
                    _ = load(filepath, sources=as_sources(rejected))

    def test_sources_argument_with_all_list(self):
        self._assert_all_marker_accepted(lambda name: [name])

    def test_sources_argument_with_all_string(self):
        self._assert_all_marker_accepted(lambda name: name)

    def test_sources_argument_with_list(self):
        self._assert_only_file_accepted(lambda name: [name])

    def test_sources_argument_with_string(self):
        self._assert_only_file_accepted(lambda name: name)