python-benedict/benedict/dicts/parse/parse_util.py

222 lines
5.1 KiB
Python
Raw Normal View History

2019-05-17 11:13:15 +00:00
# -*- coding: utf-8 -*-
from benedict.serializers import JSONSerializer
from benedict.utils import type_util
2019-05-17 11:13:15 +00:00
from datetime import datetime
from dateutil import parser as date_parser
from decimal import Decimal, DecimalException
from MailChecker import MailChecker
from phonenumbers import phonenumberutil, PhoneNumberFormat
2019-05-17 11:13:15 +00:00
from slugify import slugify
import ftfy
import phonenumbers
import re
2019-05-17 11:13:15 +00:00
2020-02-03 18:01:28 +00:00
def _parse_with(val, type_checker, parser, **kwargs):
if val is None:
return None
if callable(type_checker) and type_checker(val):
return val
s = str(val)
2020-02-03 18:01:28 +00:00
if not len(s):
return None
return parser(s, **kwargs)
def _parse_bool(val):
val = val.lower()
2022-02-13 10:35:43 +00:00
if val in ["1", "true", "yes", "ok", "on"]:
2020-01-28 16:32:58 +00:00
return True
2022-02-13 10:35:43 +00:00
elif val in ["0", "false", "no", "ko", "off"]:
2020-01-28 16:32:58 +00:00
return False
return None
2019-05-17 11:13:15 +00:00
2019-07-09 10:57:01 +00:00
def parse_bool(val):
    """
    Parse ``val`` as a bool.

    Delegates to ``_parse_with`` with ``type_util.is_bool`` as the
    pass-through check and ``_parse_bool`` as the string parser.
    """
    return _parse_with(val, type_checker=type_util.is_bool, parser=_parse_bool)
def parse_date(val, format=None):
    """
    Parse ``val`` with ``parse_datetime`` and keep only the date part.

    :param val: value to parse.
    :param format: optional format string forwarded to ``parse_datetime``.
    :return: the ``.date()`` of the parsed result, or None when
        ``parse_datetime`` returns a falsy value.
    """
    parsed = parse_datetime(val, format)
    return parsed.date() if parsed else None
def _parse_datetime_with_format(val, format):
try:
return datetime.strptime(val, format)
except Exception:
return None
def _parse_datetime_without_format(val):
    """
    Parse ``val`` without an explicit format.

    Tries ``date_parser.parse`` first; if that raises, falls back to
    ``_parse_datetime_from_timestamp``.
    """
    try:
        parsed = date_parser.parse(val)
    except Exception:
        parsed = _parse_datetime_from_timestamp(val)
    return parsed
def _parse_datetime_from_timestamp(val):
try:
return datetime.fromtimestamp(float(val))
except Exception:
return None
2019-05-17 11:13:15 +00:00
def parse_datetime(val, format=None):
    """
    Parse ``val`` into a datetime.

    :param val: value to parse; datetime instances (as judged by
        ``type_util.is_datetime``) are returned unchanged.
    :param format: optional ``strptime`` format string. When truthy the
        string form of ``val`` is parsed with
        ``_parse_datetime_with_format``; otherwise
        ``_parse_datetime_without_format`` is used.
    :return: the parsed value, or None when ``val`` is None, when its
        string form is empty, or when parsing fails.
    """
    # Guard None/empty explicitly, as _parse_with does for the other parsers,
    # instead of relying on the literal string "None" (or "") being rejected
    # by the underlying parsers.
    if val is None:
        return None
    if type_util.is_datetime(val):
        return val
    s = str(val)
    if not s:
        return None
    if format:
        return _parse_datetime_with_format(s, format)
    return _parse_datetime_without_format(s)
def _parse_decimal(val):
2020-01-28 16:32:58 +00:00
try:
return Decimal(val)
except (ValueError, DecimalException):
return None
2019-05-17 11:13:15 +00:00
2019-07-09 10:57:01 +00:00
2019-05-17 11:13:15 +00:00
def parse_decimal(val):
    """
    Parse ``val`` as a Decimal.

    Delegates to ``_parse_with`` with ``type_util.is_decimal`` as the
    pass-through check and ``_parse_decimal`` as the string parser.
    """
    return _parse_with(
        val, type_checker=type_util.is_decimal, parser=_parse_decimal
    )
def _parse_dict(val):
    """
    Decode ``val`` with ``JSONSerializer`` and keep the result if it is a dict.

    :return: the decoded value when ``type_util.is_dict`` accepts it;
        None when it does not or when decoding raises.
    """
    decoder = JSONSerializer()
    try:
        decoded = decoder.decode(val)
        return decoded if type_util.is_dict(decoded) else None
    except Exception:
        return None
2019-05-17 11:13:15 +00:00
2019-07-09 10:57:01 +00:00
2019-05-17 11:13:15 +00:00
def parse_dict(val):
    """
    Parse ``val`` as a dict.

    Delegates to ``_parse_with`` with ``type_util.is_dict`` as the
    pass-through check and ``_parse_dict`` as the string parser.
    """
    return _parse_with(val, type_checker=type_util.is_dict, parser=_parse_dict)
def _parse_float(val):
2019-05-17 11:13:15 +00:00
try:
return float(val)
except ValueError:
2020-01-28 16:32:58 +00:00
return None
2019-05-17 11:13:15 +00:00
2019-07-09 10:57:01 +00:00
2019-05-17 11:13:15 +00:00
def parse_float(val):
    """
    Parse ``val`` as a float.

    Delegates to ``_parse_with`` with ``type_util.is_float`` as the
    pass-through check and ``_parse_float`` as the string parser.
    """
    return _parse_with(val, type_checker=type_util.is_float, parser=_parse_float)
2019-05-17 11:13:15 +00:00
2019-07-09 10:57:01 +00:00
def _parse_email(val, check_blacklist=True):
    """
    Lower-case ``val`` and return it if MailChecker accepts it.

    :param val: candidate e-mail address string.
    :param check_blacklist: when truthy validate with
        ``MailChecker.is_valid``; otherwise validate with
        ``MailChecker.is_valid_email_format`` only.
    :return: the lower-cased address, or None when validation fails.
    """
    email = val.lower()
    if check_blacklist:
        validator = MailChecker.is_valid
    else:
        validator = MailChecker.is_valid_email_format
    return email if validator(email) else None
2019-07-09 10:57:01 +00:00
def parse_email(val, check_blacklist=True):
    """
    Parse ``val`` as an e-mail address.

    Delegates to ``_parse_with`` with no pass-through type check and
    ``_parse_email`` as the string parser, forwarding ``check_blacklist``.
    """
    return _parse_with(
        val,
        type_checker=None,
        parser=_parse_email,
        check_blacklist=check_blacklist,
    )
def _parse_int(val):
2019-05-17 11:13:15 +00:00
try:
return int(val)
2019-05-20 10:01:25 +00:00
except ValueError:
2020-01-28 16:32:58 +00:00
return None
2019-05-17 11:13:15 +00:00
2019-07-09 10:57:01 +00:00
def parse_int(val):
    """
    Parse ``val`` as an integer.

    Delegates to ``_parse_with`` with ``type_util.is_integer`` as the
    pass-through check and ``_parse_int`` as the string parser.
    """
    return _parse_with(val, type_checker=type_util.is_integer, parser=_parse_int)
def _parse_list(val, separator=None):
2021-10-12 12:27:35 +00:00
if (
2022-02-13 10:35:43 +00:00
val.startswith("{")
and val.endswith("}")
or val.startswith("[")
and val.endswith("]")
2021-10-12 12:27:35 +00:00
):
2021-01-14 17:28:01 +00:00
try:
serializer = JSONSerializer()
l = serializer.decode(val)
if type_util.is_list(l):
return l
return None
except Exception:
pass
if separator:
l = list(val.split(separator))
return l
return None
2019-05-17 11:13:15 +00:00
2019-07-09 10:57:01 +00:00
def parse_list(val, separator=None):
    """
    Parse ``val`` as a list.

    Delegates to ``_parse_with`` with ``type_util.is_list_or_tuple`` as
    the pass-through check and ``_parse_list`` as the string parser,
    forwarding ``separator``. A list or tuple result is copied into a new
    list; any other result is returned as is.
    """
    parsed = _parse_with(
        val, type_util.is_list_or_tuple, _parse_list, separator=separator
    )
    if type_util.is_list_or_tuple(parsed):
        return list(parsed)
    return parsed
def _parse_phonenumber(val, country_code=None):
    """
    Parse and validate a phone number string with ``phonenumbers``.

    :param val: phone number string.
    :param country_code: passed as the second argument to
        ``phonenumbers.parse``.
    :return: a dict with the keys "e164", "international" and "national"
        holding the number in each format, or None when the number is not
        valid or parsing raises ``NumberParseException``.
    """
    try:
        number = phonenumbers.parse(val, country_code)
        if not phonenumbers.is_valid_number(number):
            return None
        formats = (
            ("e164", PhoneNumberFormat.E164),
            ("international", PhoneNumberFormat.INTERNATIONAL),
            ("national", PhoneNumberFormat.NATIONAL),
        )
        return {
            key: phonenumbers.format_number(number, fmt) for key, fmt in formats
        }
    except phonenumberutil.NumberParseException:
        return None
2019-07-09 10:57:01 +00:00
def parse_phonenumber(val, country_code=None):
    """
    Normalise ``val`` and parse it as a phone number.

    The value is cleaned with ``parse_str``; every character other than a
    digit or "+" is replaced by a space and the result is stripped. A
    leading "00" is rewritten as "+". A ``country_code`` of two or more
    characters is cut to its first two characters and upper-cased.

    :return: the result of ``_parse_phonenumber`` (via ``_parse_with``),
        or None when the cleaned value is empty.
    """
    text = parse_str(val)
    if not text:
        return None
    digits = re.sub(r"[^0-9\+]", " ", text).strip()
    if digits.startswith("00"):
        digits = "+{}".format(digits[2:])
    region = country_code
    if region and len(region) >= 2:
        region = region[0:2].upper()
    return _parse_with(digits, None, _parse_phonenumber, country_code=region)
def _parse_slug(val):
    """Return the result of calling ``slugify`` on ``val``."""
    slug = slugify(val)
    return slug
2019-05-17 11:13:15 +00:00
def parse_slug(val):
    """Clean ``val`` with ``parse_str`` and slugify the result."""
    return _parse_slug(parse_str(val))
2019-05-17 11:13:15 +00:00
2019-07-09 10:57:01 +00:00
2019-05-17 11:13:15 +00:00
def parse_str(val):
    """
    Convert ``val`` to a whitespace-normalised string.

    Values accepted by ``type_util.is_string`` are passed through
    ``ftfy.fix_text``; anything else is converted with ``str``. Leading and
    trailing whitespace is then removed and each internal run of whitespace
    is collapsed to a single space.
    """
    text = ftfy.fix_text(val) if type_util.is_string(val) else str(val)
    # split() with no argument splits on any run of whitespace.
    return " ".join(text.strip().split())
def parse_uuid(val):
    """
    Clean ``val`` with ``parse_str`` and return it if it is a UUID.

    :return: the cleaned string when ``type_util.is_uuid`` accepts it,
        otherwise None.
    """
    text = parse_str(val)
    if type_util.is_uuid(text):
        return text
    return None