import hashlib
from datetime import datetime
from functools import wraps
from dateparser.data.languages_info import language_order
from .parser import date_order_chart
from .utils import registry
@registry
class Settings:
    """Control and configure default parsing behavior of dateparser.

    Every setting is stored as an attribute of the instance. When no
    explicit settings are given, the values are read from
    ``dateparser_data.settings``.

    Currently, supported settings are:

    * `DATE_ORDER`
    * `PREFER_LOCALE_DATE_ORDER`
    * `TIMEZONE`
    * `TO_TIMEZONE`
    * `RETURN_AS_TIMEZONE_AWARE`
    * `PREFER_MONTH_OF_YEAR`
    * `PREFER_DAY_OF_MONTH`
    * `PREFER_DATES_FROM`
    * `RELATIVE_BASE`
    * `STRICT_PARSING`
    * `REQUIRE_PARTS`
    * `SKIP_TOKENS`
    * `NORMALIZE`
    * `RETURN_TIME_AS_PERIOD`
    * `PARSERS`
    * `DEFAULT_LANGUAGES`
    * `LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD`
    * `CACHE_SIZE_LIMIT`
    """

    # Class-level flag; ``replace`` stores ``False`` on the copies it builds.
    _default = True
    # Class-wide cache filled lazily by ``_get_settings_from_pyfile``.
    _pyfile_data = None
    # Caller-supplied overrides; ``replace`` stores them on the new instance.
    _mod_settings = dict()

    def __init__(self, settings=None):
        """Populate the instance from ``settings`` or from the packaged defaults.

        :param settings: Mapping of attribute name to value. When it is
            ``None`` or empty, the data returned by
            ``_get_settings_from_pyfile`` is used instead.
        """
        chosen = settings if settings else self._get_settings_from_pyfile()
        self._updateall(chosen.items())

    @classmethod
    def get_key(cls, settings=None):
        """Return an identifying key for a settings mapping.

        :param settings: Mapping of setting name to value, or a falsy value.
        :returns: ``"default"`` for a falsy ``settings``; otherwise the MD5
            hex digest of the sorted, concatenated ``name-value`` pairs.
        """
        if settings:
            fragments = sorted(
                "%s-%s" % (name, str(settings[name])) for name in settings
            )
            payload = "".join(fragments).encode("utf-8")
            return hashlib.md5(payload).hexdigest()
        return "default"

    @classmethod
    def _get_settings_from_pyfile(cls):
        """Return the packaged defaults, importing them on first use."""
        if cls._pyfile_data:
            return cls._pyfile_data
        # Imported lazily and cached on the class for subsequent calls.
        from dateparser_data import settings as packaged

        cls._pyfile_data = packaged.settings
        return cls._pyfile_data

    def _updateall(self, iterable):
        """Set one attribute on ``self`` for every ``(name, value)`` pair."""
        for name, value in iterable:
            setattr(self, name, value)

    def replace(self, mod_settings=None, **kwds):
        """Return a new instance with the given settings overridden.

        :param mod_settings: Overrides as originally supplied by the caller;
            when truthy it is stored as ``_mod_settings`` on the new instance.
        :param kwds: Setting names and their new values.
        :returns: A new instance of the same class; settings not named in
            ``kwds`` keep the value they have on ``self``.
        :raises TypeError: If any value in ``kwds`` is ``None``.
        """
        for name, value in kwds.items():
            if value is None:
                raise TypeError('Invalid {{"{}": {}}}'.format(name, value))

        # Carry over the current value of every known setting not overridden.
        for name in self._get_settings_from_pyfile():
            kwds.setdefault(name, getattr(self, name))

        kwds["_default"] = False
        if mod_settings:
            kwds["_mod_settings"] = mod_settings

        return self.__class__(settings=kwds)
# Shared module-level instance built without overrides; ``apply_settings``
# falls back to it when the caller passes no (or a falsy) ``settings`` value.
settings = Settings()
def apply_settings(f):
    """Decorate ``f`` so its ``settings`` keyword is always a ``Settings``.

    The wrapper reads the ``settings`` keyword argument and normalizes it:

    * a falsy or missing value becomes the module-level ``settings`` object;
    * a ``dict`` becomes a copy of the module-level object with those
      entries overridden (via ``Settings.replace``);
    * a ``Settings`` instance is passed through unchanged.

    :param f: Callable to wrap.
    :returns: The wrapping function.
    :raises TypeError: From the wrapper, when ``settings`` is neither a
        ``dict`` nor a ``Settings`` instance.
    """

    @wraps(f)
    def wrapper(*args, **kwargs):
        requested = kwargs.get("settings")
        effective = requested if requested else settings

        if isinstance(effective, dict):
            effective = settings.replace(mod_settings=requested, **effective)

        if not isinstance(effective, Settings):
            raise TypeError(
                "settings can only be either dict or instance of Settings class"
            )

        kwargs["settings"] = effective
        return f(*args, **kwargs)

    return wrapper
class SettingValidationError(ValueError):
    """Raised when a provided setting has an invalid name, type or value."""
def _check_repeated_values(setting_name, setting_value):
if len(setting_value) != len(set(setting_value)):
raise SettingValidationError(
'There are repeated values in the "{}" setting'.format(setting_name)
)
return
def _check_require_part(setting_name, setting_value):
    """Validate a list of required date parts.

    Every entry must be one of ``"day"``, ``"month"`` or ``"year"``, and no
    entry may be listed more than once.

    :param setting_name: Name of the setting, used in error messages.
    :param setting_value: Collection of part names to validate.
    :returns: ``None``; problems are reported only by raising.
    :raises SettingValidationError: If an entry is not an allowed part, or
        if an entry is repeated.
    """
    invalid_values = set(setting_value) - {"day", "month", "year"}
    if invalid_values:
        # Stringify before joining so that non-string entries are reported as
        # a validation error rather than crashing ``str.join`` with a
        # TypeError; sorting makes the message independent of set ordering.
        listing = ", ".join(sorted(map(str, invalid_values)))
        raise SettingValidationError(
            '"{}" setting contains invalid values: {}'.format(setting_name, listing)
        )
    _check_repeated_values(setting_name, setting_value)
def _check_parsers(setting_name, setting_value):
    """Validate a list of parser names.

    Every entry must be one of the known parser names listed below, and no
    entry may be listed more than once.

    :param setting_name: Name of the setting, used in error messages.
    :param setting_value: Collection of parser names to validate.
    :returns: ``None``; problems are reported only by raising.
    :raises SettingValidationError: If an entry is not a known parser, or if
        an entry is repeated.
    """
    existing_parsers = [
        "timestamp",
        "relative-time",
        "custom-formats",
        "absolute-time",
        "no-spaces-time",
        "negative-timestamp",
    ]  # FIXME: Extract the list of existing parsers from another place (#798)

    unknown_parsers = set(setting_value) - set(existing_parsers)
    if unknown_parsers:
        # Stringify before joining so that non-string entries are reported as
        # a validation error rather than crashing ``str.join`` with a
        # TypeError; sorting makes the message independent of set ordering.
        listing = ", ".join(sorted(map(str, unknown_parsers)))
        raise SettingValidationError(
            'Found unknown parsers in the "{}" setting: {}'.format(
                setting_name, listing
            )
        )
    _check_repeated_values(setting_name, setting_value)
def _check_default_languages(setting_name, setting_value):
    """Validate a list of default language codes.

    Every entry must appear in ``language_order`` and no entry may be listed
    more than once.

    :param setting_name: Name of the setting, used in error messages.
    :param setting_value: Collection of language codes to validate.
    :raises SettingValidationError: If an entry is not in ``language_order``,
        or if an entry is repeated.
    """
    requested = set(setting_value)
    unsupported_languages = requested - set(language_order)
    if not unsupported_languages:
        _check_repeated_values(setting_name, setting_value)
        return
    raise SettingValidationError(
        "Found invalid languages in the '{}' setting: {}".format(
            setting_name,
            ", ".join(repr(code) for code in unsupported_languages),
        )
    )
def _check_between_0_and_1(setting_name, setting_value):
is_valid = 0 <= setting_value <= 1
if not is_valid:
raise SettingValidationError(
"{} is not a valid value for {}. It can take values between 0 and "
"1.".format(
setting_value,
setting_name,
)
)
def check_settings(settings):
    """
    Check if provided settings are valid, if not it raises `SettingValidationError`.

    Only checks for the modified settings, i.e. the entries of
    ``settings._mod_settings``. For each of them the name must be known, the
    value must be an instance of the expected type, the value must be one of
    the allowed values (when the setting restricts them), and any
    setting-specific extra check must pass.

    :param settings: Object exposing the modified settings as a
        ``_mod_settings`` mapping of setting name to value.
    :raises SettingValidationError: If a setting name is unknown, or a value
        has the wrong type, is not an allowed value, or fails its extra check.
    """
    settings_values = {
        "DATE_ORDER": {
            "values": tuple(date_order_chart.keys()),
            "type": str,
        },
        "TIMEZONE": {
            # we don't check invalid Timezones as they raise an error
            "type": str,
        },
        "TO_TIMEZONE": {
            # It defaults to None, but it's not allowed to use it directly
            # "values" can take unlimited options
            "type": str
        },
        "RETURN_AS_TIMEZONE_AWARE": {
            # It defaults to 'default', but it's not allowed to use it directly
            "type": bool
        },
        "PREFER_MONTH_OF_YEAR": {"values": ("current", "first", "last"), "type": str},
        "PREFER_DAY_OF_MONTH": {"values": ("current", "first", "last"), "type": str},
        "PREFER_DATES_FROM": {
            "values": ("current_period", "past", "future"),
            "type": str,
        },
        "RELATIVE_BASE": {
            # "values" can take unlimited options
            "type": datetime
        },
        "STRICT_PARSING": {"type": bool},
        "REQUIRE_PARTS": {
            # "values" covered by the 'extra_check'
            "type": list,
            "extra_check": _check_require_part,
        },
        "SKIP_TOKENS": {
            # "values" can take unlimited options
            "type": list,
        },
        "NORMALIZE": {"type": bool},
        "RETURN_TIME_AS_PERIOD": {"type": bool},
        "PARSERS": {
            # "values" covered by the 'extra_check'
            "type": list,
            "extra_check": _check_parsers,
        },
        "FUZZY": {"type": bool},
        "PREFER_LOCALE_DATE_ORDER": {"type": bool},
        "DEFAULT_LANGUAGES": {"type": list, "extra_check": _check_default_languages},
        "LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD": {
            "type": float,
            "extra_check": _check_between_0_and_1,
        },
        "CACHE_SIZE_LIMIT": {
            "type": int,
        },
    }

    modified_settings = settings._mod_settings  # check only modified settings

    # check settings keys (all names are checked before any value is):
    for setting in modified_settings:
        if setting not in settings_values:
            raise SettingValidationError('"{}" is not a valid setting'.format(setting))

    for setting_name, setting_value in modified_settings.items():
        setting_type = type(setting_value)
        setting_props = settings_values[setting_name]
        expected_type = setting_props["type"]

        # check type: subclasses of the expected type are accepted (e.g. a
        # datetime subclass for RELATIVE_BASE). bool is a subclass of int, so
        # it is rejected explicitly unless the setting really is a boolean.
        bool_for_non_bool = expected_type is not bool and isinstance(
            setting_value, bool
        )
        if bool_for_non_bool or not isinstance(setting_value, expected_type):
            raise SettingValidationError(
                '"{}" must be "{}", not "{}".'.format(
                    setting_name, expected_type.__name__, setting_type.__name__
                )
            )

        # check values:
        if setting_props.get("values") and setting_value not in setting_props["values"]:
            raise SettingValidationError(
                '"{}" is not a valid value for "{}", it should be: "{}" or "{}"'.format(
                    setting_value,
                    setting_name,
                    '", "'.join(setting_props["values"][:-1]),
                    setting_props["values"][-1],
                )
            )

        # specific checks
        extra_check = setting_props.get("extra_check")
        if extra_check:
            extra_check(setting_name, setting_value)