Source code for dateparser.utils

import calendar
import logging
import types
import unicodedata
from collections import OrderedDict
from datetime import datetime

import regex as re
from pytz import UTC, UnknownTimeZoneError, timezone
from tzlocal import get_localzone

from dateparser.timezone_parser import StaticTzInfo, _tz_offsets


def strip_braces(date_string):
    """Return ``date_string`` with all brace-like characters removed.

    The characters dropped are ``{``, ``}``, ``(``, ``)``, ``<``, ``>``,
    ``[`` and ``]``. Everything else is returned unchanged.
    """
    brace_run = re.compile(r"[{}()<>\[\]]+")
    return brace_run.sub("", date_string)
def normalize_unicode(string, form="NFKD"):
    """Normalize ``string`` and drop its non-spacing marks.

    :param string: text to normalize.
    :param form: normalization form handed to ``unicodedata.normalize``
        (``"NFKD"`` by default).
    :return: the normalized text without any character whose Unicode
        category is ``"Mn"``.
    """
    normalized = unicodedata.normalize(form, string)
    kept_chars = [
        char for char in normalized if unicodedata.category(char) != "Mn"
    ]
    return "".join(kept_chars)
def combine_dicts(primary_dict, supplementary_dict):
    """Merge two dictionaries into a new ``OrderedDict``.

    Keys of ``primary_dict`` come first, in their original order, followed by
    the keys that only exist in ``supplementary_dict``. For a key present in
    both:

    * if the primary value is a list, the supplementary value is appended
      to it with ``+``;
    * if the primary value is a dict, both values are merged recursively;
    * otherwise the supplementary value wins.

    Neither input dictionary is modified.
    """
    merged = OrderedDict()

    for name, primary_value in primary_dict.items():
        if name not in supplementary_dict:
            merged[name] = primary_value
            continue

        extra_value = supplementary_dict[name]
        if isinstance(primary_value, list):
            merged[name] = primary_value + extra_value
        elif isinstance(primary_value, dict):
            merged[name] = combine_dicts(primary_value, extra_value)
        else:
            merged[name] = extra_value

    # Keys known only to the supplementary dictionary go last.
    for name in supplementary_dict:
        if name not in primary_dict:
            merged[name] = supplementary_dict[name]

    return merged
def find_date_separator(format):
    """Return the separator character used after date directives.

    Searches ``format`` for a run of one or more ``%d``, ``%b``, ``%B``,
    ``%m``, ``%a`` or ``%A`` directives, each immediately followed by a single
    non-word character. The character captured by the last pair of the first
    such run is returned.

    :return: the separator character, or ``None`` when no directive in the
        list above is followed by a non-word character.
    """
    match = re.search(r"(?:(?:%[dbBmaA])(\W))+", format)
    if match is None:
        return None
    return match.group(1)
def _get_missing_parts(fmt):
    """
    Return a list containing missing parts (day, month, year)
    from a date format checking its directives
    """
    # Checked in this order so the result is always day, month, year.
    directives_by_part = (
        ("day", ("%d", "%-d", "%j", "%-j")),
        ("month", ("%b", "%B", "%m", "%-m")),
        ("year", ("%y", "%-y", "%Y")),
    )

    missing = []
    for part, directives in directives_by_part:
        for directive in directives:
            if directive in fmt:
                break
        else:
            # None of the directives for this part appear in the format.
            missing.append(part)
    return missing
def get_timezone_from_tz_string(tz_string):
    """Turn a timezone string into a tzinfo object.

    ``tz_string`` is first looked up with pytz's ``timezone``. If pytz does
    not know it, the entries of ``_tz_offsets`` are scanned in order and the
    first one whose ``"regex"`` matches ``tz_string`` (prefixed with a single
    space) is returned as a ``StaticTzInfo`` built from its ``"offset"``.

    :raises UnknownTimeZoneError: the original pytz error, when no entry of
        ``_tz_offsets`` matches either.
    """
    try:
        return timezone(tz_string)
    except UnknownTimeZoneError as error:
        padded = " %s" % tz_string
        for tz_name, tz_info in _tz_offsets:
            if tz_info["regex"].search(padded):
                return StaticTzInfo(tz_name, tz_info["offset"])
        raise error
def localize_timezone(date_time, tz_string):
    """Attach the timezone named by ``tz_string`` to a naive datetime.

    A datetime that already carries a ``tzinfo`` is returned as is. Otherwise
    the timezone is resolved with ``get_timezone_from_tz_string`` and attached
    through the zone's ``localize`` method when it has one, or through
    ``datetime.replace(tzinfo=...)`` when it does not.
    """
    if date_time.tzinfo:
        return date_time

    tz = get_timezone_from_tz_string(tz_string)
    if not hasattr(tz, "localize"):
        return date_time.replace(tzinfo=tz)
    return tz.localize(date_time)
def apply_tzdatabase_timezone(date_time, pytz_string):
    """Convert ``date_time`` to the pytz timezone named ``pytz_string``.

    The datetime is returned untouched when its ``tzinfo`` already compares
    equal to the requested zone; otherwise it is converted with
    ``astimezone``.
    """
    target_tz = timezone(pytz_string)
    if date_time.tzinfo != target_tz:
        return date_time.astimezone(target_tz)
    return date_time
def apply_dateparser_timezone(utc_datetime, offset_or_timezone_abb):
    """Convert ``utc_datetime`` using the ``_tz_offsets`` table.

    The entries of ``_tz_offsets`` are scanned in order; the first one whose
    ``"regex"`` matches ``offset_or_timezone_abb`` (prefixed with a single
    space) supplies the ``StaticTzInfo`` the datetime is converted to.

    :return: the converted datetime, or ``None`` when no entry matches.
    """
    padded = " %s" % offset_or_timezone_abb
    for tz_name, tz_info in _tz_offsets:
        if not tz_info["regex"].search(padded):
            continue
        static_tz = StaticTzInfo(tz_name, tz_info["offset"])
        return utc_datetime.astimezone(static_tz)
    return None
def apply_timezone(date_time, tz_string):
    """Convert ``date_time`` to the timezone described by ``tz_string``.

    A naive datetime is first tagged as UTC (through ``UTC.localize`` when
    available, otherwise ``replace(tzinfo=UTC)``). The conversion is tried
    with ``apply_dateparser_timezone`` first; if that yields nothing,
    ``apply_tzdatabase_timezone`` is used instead.
    """
    aware = date_time
    if not aware.tzinfo:
        if hasattr(UTC, "localize"):
            aware = UTC.localize(aware)
        else:
            aware = aware.replace(tzinfo=UTC)

    converted = apply_dateparser_timezone(aware, tz_string)
    if converted:
        return converted
    return apply_tzdatabase_timezone(aware, tz_string)
def apply_timezone_from_settings(date_obj, settings):
    """Apply the timezone-related settings to ``date_obj``.

    Steps, in order:

    1. If ``settings`` is ``None`` the datetime is returned unchanged.
    2. If ``settings.TIMEZONE`` contains ``"local"`` (case-insensitive), the
       zone returned by ``get_localzone()`` is attached: through its
       ``localize`` method when it has one, otherwise through
       ``replace(tzinfo=...)``. Any other ``TIMEZONE`` value is handled by
       ``localize_timezone``.
    3. If ``settings.TO_TIMEZONE`` is set, the datetime is converted to it
       with ``apply_timezone``.
    4. Unless ``settings.RETURN_AS_TIMEZONE_AWARE`` is exactly ``True``, the
       ``tzinfo`` is stripped from the result.

    :param date_obj: datetime to adjust.
    :param settings: object exposing ``TIMEZONE``, ``TO_TIMEZONE`` and
        ``RETURN_AS_TIMEZONE_AWARE``, or ``None``.
    :return: the adjusted datetime.
    """
    if settings is None:
        return date_obj

    if "local" in settings.TIMEZONE.lower():
        # Look up the system zone only on the branch that uses it, so the
        # other paths neither pay for the lookup nor fail because of it.
        tz = get_localzone()
        if hasattr(tz, "localize"):
            date_obj = tz.localize(date_obj)
        else:
            date_obj = date_obj.replace(tzinfo=tz)
    else:
        date_obj = localize_timezone(date_obj, settings.TIMEZONE)

    if settings.TO_TIMEZONE:
        date_obj = apply_timezone(date_obj, settings.TO_TIMEZONE)

    # Only a literal True keeps the tzinfo; truthy non-bool values do not.
    if settings.RETURN_AS_TIMEZONE_AWARE is not True:
        date_obj = date_obj.replace(tzinfo=None)

    return date_obj
def get_last_day_of_month(year, month):
    """Return the number of the last day of ``month`` in ``year``."""
    _first_weekday, day_count = calendar.monthrange(year, month)
    return day_count
def get_previous_leap_year(year):
    """Return the closest leap year strictly before ``year``."""
    previous_leap = _get_leap_year(year, future=False)
    return previous_leap
def get_next_leap_year(year):
    """Return the closest leap year strictly after ``year``."""
    next_leap = _get_leap_year(year, future=True)
    return next_leap
def _get_leap_year(year, future):
    """
    Iterate through previous or next years until it gets a valid leap year
    This is performed to avoid missing or including centurial leap years
    """
    if future:
        direction = 1
    else:
        direction = -1

    # ``year`` itself is never a candidate: the search starts one step away.
    candidate = year
    while True:
        candidate += direction
        if calendar.isleap(candidate):
            return candidate
def set_correct_day_from_settings(date_obj, settings, current_day=None):
    """Set correct day attending the `PREFER_DAY_OF_MONTH` setting.

    ``settings.PREFER_DAY_OF_MONTH`` selects the day: ``"first"`` gives 1,
    ``"last"`` the last day of ``date_obj``'s month, and ``"current"`` gives
    ``current_day`` or, when that is falsy, today's day of the month. If the
    selected day does not exist in that month, the last day is used instead.
    """
    last_day = get_last_day_of_month(date_obj.year, date_obj.month)
    day_by_preference = {
        "first": 1,
        "last": last_day,
        "current": current_day or datetime.now().day,
    }

    try:
        preferred_day = day_by_preference[settings.PREFER_DAY_OF_MONTH]
        return date_obj.replace(day=preferred_day)
    except ValueError:
        # E.g. "current" is the 31st but the month is shorter.
        return date_obj.replace(day=day_by_preference["last"])
def set_correct_month_from_settings(date_obj, settings, current_month=None):
    """Set correct month attending the `PREFER_MONTH_OF_YEAR` setting.

    ``settings.PREFER_MONTH_OF_YEAR`` selects the month: ``"first"`` gives 1,
    ``"last"`` gives 12, and ``"current"`` gives ``current_month`` or, when
    that is falsy, the current month. If ``date_obj``'s day does not exist in
    the selected month, month 12 is used instead.
    """
    month_by_preference = {
        "first": 1,
        "last": 12,
        "current": current_month or datetime.now().month,
    }

    try:
        preferred_month = month_by_preference[settings.PREFER_MONTH_OF_YEAR]
        return date_obj.replace(month=preferred_month)
    except ValueError:
        # The day of date_obj is out of range for the preferred month.
        return date_obj.replace(month=month_by_preference["last"])
def registry(cls):
    """Class decorator that hands out one shared instance per key.

    ``cls`` must define ``get_key`` as a class method bound to ``cls`` itself.
    The decorator replaces ``cls.__new__`` with a static method that:

    * computes a key with ``cls.get_key(*args, **kwargs)``;
    * keeps the created instances in a dict stored on the class under the
      attribute name ``"__registry_dict"`` (created on first use);
    * creates a new instance through the original ``__new__`` only when the
      key is not in that dict yet, and sets ``registry_key`` on it;
    * returns the instance stored for the key.

    Only the positional arguments are forwarded to the original ``__new__``;
    keyword arguments take part in the key computation only.

    :raises NotImplementedError: if ``cls`` has no suitable ``get_key``.
    :return: ``cls`` itself, with ``__new__`` replaced.
    """
    has_bound_get_key = (
        hasattr(cls, "get_key")
        and isinstance(cls.get_key, types.MethodType)
        and cls.get_key.__self__ is cls
    )
    if not has_bound_get_key:
        raise NotImplementedError(
            "Registry classes require to implement class method get_key"
        )

    def choose(creator):
        def constructor(cls, *args, **kwargs):
            key = cls.get_key(*args, **kwargs)

            # String-based access keeps the attribute name literal.
            if not hasattr(cls, "__registry_dict"):
                setattr(cls, "__registry_dict", {})
            instances = getattr(cls, "__registry_dict")

            if key in instances:
                return instances[key]

            instance = creator(cls, *args)
            instances[key] = instance
            instance.registry_key = key
            return instance

        return staticmethod(constructor)

    cls.__new__ = choose(cls.__new__)
    return cls
def get_logger():
    """Return the ``"dateparser"`` logger, running ``setup_logging`` first."""
    setup_logging()
    logger = logging.getLogger("dateparser")
    return logger
def setup_logging():
    """Install a default console logging configuration if none exists.

    Does nothing when the root logger already has at least one handler.
    Otherwise the root logger is configured at ``DEBUG`` level with a single
    ``StreamHandler`` writing to ``sys.stdout`` using the format
    ``"%(asctime)s %(levelname)s: [%(name)s] %(message)s"``. The configuration
    is applied with ``disable_existing_loggers`` set to ``True``.
    """
    if logging.root.handlers:
        return

    # ``import logging`` does not load the ``logging.config`` submodule, so
    # ``logging.config.dictConfig`` is only reachable if something else
    # imported it. Import the function explicitly; the ``from`` form is used
    # so that the name ``logging`` is not rebound as a local here.
    from logging.config import dictConfig

    config = {
        "version": 1,
        "disable_existing_loggers": True,
        "formatters": {
            "console": {
                "format": "%(asctime)s %(levelname)s: [%(name)s] %(message)s",
            },
        },
        "handlers": {
            "console": {
                "level": logging.DEBUG,
                "class": "logging.StreamHandler",
                "formatter": "console",
                "stream": "ext://sys.stdout",
            },
        },
        "root": {
            "level": logging.DEBUG,
            "handlers": ["console"],
        },
    }
    dictConfig(config)