Source code for dateparser.utils

import calendar
import logging
import types
import unicodedata
from datetime import datetime

import regex as re
from tzlocal import get_localzone
from pytz import UTC, timezone, UnknownTimeZoneError
from collections import OrderedDict

from dateparser.timezone_parser import _tz_offsets, StaticTzInfo


[docs]def strip_braces(date_string): return re.sub(r'[{}()<>\[\]]+', '', date_string)
[docs]def normalize_unicode(string, form='NFKD'): return ''.join( c for c in unicodedata.normalize(form, string) if unicodedata.category(c) != 'Mn' )
[docs]def combine_dicts(primary_dict, supplementary_dict): combined_dict = OrderedDict() for key, value in primary_dict.items(): if key in supplementary_dict: if isinstance(value, list): combined_dict[key] = value + supplementary_dict[key] elif isinstance(value, dict): combined_dict[key] = combine_dicts(value, supplementary_dict[key]) else: combined_dict[key] = supplementary_dict[key] else: combined_dict[key] = primary_dict[key] remaining_keys = [key for key in supplementary_dict.keys() if key not in primary_dict.keys()] for key in remaining_keys: combined_dict[key] = supplementary_dict[key] return combined_dict
[docs]def find_date_separator(format): m = re.search(r'(?:(?:%[dbBmaA])(\W))+', format) if m: return m.group(1)
def _get_missing_parts(fmt): """ Return a list containing missing parts (day, month, year) from a date format checking its directives """ directive_mapping = { 'day': ['%d', '%-d', '%j', '%-j'], 'month': ['%b', '%B', '%m', '%-m'], 'year': ['%y', '%-y', '%Y'] } missing = [ field for field in ('day', 'month', 'year') if not any(directive in fmt for directive in directive_mapping[field]) ] return missing
[docs]def localize_timezone(date_time, tz_string): if date_time.tzinfo: return date_time tz = None try: tz = timezone(tz_string) except UnknownTimeZoneError as e: for name, info in _tz_offsets: if info['regex'].search(' %s' % tz_string): tz = StaticTzInfo(name, info['offset']) break else: raise e if hasattr(tz, 'localize'): date_time = tz.localize(date_time) else: date_time = date_time.replace(tzinfo=tz) return date_time
[docs]def apply_tzdatabase_timezone(date_time, pytz_string): usr_timezone = timezone(pytz_string) if date_time.tzinfo != usr_timezone: date_time = date_time.astimezone(usr_timezone) return date_time
[docs]def apply_dateparser_timezone(utc_datetime, offset_or_timezone_abb): for name, info in _tz_offsets: if info['regex'].search(' %s' % offset_or_timezone_abb): tz = StaticTzInfo(name, info['offset']) return utc_datetime.astimezone(tz)
[docs]def apply_timezone(date_time, tz_string): if not date_time.tzinfo: if hasattr(UTC, 'localize'): date_time = UTC.localize(date_time) else: date_time = date_time.replace(tzinfo=UTC) new_datetime = apply_dateparser_timezone(date_time, tz_string) if not new_datetime: new_datetime = apply_tzdatabase_timezone(date_time, tz_string) return new_datetime
[docs]def apply_timezone_from_settings(date_obj, settings): tz = get_localzone() if settings is None: return date_obj if 'local' in settings.TIMEZONE.lower(): if hasattr(tz, 'localize'): date_obj = tz.localize(date_obj) else: date_obj = date_obj.replace(tzinfo=tz) else: date_obj = localize_timezone(date_obj, settings.TIMEZONE) if settings.TO_TIMEZONE: date_obj = apply_timezone(date_obj, settings.TO_TIMEZONE) if settings.RETURN_AS_TIMEZONE_AWARE is not True: date_obj = date_obj.replace(tzinfo=None) return date_obj
[docs]def get_last_day_of_month(year, month): return calendar.monthrange(year, month)[1]
[docs]def get_previous_leap_year(year): return _get_leap_year(year, future=False)
[docs]def get_next_leap_year(year): return _get_leap_year(year, future=True)
def _get_leap_year(year, future): """ Iterate through previous or next years until it gets a valid leap year This is performed to avoid missing or including centurial leap years """ step = 1 if future else -1 leap_year = year + step while not calendar.isleap(leap_year): leap_year += step return leap_year
[docs]def set_correct_day_from_settings(date_obj, settings, current_day=None): """ Set correct day attending the `PREFER_DAY_OF_MONTH` setting.""" options = { 'first': 1, 'last': get_last_day_of_month(date_obj.year, date_obj.month), 'current': current_day or datetime.now().day } try: return date_obj.replace(day=options[settings.PREFER_DAY_OF_MONTH]) except ValueError: return date_obj.replace(day=options['last'])
[docs]def registry(cls): def choose(creator): def constructor(cls, *args, **kwargs): key = cls.get_key(*args, **kwargs) if not hasattr(cls, "__registry_dict"): setattr(cls, "__registry_dict", {}) registry_dict = getattr(cls, "__registry_dict") if key not in registry_dict: registry_dict[key] = creator(cls, *args) setattr(registry_dict[key], 'registry_key', key) return registry_dict[key] return staticmethod(constructor) if not (hasattr(cls, "get_key") and isinstance(cls.get_key, types.MethodType) and cls.get_key.__self__ is cls): raise NotImplementedError("Registry classes require to implement class method get_key") setattr(cls, '__new__', choose(cls.__new__)) return cls
[docs]def get_logger(): setup_logging() return logging.getLogger('dateparser')
[docs]def setup_logging(): if len(logging.root.handlers): return config = { 'version': 1, 'disable_existing_loggers': True, 'formatters': { 'console': { 'format': "%(asctime)s %(levelname)s: [%(name)s] %(message)s", }, }, 'handlers': { 'console': { 'level': logging.DEBUG, 'class': "logging.StreamHandler", 'formatter': "console", 'stream': "ext://sys.stdout", }, }, 'root': { 'level': logging.DEBUG, 'handlers': ["console"], }, } logging.config.dictConfig(config)