PTN 1.3

2020-05-01 18:03:37 +02:00
parent 1152befcf1
commit e60525fee1
95 changed files with 306 additions and 21444 deletions
@@ -1,15 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-Common module
-"""
-import re
-
# Every character that is treated as a separator between tags/words.
seps = r' [](){}+*|=-_~#/\\.,;:'
# Same set, without the grouping characters.
seps_no_groups = seps.replace('[](){}', '')
# Same set, without the slash and the backslash.
seps_no_fs = seps.replace('/', '').replace('\\', '')

# Characters that may separate parts of a title.
title_seps = r'-+/\|'

# Abbreviations used by many rebulk objects: the first item is replaced by a
# character class accepting any separator from seps_no_fs.
dash = (r'-', '[{0}]'.format(re.escape(seps_no_fs)))
alt_dash = (r'@', '[{0}]'.format(re.escape(seps_no_fs)))
@@ -1,75 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-Comparators
-"""
-try:
-    from functools import cmp_to_key
-except ImportError:
-    from ...backports import cmp_to_key
-
-
def marker_comparator_predicate(match):
    """
    Default predicate selecting the matches that count when comparing markers.

    :param match: match to test (reads ``private``, ``name``, ``tags`` and ``value``)
    :return: False for private matches, for ``proper_count`` and ``title`` matches, for
        ``container`` matches tagged ``extension`` and for ``other`` matches whose value
        is ``Rip``; True otherwise
    :rtype: bool
    """
    if match.private:
        return False
    if match.name in ('proper_count', 'title'):
        return False
    if match.name == 'container' and 'extension' in match.tags:
        return False
    return not (match.name == 'other' and match.value == 'Rip')
-
-
def marker_weight(matches, marker, predicate):
    """
    Compute the comparator weight of a marker.

    The weight is the number of distinct match names found inside the marker span
    among the matches accepted by ``predicate``.

    :param matches: object exposing ``range(start, end, predicate=...)``
    :param marker: object whose ``span`` is a ``(start, end)`` pair
    :param predicate: filter forwarded to ``matches.range``
    :return: count of distinct match names
    :rtype: int
    """
    start, end = marker.span
    distinct_names = {found.name for found in matches.range(start, end, predicate=predicate)}
    return len(distinct_names)
-
-
def marker_comparator(matches, markers, predicate):
    """
    Build a ``cmp``-style function ordering markers from the most valuable to the least.

    A marker with a higher weight (see ``marker_weight``) sorts first. On equal weight,
    the marker positioned later in ``markers`` sorts first.

    :param matches: matches used to compute each marker's weight
    :param markers: sequence of markers; its ``index`` method breaks ties
    :param predicate: match filter forwarded to ``marker_weight``
    :return: comparator taking two markers and returning a negative, zero or positive int
    :rtype: callable
    """

    def comparator(marker1, marker2):
        """
        Compare two markers: negative when ``marker1`` must come first.
        """
        weight_delta = marker_weight(matches, marker2, predicate) - marker_weight(matches, marker1, predicate)
        if weight_delta != 0:
            return weight_delta

        # Same weight: give preference to the rightmost path.
        return markers.index(marker2) - markers.index(marker1)

    return comparator
-
-
def marker_sorted(markers, matches, predicate=marker_comparator_predicate):
    """
    Return the markers sorted from the most valuable to the least.

    :param markers: markers to sort
    :param matches: matches used to weight each marker
    :param predicate: match filter used when weighting (defaults to ``marker_comparator_predicate``)
    :return: new sorted list of markers
    :rtype: list
    """
    compare = marker_comparator(matches, markers, predicate=predicate)
    return sorted(markers, key=cmp_to_key(compare))
@@ -1,125 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-Date
-"""
-from dateutil import parser
-
-from rebulk.remodule import re
-
_dsep = r'[-/ \.]'
_dsep_bis = r'[-/ \.x]'


def _date_re(template, first_sep, second_sep):
    """Compile a case-insensitive date pattern after filling in its two separator classes."""
    return re.compile(template % (first_sep, second_sep), re.IGNORECASE)


# Patterns are tried in this order by search_date. Group 1 is always the whole date.
date_regexps = [
    # Eight, then six consecutive digits enclosed by separators.
    _date_re(r'%s((\d{8}))%s', _dsep, _dsep),
    _date_re(r'%s((\d{6}))%s', _dsep, _dsep),
    # Three numeric parts with a two-digit first part, then with a two-digit last part.
    _date_re(r'(?:^|[^\d])((\d{2})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])', _dsep, _dsep),
    _date_re(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{2}))(?:$|[^\d])', _dsep, _dsep),
    # Three numeric parts with a four-digit first part, then with a four-digit last part.
    _date_re(r'(?:^|[^\d])((\d{4})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])', _dsep_bis, _dsep),
    _date_re(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{4}))(?:$|[^\d])', _dsep, _dsep_bis),
    # Number with optional ordinal suffix, a 3-10 letter word, then four digits.
    _date_re(r'(?:^|[^\d])((\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4}))(?:$|[^\d])', _dsep, _dsep),
]
-
-
def valid_year(year, min_year=1920, max_year=2030):
    """
    Check if a number is a plausible year.

    :param year: year to test
    :type year: int
    :param min_year: first accepted year (inclusive)
    :type min_year: int
    :param max_year: first rejected year (exclusive upper bound)
    :type max_year: int
    :return: True when ``min_year <= year < max_year``
    :rtype: bool
    """
    return min_year <= year < max_year
-
-
def _is_int(string):
    """
    Tell whether the input string can be converted by ``int()``.

    :param string: value to test
    :type string: str
    :return: True when ``int(string)`` succeeds, False when it raises ``ValueError``
    :rtype: bool
    """
    try:
        int(string)
    except ValueError:
        return False
    return True
-
-
def _guess_day_first_parameter(groups):  # pylint:disable=inconsistent-return-statements
    """
    Guess the day_first option from the shape of the matched date when it is not defined.
    It helps to solve issues with python dateutils 2.5.3 parser changes.

    :param groups: string parts captured for the date
    :type groups: sequence of str
    :return: False when the date starts with a year, True when it ends with a year,
        None when no rule applies
    :rtype: bool or None
    """
    head, tail = groups[0], groups[-1]

    # A leading long (four-digit) year forces day_first to False.
    head_numeric = _is_int(head)
    if head_numeric and valid_year(int(head[:4])):
        return False

    # A trailing long year forces day_first to True.
    tail_numeric = _is_int(tail)
    if tail_numeric and valid_year(int(tail[-4:])):
        return True

    # A leading two-digit value above 31 can only be a short year.
    if head_numeric and int(head[:2]) > 31:
        return False

    # A trailing two-digit value above 31 can only be a short year.
    if tail_numeric and int(tail[-2:]) > 31:
        return True

    return None
-
-
def search_date(string, year_first=None, day_first=None):
    """Looks for date patterns, and if found return the date and group span.

    Assumes there are sentinels at the beginning and end of the string that
    always allow matching a non-digit delimiting the date.

    Each pattern of ``date_regexps`` is tried in order. The first one whose
    match parses to a date with a plausible year (see ``valid_year``) wins.

    >>> search_date(' This happened on 2002-04-22. ')
    (18, 28, datetime.date(2002, 4, 22))

    >>> search_date(' And this on 17-06-1998. ')
    (13, 23, datetime.date(1998, 6, 17))

    >>> search_date(' no date in here ')

    :param string: text to search
    :type string: str
    :param year_first: forwarded to the dateutil parser as ``yearfirst``; both values are tried when None
    :type year_first: bool or None
    :param day_first: forwarded to the dateutil parser as ``dayfirst``; guessed per candidate when None
    :type day_first: bool or None
    :return: ``(start, end, date)`` of the first plausible date, or None when nothing is found
    :rtype: tuple or None
    """
    for date_re in date_regexps:
        search_match = date_re.search(string)
        if not search_match:
            continue

        start, end = search_match.start(1), search_match.end(1)
        groups = search_match.groups()[1:]
        match = '-'.join(groups)

        # Resolve day_first for this candidate only. The guess depends on the
        # captured groups, so it must not leak into the next pattern.
        candidate_day_first = day_first
        if year_first and candidate_day_first is None:
            candidate_day_first = False
        if candidate_day_first is None:
            candidate_day_first = _guess_day_first_parameter(groups)

        # If day_first/year_first is undefined, parse is made using both possible values.
        yearfirst_opts = [False, True] if year_first is None else [year_first]
        dayfirst_opts = [True, False] if candidate_day_first is None else [candidate_day_first]

        for dayfirst in dayfirst_opts:
            for yearfirst in yearfirst_opts:
                try:
                    date = parser.parse(match, dayfirst=dayfirst, yearfirst=yearfirst)
                except (ValueError, TypeError):  # pragma: no cover
                    # see https://bugs.launchpad.net/dateutil/+bug/1247643
                    continue

                # check date plausibility
                if valid_year(date.year):  # pylint:disable=no-member
                    return start, end, date.date()  # pylint:disable=no-member

    return None
@@ -1,53 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-Expected property factory
-"""
-import re
-
-from rebulk import Rebulk
-from rebulk.utils import find_all
-
-from . import dash, seps
-
-
def build_expected_function(context_key):
    """
    Creates an expected property function.

    :param context_key: key of the context entry holding the expected values
    :type context_key: str
    :return: functional pattern ``expected(input_string, context)``
    :rtype: callable
    """

    def normalize(text):
        """
        Replace every separator of ``text`` by a space.
        """
        for sep in seps:
            text = text.replace(sep, ' ')
        return text

    def expected(input_string, context):
        """
        Expected property functional pattern.

        Entries prefixed by ``re:`` are searched as regular expressions; other entries
        are searched as plain text, ignoring case and separator differences.

        :param input_string: string to search in
        :type input_string: str
        :param context: context holding the expected values under ``context_key``
        :type context: dict
        :return: spans for ``re:`` entries, ``start``/``end``/``value`` dicts for plain entries
        :rtype: list
        """
        ret = []
        # Normalized copy of the input, built on first use. The original input is
        # kept untouched so that regex entries always run against the real string.
        normalized_input = None
        for search in context.get(context_key) or ():
            if search.startswith('re:'):
                pattern = search[3:].replace(' ', '-')
                matches = Rebulk().regex(pattern, abbreviations=[dash], flags=re.IGNORECASE) \
                    .matches(input_string, context)
                for match in matches:
                    ret.append(match.span)
            else:
                if normalized_input is None:
                    normalized_input = normalize(input_string)
                needle = normalize(search)
                for start in find_all(normalized_input, needle, ignore_case=True):
                    ret.append({'start': start, 'end': start + len(needle), 'value': search})
        return ret

    return expected
@@ -1,136 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-Formatters
-"""
-from rebulk.formatters import formatters
-from rebulk.remodule import re
-from . import seps
-
# Separators that cleanup() must keep as they are.
_excluded_clean_chars = ',:;-/\\'
# Separators that cleanup() replaces by a space, in the order they appear in seps.
clean_chars = ''.join(char for char in seps if char not in _excluded_clean_chars)
-
-
def _potential_before(i, input_string):
    """
    Check if the character at position i can be a potential single char separator considering what's before it.

    The character must be a separator preceded by exactly one non-separator character,
    itself preceded by a separator or by the start of the string.

    :param i: position of the candidate separator
    :type i: int
    :param input_string: string being cleaned
    :type input_string: str
    :return: True when what precedes position i is compatible with a single char separator
    :rtype: bool
    """
    if i < 1 or input_string[i] not in seps or input_string[i - 1] in seps:
        return False
    # At i == 1 there is nothing two characters back: the start of the string is the
    # boundary. Reading input_string[-1] there would look at the last character instead.
    return i == 1 or input_string[i - 2] in seps
-
-
def _potential_after(i, input_string):
    """
    Check if the character at position i can be a potential single char separator considering what's after it.

    :param i: position of the candidate separator
    :type i: int
    :param input_string: string being cleaned
    :type input_string: str
    :return: True when fewer than two characters follow position i, or when the character
        two positions further is the same as the one at i and the character in between
        is not a separator
    :rtype: bool
    """
    following = i + 2
    if following >= len(input_string):
        return True
    return input_string[following] == input_string[i] and input_string[i + 1] not in seps
-
-
def cleanup(input_string):
    """
    Removes and strip separators from input_string (but keep ',:;-/\\' characters)

    It also keeps separators between single characters (Marvels Agents of S.H.I.E.L.D.)

    :param input_string: string to clean
    :type input_string: str
    :return: cleaned string, with runs of spaces collapsed to a single space
    :rtype: str
    """
    clean_string = input_string
    for char in clean_chars:
        clean_string = clean_string.replace(char, ' ')

    # Restore input separator if they separate single characters.
    # Useful for Marvels Agents of S.H.I.E.L.D.
    # https://github.com/guessit-io/guessit/issues/278

    # A set keeps the neighbour lookups below constant-time.
    potential_indices = set(
        i for i, letter in enumerate(clean_string)
        if letter in seps and _potential_before(i, input_string) and _potential_after(i, input_string)
    )

    # Only restore a separator when a neighbouring candidate exists two characters away.
    replace_indices = [
        i for i in potential_indices
        if i - 2 in potential_indices or i + 2 in potential_indices
    ]

    dots = set()
    if replace_indices:
        clean_list = list(clean_string)
        for replace_index in replace_indices:
            dots.add(input_string[replace_index])
            clean_list[replace_index] = input_string[replace_index]
        clean_string = ''.join(clean_list)

    # Restored separators are excluded from the stripped characters.
    clean_string = strip(clean_string, ''.join([c for c in seps if c not in dots]))

    clean_string = re.sub(' +', ' ', clean_string)
    return clean_string
-
-
def strip(input_string, chars=seps):
    """
    Remove leading and trailing separators from input_string.

    :param input_string: string to strip
    :type input_string: str
    :param chars: characters to remove at both ends (defaults to ``seps``)
    :type chars: str
    :return: stripped string
    :rtype: str
    """
    stripped = input_string.strip(chars)
    return stripped
-
-
def raw_cleanup(raw):
    """
    Cleanup a raw value to perform raw comparison.

    The value is lower-cased, then passed through the formatter that rebulk's
    ``formatters`` builds from ``cleanup`` and ``strip``.

    :param raw: raw value
    :type raw: str
    :return: cleaned value
    """
    normalize = formatters(cleanup, strip)
    return normalize(raw.lower())
-
-
def reorder_title(title, articles=('the',), separators=(',', ', ')):
    """
    Move a trailing article back to the front of the title.

    The comparison is case-insensitive: ``'Simpsons, The'`` becomes ``'The Simpsons'``.

    :param title: title to reorder
    :type title: str
    :param articles: lower-case articles to look for at the end of the title
    :type articles: iterable of str
    :param separators: separators that may precede the trailing article
    :type separators: iterable of str
    :return: reordered title, or the title unchanged when it ends with no known article
    :rtype: str
    """
    lowered = title.lower()
    candidates = ((separator, separator + article) for article in articles for separator in separators)
    for separator, suffix in candidates:
        size = len(suffix)
        if lowered[-size:] != suffix:
            continue
        # The article starts right after the separator, counted from the end of the title.
        article_start = len(separator) - size
        return title[article_start:] + ' ' + title[:-size]
    return title
@@ -1,165 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-parse numeral from various formats
-"""
-from rebulk.remodule import re
-
# Regular expression fragment for a number written with one to four digits.
digital_numeral = r'\d{1,4}'

# Regular expression fragment for a roman numeral.
roman_numeral = r'(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})'

# In each list below, the position of a word is the number it stands for (0 to 20).
english_word_numeral_list = [
    'zero',
    'one', 'two', 'three', 'four', 'five',
    'six', 'seven', 'eight', 'nine', 'ten',
    'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',
    'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty',
]

french_word_numeral_list = [
    'zéro',
    'un', 'deux', 'trois', 'quatre', 'cinq',
    'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze',
    'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt',
]

# Alternative French spellings (no accent, feminine "une", no hyphen).
french_alt_word_numeral_list = [
    'zero',
    'une', 'deux', 'trois', 'quatre', 'cinq',
    'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze',
    'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt',
]
-
-
def __build_word_numeral(*args):
    """
    Build word numeral regexp from lists of words.

    :param args: any number of word lists; their words are concatenated in order
    :type args: iterable of str
    :return: non-capturing alternation of all the words, or a pattern that never
        matches when no word is given
    :rtype: str
    """
    words = [word for word_list in args for word in word_list]
    if not words:
        # Nothing to alternate on: an empty negative lookahead can never match.
        return r'(?!)'
    return r'(?:(?=\w+)' + '|'.join(words) + ')'
-
-
# Alternation of every known English and French number word.
word_numeral = __build_word_numeral(
    english_word_numeral_list,
    french_word_numeral_list,
    french_alt_word_numeral_list,
)

# A numeral is either digits, a roman numeral or a number word.
numeral = '(?:' + '|'.join((digital_numeral, roman_numeral, word_numeral)) + ')'

# Roman symbols with their value, from the highest to the lowest.
__romanNumeralMap = (
    ('M', 1000), ('CM', 900),
    ('D', 500), ('CD', 400),
    ('C', 100), ('XC', 90),
    ('L', 50), ('XL', 40),
    ('X', 10), ('IX', 9),
    ('V', 5), ('IV', 4),
    ('I', 1),
)

# Anchored version of roman_numeral, used to validate a whole value.
__romanNumeralPattern = re.compile('^{0}$'.format(roman_numeral))
-
-
def __parse_roman(value):
    """
    Convert a Roman numeral to an integer.

    :param value: Roman numeral, in upper case
    :type value: str
    :return: integer value
    :rtype: int
    :raise ValueError: when value is not a valid Roman numeral
    """
    if __romanNumeralPattern.search(value) is None:
        raise ValueError('Invalid Roman numeral: %s' % value)

    total = 0
    position = 0
    # Symbols are listed from the highest to the lowest: consume each one as long as
    # it is found at the current position.
    for symbol, amount in __romanNumeralMap:
        while value.startswith(symbol, position):
            total += amount
            position += len(symbol)
    return total
-
-
def __parse_word(value):
    """
    Convert a number word to an integer.

    The word is lower-cased and searched in the English list, then in the two French
    lists. Its position in the first list containing it is the result.

    :param value: word to parse
    :type value: str
    :return: integer value
    :rtype: int
    :raise ValueError: when the word is in none of the lists
    """
    lowered = value.lower()
    for known_words in (english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list):
        if lowered in known_words:
            return known_words.index(lowered)
    raise ValueError  # pragma: no cover
-
-
# Captures the first run of digits of a string.
_clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')


def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
    """
    Parse a numeric value into integer.

    The enabled notations are tried in this order: digits, roman numeral, number word.

    :param value: Value to parse. Can be an integer, roman numeral or word.
    :type value: string
    :param int_enabled: try to read the value as digits
    :type int_enabled: bool
    :param roman_enabled: try to read the value as a roman numeral
    :type roman_enabled: bool
    :param word_enabled: try to read the value as a number word
    :type word_enabled: bool
    :param clean: when True, look for the numeral inside the value (first run of digits,
        or first whitespace-separated word that parses) before trying the whole value
    :type clean: bool
    :return: Numeric value
    :rtype: int
    :raise ValueError: when the value can't be parsed by any enabled notation
    """
    if int_enabled:
        digits = _clean_re.match(value) if clean else None
        if digits:
            return int(digits.group(1))
        try:
            return int(value)
        except ValueError:
            pass

    if roman_enabled:
        # Words are upper-cased, the whole value is tried as it is.
        candidates = [word.upper() for word in value.split()] if clean else []
        candidates.append(value)
        for candidate in candidates:
            try:
                return __parse_roman(candidate)
            except ValueError:
                continue

    if word_enabled:
        candidates = value.split() if clean else []
        candidates.append(value)
        for candidate in candidates:
            try:
                return __parse_word(candidate)
            except ValueError:  # pragma: no cover
                continue

    raise ValueError('Invalid numeral: ' + value)   # pragma: no cover
@@ -1,27 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-Pattern utility functions
-"""
-
-
def is_disabled(context, name):
    """Whether a specific pattern is disabled.

    The context object might define an inclusion list (includes) or an exclusion list (excludes).
    A pattern is disabled if it is found in the exclusion list, or if a non-empty inclusion
    list is defined and the pattern is not part of it.

    :param context: context options; may be None or empty
    :type context: dict or None
    :param name: name of the pattern
    :type name: str
    :return: True when the pattern is disabled
    :rtype: bool
    """
    if not context:
        return False

    excludes = context.get('excludes')
    if excludes and name in excludes:
        return True

    includes = context.get('includes')
    # bool() so that a missing or empty inclusion list yields False, not None or [].
    return bool(includes) and name not in includes
@@ -1,106 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-Quantities: Size
-"""
-import re
-from abc import abstractmethod
-
-import six
-
-from ..common import seps
-
-
class Quantity(object):
    """
    Represent a quantity object with magnitude and units.

    Subclasses provide ``parse_units`` to turn the raw units text into their notation.
    """

    # Leading number (integer or decimal) followed by optional non-digit units text.
    parser_re = re.compile(r'(?P<magnitude>\d+(?:[.]\d+)?)(?P<units>[^\d]+)?')

    def __init__(self, magnitude, units):
        """
        :param magnitude: numeric part of the quantity
        :param units: units part of the quantity
        """
        self.magnitude = magnitude
        self.units = units

    @classmethod
    @abstractmethod
    def parse_units(cls, value):
        """
        Parse a string to a proper unit notation.

        :param value: raw units text
        :type value: str
        :return: units notation
        """
        raise NotImplementedError

    @classmethod
    def fromstring(cls, string):
        """
        Parse the string into a quantity object.

        :param string: text starting with a number, optionally followed by units
        :type string: str
        :return: quantity whose magnitude is an int when possible, a float otherwise
        :raise ValueError: when the string does not start with a number
        """
        parsed = cls.parser_re.match(string)
        if parsed is None:
            raise ValueError('Invalid quantity: %r' % (string,))
        values = parsed.groupdict()
        try:
            magnitude = int(values['magnitude'])
        except ValueError:
            magnitude = float(values['magnitude'])
        # The units group is optional: give parse_units an empty string, not None.
        units = cls.parse_units(values['units'] or '')

        return cls(magnitude, units)

    def __hash__(self):
        # Hash of the string form, so a quantity hashes like the string it equals.
        return hash(str(self))

    def __eq__(self, other):
        # A quantity compares equal to its own string form.
        if isinstance(other, six.string_types):
            return str(self) == other
        if not isinstance(other, self.__class__):
            return NotImplemented
        return self.magnitude == other.magnitude and self.units == other.units

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return '<{0} [{1}]>'.format(self.__class__.__name__, self)

    def __str__(self):
        return '{0}{1}'.format(self.magnitude, self.units)
-
-
class Size(Quantity):
    """
    Represent size.

    e.g.: 1.1GB, 300MB
    """

    @classmethod
    def parse_units(cls, value):
        """
        Strip the separators around the units and upper-case them.
        """
        trimmed = value.strip(seps)
        return trimmed.upper()
-
-
class BitRate(Quantity):
    """
    Represent bit rate.

    e.g.: 320Kbps, 1.5Mbps
    """

    @classmethod
    def parse_units(cls, value):
        """
        Strip the separators around the units, capitalize them and write
        ``bits``/``bit`` as ``bps``.
        """
        normalized = value.strip(seps).capitalize()
        # 'bits' must be replaced before 'bit', or a stray 's' would remain.
        return normalized.replace('bits', 'bps').replace('bit', 'bps')
-
-
class FrameRate(Quantity):
    """
    Represent frame rate.

    e.g.: 24fps, 60fps
    """

    @classmethod
    def parse_units(cls, value):
        """
        Always return ``fps``, whatever the units text is.
        """
        return 'fps'
@@ -1,74 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-Validators
-"""
-from functools import partial
-
-from rebulk.validators import chars_before, chars_after, chars_surround
-from . import seps
-
# rebulk character validators with seps bound as their first argument.
seps_before = partial(chars_before, seps)
seps_after = partial(chars_after, seps)
seps_surround = partial(chars_surround, seps)
-
-
def int_coercable(string):
    """
    Check if string can be coerced to int.

    :param string: value to test
    :type string: str
    :return: True when ``int(string)`` succeeds, False when it raises ``ValueError``
    :rtype: bool
    """
    try:
        int(string)
    except ValueError:
        return False
    return True
-
-
def and_(*validators):
    """
    Compose validators into one that passes only when all of them pass.

    :param validators: validator functions taking one argument
    :type validators: callable
    :return: composed validator
    :rtype: callable
    """
    def composed(string):
        """
        Run the validators in order and stop at the first one that fails.

        :param string: value to validate
        :return: True when every validator accepts the value (or when there is none)
        :rtype: bool
        """
        return all(validator(string) for validator in validators)
    return composed
-
-
def or_(*validators):
    """
    Compose validators into one that passes when at least one of them passes.

    :param validators: validator functions taking one argument
    :type validators: callable
    :return: composed validator
    :rtype: callable
    """
    def composed(string):
        """
        Run the validators in order and stop at the first one that passes.

        :param string: value to validate
        :return: True when a validator accepts the value, False otherwise (or when there is none)
        :rtype: bool
        """
        return any(validator(string) for validator in validators)
    return composed
@@ -1,34 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-Words utils
-"""
-from collections import namedtuple
-
-from . import seps
-
# A word with its (start, end) span in the source string.
_Word = namedtuple('_Word', ['span', 'value'])


def iter_words(string):
    """
    Iterate on all words in a string.

    A word is a run of characters containing no separator. Only characters below
    code point 128 can be separators.

    :param string: string to split
    :type string: str
    :return: words with their span, in order of appearance
    :rtype: iterable[_Word]
    """
    word_start = None  # start of the word being read, None between words
    for position, char in enumerate(string):
        # Make sure we don't exclude unicode characters.
        is_separator = ord(char) < 128 and char in seps
        if not is_separator:
            if word_start is None:
                word_start = position
            continue
        if word_start is not None:
            yield _Word(span=(word_start, position), value=string[word_start:position])
            word_start = None
    # The string may end in the middle of a word.
    if word_start is not None:
        yield _Word(span=(word_start, len(string)), value=string[word_start:])