PTN 1.3

2020-05-01 18:03:37 +02:00
parent 1152befcf1
commit e60525fee1
95 changed files with 306 additions and 21444 deletions
@@ -7,9 +7,6 @@ import os
 import re
 import sys
 import lib.babelfish
 from lib.guessit import guessit
 PY3 = False
 if sys.version_info[0] >= 3: PY3 = True; unicode = str; unichr = chr; long = int
 if PY3:
@@ -274,13 +271,14 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
        longtitle = title + (s if title and title2 else '') + title2 + '\n'
        if sceneTitle:
-            parsedTitle = guessit(title)
+            import lib.PTN.parse as parse
            parsedTitle = parse(title)
            title = longtitle = parsedTitle.get('title', '')
            log('TITOLO',title)
-            if parsedTitle.get('source'):
+            if parsedTitle.get('quality'):
-                quality = str(parsedTitle.get('source'))
+                quality = str(parsedTitle.get('quality'))
-                if parsedTitle.get('screen_size'):
+                if parsedTitle.get('resolution'):
-                    quality += ' ' + str(parsedTitle.get('screen_size', ''))
+                    quality += ' ' + str(parsedTitle.get('resolution', ''))
            if not scraped['year']:
                infolabels['year'] = parsedTitle.get('year', '')
            if parsedTitle.get('episode') and parsedTitle.get('season'):
@@ -299,8 +297,8 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
                longtitle += s + config.get_localized_string(30140) + " " +str(parsedTitle.get('season')[0]) + '-' + str(parsedTitle.get('season')[-1])
            elif parsedTitle.get('season'):
                longtitle += s + config.get_localized_string(60027) % str(parsedTitle.get('season'))
-            if parsedTitle.get('episode_title'):
+            if parsedTitle.get('episodeName'):
-                longtitle += s + parsedTitle.get('episode_title')
+                longtitle += s + parsedTitle.get('episodeName')
        longtitle = typo(longtitle, 'bold')
        lang1, longtitle = scrapeLang(scraped, lang, longtitle)
@@ -0,0 +1,15 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # https://github.com/platelminto/parse-torrent-name
 from .parse import PTN
 __author__ = 'Giorgio Momigliano'
 __email__ = 'gmomigliano@protonmail.com'
 __version__ = '1.3'
 __license__ = 'MIT'
 ptn = PTN()
 def parse(name):
    """Parse a release name with the shared module-level ``ptn`` parser.

    :param name: release/torrent name to analyse
    :return: whatever ``ptn.parse(name)`` returns for that name
    """
    parsed = ptn.parse(name)
    return parsed
@@ -0,0 +1,197 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import re
 from .patterns import patterns, types, exceptions, delimiters, episode_pattern
 class PTN(object):
    def _escape_regex(self, string):
        return re.sub('[\-\[\]{}()*+?.,\\\^$|#\s]', '\\$&', string)
    def __init__(self):
        self.torrent = None
        self.excess_raw = None
        self.group_raw = None
        self.start = None
        self.end = None
        self.title_raw = None
        self.parts = None
    def _part(self, name, match, raw, clean):
        # The main core instructuions
        self.parts[name] = clean
        if len(match) != 0:
            # The instructions for extracting title
            index = self.torrent['name'].find(match[0])
            if index == 0:
                self.start = len(match[0])
            elif self.end is None or index < self.end:
                self.end = index
        if name != 'excess':
            # The instructions for adding excess
            if name == 'group':
                self.group_raw = raw
            if raw is not None:
                self.excess_raw = self.excess_raw.replace(raw, '')
    @staticmethod
    def _get_pattern(pattern):
        """Return the regex stored under key ``pattern`` in ``patterns``.

        The first entry whose key equals ``pattern`` wins; ``IndexError`` is
        raised when no entry has that key.
        """
        regexes = []
        for entry in patterns:
            if entry[0] == pattern:
                regexes.append(entry[1])
        return regexes[0]
    @staticmethod
    def _clean_string(string):
        clean = re.sub('^ -', '', string)
        if clean.find(' ') == -1 and clean.find('.') != -1:
            clean = re.sub('\.', ' ', clean)
        clean = re.sub('_', ' ', clean)
        clean = re.sub('([\[\(_]|- )$', '', clean).strip()
        clean = clean.strip(' _-')
        return clean
    def parse(self, name):
        """Parse a release name into a dict of recognised parts.

        Every entry of ``patterns`` is tried against the name; each hit is
        recorded through ``_part``.  The title is then cut from the span
        between the leading and the earliest non-leading match, known
        mis-parses listed in ``exceptions`` are corrected, and whatever text
        is left over is split into ``episodeName``, ``group``, ``encoder``
        and ``excess``.

        :param name: release/torrent name; surrounding whitespace is ignored
        :return: ``self.parts``, a dict that always contains ``'title'`` plus
            one key per recognised part
        """
        name = name.strip()
        self.parts = {}
        self.torrent = {'name': name}
        self.excess_raw = name
        self.group_raw = ''
        self.start = 0
        self.end = None
        self.title_raw = None

        # Underscores count as spaces while matching.  The name does not
        # change inside the loop, so this is computed once.
        clean_name = re.sub('_', ' ', self.torrent['name'])

        for key, pattern in patterns:
            if key not in ('season', 'episode', 'episodeName', 'website'):
                pattern = r'\b%s\b' % pattern
            match = re.findall(pattern, clean_name, re.IGNORECASE)
            if len(match) == 0:
                continue
            index = {}
            # With multiple matches, we will usually want to use the first match.
            # For 'year', we instead use the last instance of a year match since,
            # if a title includes a year, we don't want to use this for the year field.
            match_index = 0
            if key == 'year':
                match_index = -1
            if isinstance(match[match_index], tuple):
                match = list(match[match_index])
            if len(match) > 1:
                index['raw'] = 0
                index['clean'] = 0
                # for season we might have it in index 1 or index 2
                # i.e. "5x09"
                for i in range(1, len(match)):
                    if match[i]:
                        index['clean'] = i
                        break
            else:
                index['raw'] = 0
                index['clean'] = 0
            # patterns for multiseason/episode make the range, and only the range, appear in match[0]
            if (key == 'season' or key == 'episode') and index['clean'] == 0:
                # handle multi season/episode
                # i.e. S01-S09
                m = re.findall('[0-9]+', match[0])
                if m:
                    clean = list(range(int(m[0]), int(m[1])+1))
            elif key == 'language':
                # handle multi language
                m = re.split('{}+'.format(delimiters), match[0])
                clean = list(filter(None, m))
                if len(clean) == 1:
                    clean = clean[0]
            elif key in types.keys() and types[key] == 'boolean':
                clean = True
            else:
                clean = match[index['clean']]
                if key in types.keys() and types[key] == 'integer':
                    clean = int(clean)
            # Codec, quality and subtitles matches can interfere with group matching,
            # so we do this later as a special case.
            if key == 'group':
                if (re.search(self._get_pattern('codec'), clean, re.IGNORECASE) or
                    re.search(self._get_pattern('quality'), clean, re.IGNORECASE) or
                    re.search(self._get_pattern('subtitles'), clean, re.IGNORECASE)):
                    continue
            self._part(key, match, match[index['raw']], clean)

        # Start process for title
        raw = self.torrent['name']
        if self.end is not None:
            raw = raw[self.start:self.end].split('(')[0]
        clean = self._clean_string(raw)
        self._part('title', [], raw, clean)

        # Considerations for results that are known to cause issues, such
        # as media with years in them but without a release year.
        for exception in exceptions:
            incorrect_key, incorrect_value = exception['incorrect_parse']
            # The membership test matters: an exception may name a key that
            # was never parsed (e.g. an empty title without any year), which
            # previously raised KeyError here.
            if (self.parts['title'] == exception['parsed_title']
                    and incorrect_key in self.parts
                    and self.parts[incorrect_key] == incorrect_value):
                self.parts.pop(incorrect_key)
                self.parts['title'] = exception['actual_title']

        # Start process for end
        clean = re.sub(r'(^[-\. ()]+)|([-\. ]+$)', '', self.excess_raw)
        clean = re.sub(r'[\(\)\/]', ' ', clean)
        match = re.findall(r'((?:(?:[A-Za-z][a-z]+|[A-Za-z])(?:[\.\ \-\+\_]|$))+)', clean)
        if match:
            # Only text that directly follows an episode marker in the
            # original name is accepted as the episode name.
            match = re.findall(episode_pattern + r'[\.\_\-\s\+]*(' + re.escape(match[0]) + ')',
                               self.torrent['name'], re.IGNORECASE)
            if match:
                self._part('episodeName', match, match[0], self._clean_string(match[0]))
                clean = clean.replace(match[0], '')
        clean = re.sub(r'(^[-_\. ()]+)|([-\. ]+$)', '', clean)
        clean = re.sub(r'[\(\)\/]', ' ', clean)
        match = re.split(r'\.\.+| +', clean)
        # Drop empty tokens and lone hyphens, then trim hyphens off the rest.
        clean = [item.strip('-') for item in match if item and item != '-']
        if len(clean) != 0:
            # The last leftover token is taken as the release group.
            group = clean.pop() + self.group_raw
            self._part('group', [], group, group)

        # clean group name from having a container name
        if 'group' in self.parts and 'container' in self.parts:
            group = self.parts['group']
            container = self.parts['container']
            if group.lower().endswith('.'+container.lower()):
                group = group[:-(len(container)+1)]
                self.parts['group'] = group

        # split group name and encoder, adding the latter to self.parts
        if 'group' in self.parts:
            group = self.parts['group']
            pat = r'(\[(.*)\])'
            match = re.findall(pat, group, flags=re.IGNORECASE)
            if match:
                match = match[0]
                raw = match[0]
                if match:
                    self._part('encoder', match, raw, match[1])
                    self.parts['group'] = group.replace(raw, '')
                    if not self.parts['group'].strip():
                        self.parts.pop('group')

        if len(clean) != 0:
            if len(clean) == 1:
                clean = clean[0]  # Avoids making a list if it only has 1 element
            self._part('excess', [], self.excess_raw, clean)
        return self.parts
@@ -0,0 +1,86 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # Characters accepted as separators between the parts of a release name.
 delimiters = '[\.\s\-\+_\/]'
 # Alternation of recognised language tags.
 langs = 'rus|(?:True)?fr(?:ench)?|e?n(?:g(?:lish)?)?|vost(' \
        '?:fr)?|ita(?:liano)?|castellano|swedish|spanish|dk|german|multi|nordic|exyu|chs|hindi|polish|mandarin'
 # Producer tags that may prefix a WEB quality tag.
 producers = 'ATVP|AMZN|NF|NICK|RED|DSNP'
 season_range_pattern = '(?:Complete' + delimiters + '*)?(?:' + delimiters + '*)?(?:s(?:easons?)?)?' + delimiters + '?(?:s?[0-9]{1,2}[\s]*(' \
                        '?:\-|(?:\s*to\s*))[\s]*s?[0-9]{1,2})(?:' + delimiters + '*Complete)?'
 # Used when matching episodeName in parse.py, when actually matching episodes we use a slightly
 # modified version that has a capture group on the episode number (as seen below).
 episode_pattern = '(?:(?:[ex]|ep)(?:[0-9]{1,2}(?:-(?:[ex]|ep)?(?:[0-9]{1,2})))|(?:[ex]|ep)(?:[0-9]{1,2}))'
 year_pattern = '(?:19[0-9]|20[0-2])[0-9]'
 month_pattern = '0[1-9]|1[0-2]'
 day_pattern = '[0-2][0-9]|3[01]'
 # Ordered (key, regex) pairs; parse.py tries them in this order.
 patterns = [
    ('season', delimiters + '(' # Season description can't be at the beginning, must be after this pattern
               '' + season_range_pattern + '|' # Describes season ranges
               '(?:Complete' + delimiters + ')?s([0-9]{1,2})(?:' + episode_pattern + ')?|'  # Describes season, optionally with complete or episode
               '([0-9]{1,2})x[0-9]{2}|'  # Describes 5x02, 12x15 type descriptions
               '(?:Complete' + delimiters + ')?Season[\. -]([0-9]{1,2})'  # Describes Season.15 type descriptions
               ')(?:' + delimiters + '|$)'),
    ('episode', '((?:[ex]|ep)(?:[0-9]{1,2}(?:-(?:[ex]|ep)?(?:[0-9]{1,2})))|(?:[ex]|ep)([0-9]{1,2}))(?:[^0-9]|$)'),
    ('year', '([\[\(]?(' + year_pattern + ')[\]\)]?)'),
    ('month', '(?:' + year_pattern + ')' + delimiters + '(' + month_pattern + ')' + delimiters + '(?:' + day_pattern + ')'),
    ('day', '(?:' + year_pattern + ')' + delimiters + '(?:' + month_pattern + ')' + delimiters + '(' + day_pattern + ')'),
    ('resolution', '([0-9]{3,4}p|1280x720)'),
    ('quality', ('((?:PPV\.)?[HP]DTV|(?:HD)?CAM-?(?:Rip)?|B[DR]Rip|(?:HD-?)?TS|'
                 'HDRip|HDTVRip|DVDRip|DVDRIP|'
                 '(?:(?:' + producers + ')' + delimiters + '?)?(?:PPV )?W[EB]B(?:-?DL(?:Mux)?)?(?:Rip| DVDRip)?|BluRay|DvDScr|hdtv|telesync)')),
    ('codec', '(xvid|[hx]\.?26[45])'),
    ('audio', ('(MP3|DD5\.?1|Dual[\- ]Audio|LiNE|DTS|DTS5\.1|'
               'AAC[ \.-]LC|AAC(?:(?:\.?2(?:\.0)?)?|(?:\.?5(?:\.1)?)?)|'
               '(?:E-?)?AC-?3(?:' + delimiters + '*?(?:2\.0|5\.1))?)')),
    ('region', 'R[0-9]'),
    # The optional ".CUT" suffix is a non-capturing group; it used to be
    # written "(:?.CUT)", a typo that created a stray capture group and
    # accepted an optional literal colon.
    ('extended', '(EXTENDED(?:.CUT)?)'),
    ('hardcoded', 'HC'),
    ('proper', 'PROPER'),
    ('repack', 'REPACK'),
    ('container', '(MKV|AVI|MP4)'),
    ('widescreen', 'WS'),
    ('website', '^(\[ ?([^\]]+?) ?\])'),
    ('subtitles', '((?:(?:' + langs + '|e-?)[\-\s.]*)*subs?)'),
    ('language', '((?:(?:' + langs + ')' + delimiters + '*)+)(?!(?:[\-\s.]*(?:' + langs + ')*)+[\-\s.]?subs)'),
    ('sbs', '(?:Half-)?SBS'),
    ('unrated', 'UNRATED'),
    ('size', '(\d+(?:\.\d+)?(?:GB|MB))'),
    ('bitDepth', '(?:8|10)bit'),
    ('3d', '3D'),
    ('internal', 'iNTERNAL'),
    ('readnfo', 'READNFO')
 ]
 # Parts whose value is converted instead of kept as matched text:
 # 'integer' parts go through int(), 'boolean' parts are stored as True.
 types = dict.fromkeys(('season', 'episode', 'year', 'month', 'day'), 'integer')
 types.update(dict.fromkeys(
    ('extended', 'hardcoded', 'proper', 'repack', 'widescreen',
     'unrated', '3d', 'internal', 'readnfo'),
    'boolean'))
 # Known mis-parses.  Each entry gives the title as wrongly parsed, the
 # (key, value) pair that was wrongly extracted, and the title to use instead.
 exceptions = [
    dict(parsed_title='', incorrect_parse=('year', 1983), actual_title='1983'),
    dict(
        parsed_title="Marvel's Agents of S H I E L D",
        incorrect_parse=('title', "Marvel's Agents of S H I E L D"),
        actual_title="Marvel's Agents of S.H.I.E.L.D.",
    ),
 ]
@@ -1,25 +0,0 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 __title__ = 'babelfish'
 __version__ = '0.5.5-dev'
 __author__ = 'Antoine Bertin'
 __license__ = 'BSD'
 __copyright__ = 'Copyright 2015 the BabelFish authors'
 import sys
 # Base text type for the running interpreter: ``str`` on Python 3,
 # ``basestring`` on older versions.
 basestr = str if sys.version_info[0] >= 3 else basestring
 from .converters import (LanguageConverter, LanguageReverseConverter, LanguageEquivalenceConverter, CountryConverter,
    CountryReverseConverter)
 from .country import country_converters, COUNTRIES, COUNTRY_MATRIX, Country
 from .exceptions import Error, LanguageConvertError, LanguageReverseError, CountryConvertError, CountryReverseError
 from .language import language_converters, LANGUAGES, LANGUAGE_MATRIX, Language
 from .script import SCRIPTS, SCRIPT_MATRIX, Script
@@ -1,287 +0,0 @@
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 import collections
 from pkg_resources import iter_entry_points, EntryPoint
 from ..exceptions import LanguageConvertError, LanguageReverseError
 # from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
 try:
    # The ABCs live in collections.abc; the aliases in ``collections``
    # were removed in Python 3.10.
    from collections.abc import Mapping, MutableMapping
 except ImportError:  # Python 2
    from collections import Mapping, MutableMapping


 class CaseInsensitiveDict(MutableMapping):
    """A case-insensitive ``dict``-like object.

    Implements all methods and operations of
    ``collections.abc.MutableMapping`` as well as dict's ``copy``. Also
    provides ``lower_items``.

    All keys are expected to be strings. The structure remembers the
    case of the last key to be set, and iteration yields keys in that
    remembered case. Lookups, deletion and containment tests ignore case:

        cid = CaseInsensitiveDict()
        cid['English'] = 'eng'
        cid['ENGLISH'] == 'eng'  # True
        list(cid) == ['English']  # True

    If the constructor, ``.update``, or equality comparison
    operations are given keys that have equal ``.lower()``s, the
    behavior is undefined.
    """
    def __init__(self, data=None, **kwargs):
        """Create the mapping, optionally filled from ``data`` and ``kwargs``."""
        # Maps lowercased key -> (key as last set, value).
        self._store = dict()
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def __setitem__(self, key, value):
        # Use the lowercased key for lookups, but store the actual
        # key alongside the value.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        return self._store[key.lower()][1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        # Yield keys in the case they were last set with.
        return (casedkey for casedkey, mappedvalue in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Iterate over ``(lowercased key, value)`` pairs."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval)
            in self._store.items()
        )

    def __eq__(self, other):
        """Compare with another mapping, ignoring key case."""
        if isinstance(other, Mapping):
            other = CaseInsensitiveDict(other)
        else:
            return NotImplemented
        # Compare insensitively
        return dict(self.lower_items()) == dict(other.lower_items())

    # Copy is required
    def copy(self):
        """Return a new ``CaseInsensitiveDict`` with the same keys and values."""
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
 class LanguageConverter(object):
    """Base class for converters that turn a language into a custom code.

    A language is identified by an alpha3 language code, optionally with an
    alpha2 country code and a script code.

    .. attribute:: codes
        Set of possible custom codes
    """
    def convert(self, alpha3, country=None, script=None):
        """Return the custom code for the given language.

        Must be overridden; this base implementation always raises
        ``NotImplementedError``.

        :param string alpha3: ISO-639-3 language code
        :param country: ISO-3166 country code, if any
        :type country: string or None
        :param script: ISO-15924 script code, if any
        :type script: string or None
        :return: the corresponding custom code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.LanguageConvertError`
        """
        raise NotImplementedError()
 class LanguageReverseConverter(LanguageConverter):
    """A :class:`LanguageConverter` that can also map a custom code back.

    Reversing yields an alpha3 ISO-639-3 language code, an alpha2 ISO-3166-1
    country code and an ISO-15924 script code.
    """
    def reverse(self, code):
        """Return the language identified by a custom code.

        Must be overridden; this base implementation always raises
        ``NotImplementedError``.

        :param string code: custom code to reverse
        :return: the corresponding alpha3 ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code
        :rtype: tuple
        :raise: :class:`~babelfish.exceptions.LanguageReverseError`
        """
        raise NotImplementedError()
 class LanguageEquivalenceConverter(LanguageReverseConverter):
    """Build a :class:`LanguageReverseConverter` from a single alpha3-to-symbol dict.

    Subclasses provide the dict as a class variable named SYMBOLS.
    Reverse lookups ignore case by default; set the class variable
    CASE_SENSITIVE to ``True`` to make them case-sensitive.

    Example::

        class MyCodeConverter(babelfish.LanguageEquivalenceConverter):
            CASE_SENSITIVE = True
            SYMBOLS = {'fra': 'mycode1', 'eng': 'mycode2'}
    """
    CASE_SENSITIVE = False

    def __init__(self):
        """Index SYMBOLS in both directions and collect the known codes."""
        self.codes = set()
        self.to_symbol = {}
        # A plain dict keeps reverse lookups case-sensitive.
        self.from_symbol = {} if self.CASE_SENSITIVE else CaseInsensitiveDict()
        for alpha3, symbol in self.SYMBOLS.items():
            self.codes.add(symbol)
            # Reversing only ever recovers the language, never country/script.
            self.from_symbol[symbol] = (alpha3, None, None)
            self.to_symbol[alpha3] = symbol

    def convert(self, alpha3, country=None, script=None):
        """Return the symbol for ``alpha3`` or raise ``LanguageConvertError``."""
        try:
            symbol = self.to_symbol[alpha3]
        except KeyError:
            raise LanguageConvertError(alpha3, country, script)
        return symbol

    def reverse(self, code):
        """Return ``(alpha3, None, None)`` for ``code`` or raise ``LanguageReverseError``."""
        try:
            language = self.from_symbol[code]
        except KeyError:
            raise LanguageReverseError(code)
        return language
 class CountryConverter(object):
    """Base class for converters that turn an alpha2 country code into a custom code.

    .. attribute:: codes
        Set of possible custom codes
    """
    def convert(self, alpha2):
        """Return the custom code for an alpha2 country code.

        Must be overridden; this base implementation always raises
        ``NotImplementedError``.

        :param string alpha2: ISO-3166-1 language code
        :return: the corresponding custom code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.CountryConvertError`
        """
        raise NotImplementedError()
 class CountryReverseConverter(CountryConverter):
    """A :class:`CountryConverter` that can also map a custom code back
    to an alpha2 ISO-3166-1 country code.
    """
    def reverse(self, code):
        """Return the alpha2 country code identified by a custom code.

        Must be overridden; this base implementation always raises
        ``NotImplementedError``.

        :param string code: custom code to reverse
        :return: the corresponding alpha2 ISO-3166-1 country code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.CountryReverseError`
        """
        raise NotImplementedError()
 class ConverterManager(object):
    """Dict-like registry of babelfish converters that loads them on demand.

    A converter requested by name is looked for in this order:

    * Already loaded converters
    * Entry point converters
    * Registered converters
    * Internal converters

    .. attribute:: entry_point
        The entry point where to look for converters
    .. attribute:: internal_converters
        Internal converters with entry point syntax
    """
    entry_point = ''
    internal_converters = []

    def __init__(self):
        #: Registered converters with entry point syntax
        self.registered_converters = []
        #: Loaded converters
        self.converters = {}

    def __getitem__(self, name):
        """Get a converter, lazy loading it if necessary"""
        try:
            return self.converters[name]
        except KeyError:
            pass

        for ep in iter_entry_points(self.entry_point):
            if ep.name != name:
                continue
            converter = ep.load()()
            self.converters[ep.name] = converter
            return converter

        for spec in self.registered_converters + self.internal_converters:
            ep = EntryPoint.parse(spec)
            if ep.name != name:
                continue
            # `require` argument of ep.load() is deprecated in newer versions of setuptools
            if hasattr(ep, 'resolve'):
                plugin = ep.resolve()
            elif hasattr(ep, '_load'):
                plugin = ep._load()
            else:
                plugin = ep.load(require=False)
            converter = plugin()
            self.converters[ep.name] = converter
            return converter

        raise KeyError(name)

    def __setitem__(self, name, converter):
        """Load a converter"""
        self.converters[name] = converter

    def __delitem__(self, name):
        """Unload a converter"""
        del self.converters[name]

    def __iter__(self):
        """Iterator over loaded converters"""
        return iter(self.converters)

    def register(self, entry_point):
        """Register a converter

        Newly registered converters are placed ahead of earlier ones.

        :param string entry_point: converter to register (entry point syntax)
        :raise: ValueError if already registered
        """
        already_known = entry_point in self.registered_converters
        if already_known:
            raise ValueError('Already registered')
        self.registered_converters.insert(0, entry_point)

    def unregister(self, entry_point):
        """Unregister a converter

        :param string entry_point: converter to unregister (entry point syntax)
        """
        self.registered_converters.remove(entry_point)

    def __contains__(self, name):
        # Only converters that are already loaded count as contained.
        return name in self.converters
@@ -1,17 +0,0 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 from __future__ import unicode_literals
 from . import LanguageEquivalenceConverter
 from ..language import LANGUAGE_MATRIX
 class Alpha2Converter(LanguageEquivalenceConverter):
    """Case-sensitive converter between alpha3 codes and alpha2 codes."""
    CASE_SENSITIVE = True
    SYMBOLS = {}
    # Built at class-creation time; entries with a falsy alpha2 are left out.
    for iso_language in LANGUAGE_MATRIX:
        if not iso_language.alpha2:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.alpha2
@@ -1,17 +0,0 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 from __future__ import unicode_literals
 from . import LanguageEquivalenceConverter
 from ..language import LANGUAGE_MATRIX
 class Alpha3BConverter(LanguageEquivalenceConverter):
    """Case-sensitive converter between alpha3 codes and alpha3b codes."""
    CASE_SENSITIVE = True
    SYMBOLS = {}
    # Built at class-creation time; entries with a falsy alpha3b are left out.
    for iso_language in LANGUAGE_MATRIX:
        if not iso_language.alpha3b:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.alpha3b
@@ -1,17 +0,0 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 from __future__ import unicode_literals
 from . import LanguageEquivalenceConverter
 from ..language import LANGUAGE_MATRIX
 class Alpha3TConverter(LanguageEquivalenceConverter):
    """Case-sensitive converter between alpha3 codes and alpha3t codes."""
    CASE_SENSITIVE = True
    SYMBOLS = {}
    # Built at class-creation time; entries with a falsy alpha3t are left out.
    for iso_language in LANGUAGE_MATRIX:
        if not iso_language.alpha3t:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.alpha3t
@@ -1,31 +0,0 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 from __future__ import unicode_literals
 from . import CountryReverseConverter, CaseInsensitiveDict
 from ..country import COUNTRY_MATRIX
 from ..exceptions import CountryConvertError, CountryReverseError
 class CountryNameConverter(CountryReverseConverter):
    """Converter between alpha2 country codes and country names.

    Name-to-code lookups ignore case; code-to-name lookups do not.
    """
    def __init__(self):
        """Index every entry of ``COUNTRY_MATRIX`` in both directions."""
        self.codes = set()
        self.to_name = {}
        self.from_name = CaseInsensitiveDict()
        for entry in COUNTRY_MATRIX:
            self.to_name[entry.alpha2] = entry.name
            self.from_name[entry.name] = entry.alpha2
            self.codes.add(entry.name)

    def convert(self, alpha2):
        """Return the name for ``alpha2`` or raise ``CountryConvertError``."""
        if alpha2 in self.to_name:
            return self.to_name[alpha2]
        raise CountryConvertError(alpha2)

    def reverse(self, name):
        """Return the alpha2 code for ``name`` or raise ``CountryReverseError``."""
        if name in self.from_name:
            return self.from_name[name]
        raise CountryReverseError(name)
@@ -1,17 +0,0 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 from __future__ import unicode_literals
 from . import LanguageEquivalenceConverter
 from ..language import LANGUAGE_MATRIX
 class NameConverter(LanguageEquivalenceConverter):
    """Converter between alpha3 codes and language names (reverse ignores case)."""
    CASE_SENSITIVE = False
    SYMBOLS = {}
    # Built at class-creation time; entries with a falsy name are left out.
    for iso_language in LANGUAGE_MATRIX:
        if not iso_language.name:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.name
@@ -1,36 +0,0 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 from __future__ import unicode_literals
 from . import LanguageReverseConverter, CaseInsensitiveDict
 from ..exceptions import LanguageReverseError
 from ..language import language_converters
 class OpenSubtitlesConverter(LanguageReverseConverter):
    """Converter for OpenSubtitles codes.

    Delegates to the ``alpha3b`` and ``alpha2`` language converters and adds
    a small table of OpenSubtitles-specific codes on top.
    """
    def __init__(self):
        self.alpha3b_converter = language_converters['alpha3b']
        self.alpha2_converter = language_converters['alpha2']
        # (alpha3b, country) pairs that have a dedicated OpenSubtitles code.
        self.to_opensubtitles = {('por', 'BR'): 'pob', ('gre', None): 'ell', ('srp', None): 'scc', ('srp', 'ME'): 'mne'}
        # Dedicated codes mapped back to (language, country); lookups ignore case.
        self.from_opensubtitles = CaseInsensitiveDict({'pob': ('por', 'BR'), 'pb': ('por', 'BR'), 'ell': ('ell', None),
                                                       'scc': ('srp', None), 'mne': ('srp', 'ME')})
        special_codes = set(self.from_opensubtitles.keys())
        self.codes = self.alpha2_converter.codes | self.alpha3b_converter.codes | special_codes

    def convert(self, alpha3, country=None, script=None):
        """Return the dedicated code for the language, else its alpha3b code."""
        alpha3b = self.alpha3b_converter.convert(alpha3, country, script)
        return self.to_opensubtitles.get((alpha3b, country), alpha3b)

    def reverse(self, opensubtitles):
        """Resolve a code via the dedicated table, then alpha3b, then alpha2.

        Raises ``LanguageReverseError`` when none of them knows the code.
        """
        if opensubtitles in self.from_opensubtitles:
            return self.from_opensubtitles[opensubtitles]
        for converter in (self.alpha3b_converter, self.alpha2_converter):
            try:
                return converter.reverse(opensubtitles)
            except LanguageReverseError:
                continue
        raise LanguageReverseError(opensubtitles)
@@ -1,23 +0,0 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 from __future__ import unicode_literals
 from . import LanguageConverter
 from ..exceptions import LanguageConvertError
 from ..language import LANGUAGE_MATRIX
 class ScopeConverter(LanguageConverter):
    """Converter from an alpha3 code to the full name of the language's scope."""
    # Full names of the one-letter scope symbols.
    FULLNAME = {'I': 'individual', 'M': 'macrolanguage', 'S': 'special'}
    # alpha3 -> scope symbol, built at class-creation time.
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        SYMBOLS[iso_language.alpha3] = iso_language.scope
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        """Return the scope name for ``alpha3``.

        Raises ``LanguageConvertError`` when the language's scope symbol has
        no full name; an ``alpha3`` missing from SYMBOLS raises ``KeyError``.
        """
        scope = self.SYMBOLS[alpha3]
        if scope not in self.FULLNAME:
            raise LanguageConvertError(alpha3, country, script)
        return self.FULLNAME[scope]
@@ -1,23 +0,0 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 from __future__ import unicode_literals
 from . import LanguageConverter
 from ..exceptions import LanguageConvertError
 from ..language import LANGUAGE_MATRIX
 class LanguageTypeConverter(LanguageConverter):
    """Converter from an alpha3 code to the full name of the language's type."""
    # Full names of the one-letter type symbols.
    FULLNAME = {'A': 'ancient', 'C': 'constructed', 'E': 'extinct', 'H': 'historical', 'L': 'living', 'S': 'special'}
    # alpha3 -> type symbol, built at class-creation time.
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        SYMBOLS[iso_language.alpha3] = iso_language.type
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        """Return the type name for ``alpha3``.

        Raises ``LanguageConvertError`` when the language's type symbol has
        no full name; an ``alpha3`` missing from SYMBOLS raises ``KeyError``.
        """
        kind = self.SYMBOLS[alpha3]
        if kind not in self.FULLNAME:
            raise LanguageConvertError(alpha3, country, script)
        return self.FULLNAME[kind]
@@ -1,107 +0,0 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 from __future__ import unicode_literals
 from collections import namedtuple
 from functools import partial
 from pkg_resources import resource_stream  # @UnresolvedImport
 from .converters import ConverterManager
 from . import basestr
 #: Maps each alpha2 country code to the country name read from the data file
 COUNTRIES = {}
 #: Every record of the data file, in file order, as :class:`IsoCountry` tuples
 COUNTRY_MATRIX = []
 #: The namedtuple used in the :data:`COUNTRY_MATRIX`
 IsoCountry = namedtuple('IsoCountry', ['name', 'alpha2'])
 # Load the bundled country table: one ``name;alpha2`` record per line
 f = resource_stream('babelfish', 'data/iso-3166-1.txt')
 try:
    f.readline()  # discard the first line of the file
    for l in f:
        # The stream yields bytes: decode, drop the line terminator, then split the two fields
        iso_country = IsoCountry(*l.decode('utf-8').strip().split(';'))
        COUNTRIES[iso_country.alpha2] = iso_country.name
        COUNTRY_MATRIX.append(iso_country)
 finally:
    # Release the stream even when a record cannot be parsed
    f.close()
 class CountryConverterManager(ConverterManager):
    """The :class:`~babelfish.converters.ConverterManager` subclass that holds the country converters"""
    # NOTE(review): presumably the entry point group scanned for external converters - confirm in ConverterManager
    entry_point = 'babelfish.country_converters'
    # Converters declared by the package itself, written as ``name = module:ClassName``
    internal_converters = [
        'name = babelfish.converters.countryname:CountryNameConverter',
    ]
 # Module-level manager instance used by Country for its converter lookups
 country_converters = CountryConverterManager()
 class CountryMeta(type):
    """Metaclass of :class:`Country`

    A class attribute that is not found normally and whose name starts with ``from`` is answered
    with ``fromcode`` pre-bound to the converter named by the rest of the attribute name, so
    ``Country.frommycode(code)`` behaves like ``Country.fromcode(code, converter='mycode')``.
    """
    def __getattr__(cls, name):
        if not name.startswith('from'):
            # Defer to the regular lookup, which raises AttributeError for an unknown name
            return type.__getattribute__(cls, name)
        converter_name = name[len('from'):]
        return partial(cls.fromcode, converter=converter_name)
 class Country(CountryMeta(str('CountryBase'), (object,), {})):
    """A country on Earth, identified by its 2-letter ISO-3166 code

    Attributes that are not found normally are resolved through the converter of the same
    name in ``country_converters``.

    :param string country: 2-letter ISO-3166 country code
    :raise ValueError: if `country` is not a key of ``COUNTRIES``
    """
    def __init__(self, country):
        is_known = country in COUNTRIES
        if not is_known:
            raise ValueError('%r is not a valid country' % country)
        #: ISO-3166 2-letter country code
        self.alpha2 = country
    @classmethod
    def fromcode(cls, code, converter):
        """Build a :class:`Country` from `code` by reversing it with the converter named `converter`

        :param string code: the code to reverse
        :param string converter: key of the converter in ``country_converters``
        :return: the corresponding :class:`Country` instance
        :rtype: :class:`Country`
        """
        reverse = country_converters[converter].reverse
        return cls(reverse(code))
    def __getstate__(self):
        """Return the alpha2 code as the whole pickled state"""
        return self.alpha2
    def __setstate__(self, state):
        """Restore the alpha2 code from the pickled state"""
        self.alpha2 = state
    def __getattr__(self, name):
        """Convert the alpha2 code with the converter called `name`

        A :class:`KeyError` raised by the lookup or by the conversion is turned into
        :class:`AttributeError`.
        """
        try:
            converter = country_converters[name]
            return converter.convert(self.alpha2)
        except KeyError:
            raise AttributeError(name)
    def __hash__(self):
        return hash(self.alpha2)
    def __eq__(self, other):
        """Compare with a string (against ``str(self)``) or with another :class:`Country` (by alpha2)"""
        if isinstance(other, basestr):
            return str(self) == other
        return isinstance(other, Country) and self.alpha2 == other.alpha2
    def __ne__(self, other):
        return not (self == other)
    def __repr__(self):
        return '<Country [%s]>' % (self,)
    def __str__(self):
        return self.alpha2
@@ -1,45 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 from __future__ import unicode_literals
 import os.path
 import tempfile
 import zipfile
 import requests
 # Directory that contains this script; every data file is written next to it
 DATA_DIR = os.path.dirname(__file__)
 def _download(url):
    """Return the body of `url` as bytes

    :raise requests.HTTPError: if the server answers with an error status, so that an
        error page is never stored as data
    """
    response = requests.get(url)
    response.raise_for_status()
    return response.content
 def _save(filename, content):
    """Write the bytes `content` to `filename` inside :data:`DATA_DIR`"""
    # Binary mode: the payload is bytes, which a text-mode file rejects on Python 3
    with open(os.path.join(DATA_DIR, filename), 'wb') as f:
        f.write(content)
 def _extract_member(url, member):
    """Download the zip archive at `url` and extract `member` into :data:`DATA_DIR`"""
    content = _download(url)
    with tempfile.TemporaryFile() as f:
        f.write(content)
        with zipfile.ZipFile(f) as z:
            z.extract(member, DATA_DIR)
 # Each download completes before its target file is opened, so a failed request
 # leaves the existing data file untouched.
 # iso-3166-1.txt
 print('Downloading ISO-3166-1 standard (ISO country codes)...')
 _save('iso-3166-1.txt', _download('http://www.iso.org/iso/home/standards/country_codes/country_names_and_code_elements_txt.htm').strip())
 # iso-639-3.tab
 print('Downloading ISO-639-3 standard (ISO language codes)...')
 _extract_member('http://www-01.sil.org/iso639-3/iso-639-3_Code_Tables_20130531.zip', 'iso-639-3.tab')
 # iso-15924
 print('Downloading ISO-15924 standard (ISO script codes)...')
 _extract_member('http://www.unicode.org/iso15924/iso15924.txt.zip', 'iso15924-utf8-20131012.txt')
 # opensubtitles supported languages
 print('Downloading OpenSubtitles supported languages...')
 _save('opensubtitles_languages.txt', _download('http://www.opensubtitles.org/addons/export_languages.php'))
 print('Done!')
@@ -1,250 +0,0 @@
 Country Name;ISO 3166-1-alpha-2 code
 AFGHANISTAN;AF
 ÅLAND ISLANDS;AX
 ALBANIA;AL
 ALGERIA;DZ
 AMERICAN SAMOA;AS
 ANDORRA;AD
 ANGOLA;AO
 ANGUILLA;AI
 ANTARCTICA;AQ
 ANTIGUA AND BARBUDA;AG
 ARGENTINA;AR
 ARMENIA;AM
 ARUBA;AW
 AUSTRALIA;AU
 AUSTRIA;AT
 AZERBAIJAN;AZ
 BAHAMAS;BS
 BAHRAIN;BH
 BANGLADESH;BD
 BARBADOS;BB
 BELARUS;BY
 BELGIUM;BE
 BELIZE;BZ
 BENIN;BJ
 BERMUDA;BM
 BHUTAN;BT
 BOLIVIA, PLURINATIONAL STATE OF;BO
 BONAIRE, SINT EUSTATIUS AND SABA;BQ
 BOSNIA AND HERZEGOVINA;BA
 BOTSWANA;BW
 BOUVET ISLAND;BV
 BRAZIL;BR
 BRITISH INDIAN OCEAN TERRITORY;IO
 BRUNEI DARUSSALAM;BN
 BULGARIA;BG
 BURKINA FASO;BF
 BURUNDI;BI
 CAMBODIA;KH
 CAMEROON;CM
 CANADA;CA
 CAPE VERDE;CV
 CAYMAN ISLANDS;KY
 CENTRAL AFRICAN REPUBLIC;CF
 CHAD;TD
 CHILE;CL
 CHINA;CN
 CHRISTMAS ISLAND;CX
 COCOS (KEELING) ISLANDS;CC
 COLOMBIA;CO
 COMOROS;KM
 CONGO;CG
 CONGO, THE DEMOCRATIC REPUBLIC OF THE;CD
 COOK ISLANDS;CK
 COSTA RICA;CR
 CÔTE D'IVOIRE;CI
 CROATIA;HR
 CUBA;CU
 CURAÇAO;CW
 CYPRUS;CY
 CZECH REPUBLIC;CZ
 DENMARK;DK
 DJIBOUTI;DJ
 DOMINICA;DM
 DOMINICAN REPUBLIC;DO
 ECUADOR;EC
 EGYPT;EG
 EL SALVADOR;SV
 EQUATORIAL GUINEA;GQ
 ERITREA;ER
 ESTONIA;EE
 ETHIOPIA;ET
 FALKLAND ISLANDS (MALVINAS);FK
 FAROE ISLANDS;FO
 FIJI;FJ
 FINLAND;FI
 FRANCE;FR
 FRENCH GUIANA;GF
 FRENCH POLYNESIA;PF
 FRENCH SOUTHERN TERRITORIES;TF
 GABON;GA
 GAMBIA;GM
 GEORGIA;GE
 GERMANY;DE
 GHANA;GH
 GIBRALTAR;GI
 GREECE;GR
 GREENLAND;GL
 GRENADA;GD
 GUADELOUPE;GP
 GUAM;GU
 GUATEMALA;GT
 GUERNSEY;GG
 GUINEA;GN
 GUINEA-BISSAU;GW
 GUYANA;GY
 HAITI;HT
 HEARD ISLAND AND MCDONALD ISLANDS;HM
 HOLY SEE (VATICAN CITY STATE);VA
 HONDURAS;HN
 HONG KONG;HK
 HUNGARY;HU
 ICELAND;IS
 INDIA;IN
 INDONESIA;ID
 IRAN, ISLAMIC REPUBLIC OF;IR
 IRAQ;IQ
 IRELAND;IE
 ISLE OF MAN;IM
 ISRAEL;IL
 ITALY;IT
 JAMAICA;JM
 JAPAN;JP
 JERSEY;JE
 JORDAN;JO
 KAZAKHSTAN;KZ
 KENYA;KE
 KIRIBATI;KI
 KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF;KP
 KOREA, REPUBLIC OF;KR
 KUWAIT;KW
 KYRGYZSTAN;KG
 LAO PEOPLE'S DEMOCRATIC REPUBLIC;LA
 LATVIA;LV
 LEBANON;LB
 LESOTHO;LS
 LIBERIA;LR
 LIBYA;LY
 LIECHTENSTEIN;LI
 LITHUANIA;LT
 LUXEMBOURG;LU
 MACAO;MO
 MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF;MK
 MADAGASCAR;MG
 MALAWI;MW
 MALAYSIA;MY
 MALDIVES;MV
 MALI;ML
 MALTA;MT
 MARSHALL ISLANDS;MH
 MARTINIQUE;MQ
 MAURITANIA;MR
 MAURITIUS;MU
 MAYOTTE;YT
 MEXICO;MX
 MICRONESIA, FEDERATED STATES OF;FM
 MOLDOVA, REPUBLIC OF;MD
 MONACO;MC
 MONGOLIA;MN
 MONTENEGRO;ME
 MONTSERRAT;MS
 MOROCCO;MA
 MOZAMBIQUE;MZ
 MYANMAR;MM
 NAMIBIA;NA
 NAURU;NR
 NEPAL;NP
 NETHERLANDS;NL
 NEW CALEDONIA;NC
 NEW ZEALAND;NZ
 NICARAGUA;NI
 NIGER;NE
 NIGERIA;NG
 NIUE;NU
 NORFOLK ISLAND;NF
 NORTHERN MARIANA ISLANDS;MP
 NORWAY;NO
 OMAN;OM
 PAKISTAN;PK
 PALAU;PW
 PALESTINE, STATE OF;PS
 PANAMA;PA
 PAPUA NEW GUINEA;PG
 PARAGUAY;PY
 PERU;PE
 PHILIPPINES;PH
 PITCAIRN;PN
 POLAND;PL
 PORTUGAL;PT
 PUERTO RICO;PR
 QATAR;QA
 RÉUNION;RE
 ROMANIA;RO
 RUSSIAN FEDERATION;RU
 RWANDA;RW
 SAINT BARTHÉLEMY;BL
 SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA;SH
 SAINT KITTS AND NEVIS;KN
 SAINT LUCIA;LC
 SAINT MARTIN (FRENCH PART);MF
 SAINT PIERRE AND MIQUELON;PM
 SAINT VINCENT AND THE GRENADINES;VC
 SAMOA;WS
 SAN MARINO;SM
 SAO TOME AND PRINCIPE;ST
 SAUDI ARABIA;SA
 SENEGAL;SN
 SERBIA;RS
 SEYCHELLES;SC
 SIERRA LEONE;SL
 SINGAPORE;SG
 SINT MAARTEN (DUTCH PART);SX
 SLOVAKIA;SK
 SLOVENIA;SI
 SOLOMON ISLANDS;SB
 SOMALIA;SO
 SOUTH AFRICA;ZA
 SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS;GS
 SOUTH SUDAN;SS
 SPAIN;ES
 SRI LANKA;LK
 SUDAN;SD
 SURINAME;SR
 SVALBARD AND JAN MAYEN;SJ
 SWAZILAND;SZ
 SWEDEN;SE
 SWITZERLAND;CH
 SYRIAN ARAB REPUBLIC;SY
 TAIWAN, PROVINCE OF CHINA;TW
 TAJIKISTAN;TJ
 TANZANIA, UNITED REPUBLIC OF;TZ
 THAILAND;TH
 TIMOR-LESTE;TL
 TOGO;TG
 TOKELAU;TK
 TONGA;TO
 TRINIDAD AND TOBAGO;TT
 TUNISIA;TN
 TURKEY;TR
 TURKMENISTAN;TM
 TURKS AND CAICOS ISLANDS;TC
 TUVALU;TV
 UGANDA;UG
 UKRAINE;UA
 UNITED ARAB EMIRATES;AE
 UNITED KINGDOM;GB
 UNITED STATES;US
 UNITED STATES MINOR OUTLYING ISLANDS;UM
 URUGUAY;UY
 UZBEKISTAN;UZ
 VANUATU;VU
 VENEZUELA, BOLIVARIAN REPUBLIC OF;VE
 VIET NAM;VN
 VIRGIN ISLANDS, BRITISH;VG
 VIRGIN ISLANDS, U.S.;VI
 WALLIS AND FUTUNA;WF
 WESTERN SAHARA;EH
 YEMEN;YE
 ZAMBIA;ZM
 ZIMBABWE;ZW
@@ -1,176 +0,0 @@
 #
 # ISO 15924 - Codes for the representation of names of scripts
 #             Codes pour la représentation des noms d’écritures
 # Format: 
 #             Code;N°;English Name;Nom français;PVA;Date
 #
 Afak;439;Afaka;afaka;;2010-12-21
 Aghb;239;Caucasian Albanian;aghbanien;;2012-10-16
 Ahom;338;Ahom, Tai Ahom;âhom;;2012-11-01
 Arab;160;Arabic;arabe;Arabic;2004-05-01
 Armi;124;Imperial Aramaic;araméen impérial;Imperial_Aramaic;2009-06-01
 Armn;230;Armenian;arménien;Armenian;2004-05-01
 Avst;134;Avestan;avestique;Avestan;2009-06-01
 Bali;360;Balinese;balinais;Balinese;2006-10-10
 Bamu;435;Bamum;bamoum;Bamum;2009-06-01
 Bass;259;Bassa Vah;bassa;;2010-03-26
 Batk;365;Batak;batik;Batak;2010-07-23
 Beng;325;Bengali;bengalî;Bengali;2004-05-01
 Blis;550;Blissymbols;symboles Bliss;;2004-05-01
 Bopo;285;Bopomofo;bopomofo;Bopomofo;2004-05-01
 Brah;300;Brahmi;brahma;Brahmi;2010-07-23
 Brai;570;Braille;braille;Braille;2004-05-01
 Bugi;367;Buginese;bouguis;Buginese;2006-06-21
 Buhd;372;Buhid;bouhide;Buhid;2004-05-01
 Cakm;349;Chakma;chakma;Chakma;2012-02-06
 Cans;440;Unified Canadian Aboriginal Syllabics;syllabaire autochtone canadien unifié;Canadian_Aboriginal;2004-05-29
 Cari;201;Carian;carien;Carian;2007-07-02
 Cham;358;Cham;cham (čam, tcham);Cham;2009-11-11
 Cher;445;Cherokee;tchérokî;Cherokee;2004-05-01
 Cirt;291;Cirth;cirth;;2004-05-01
 Copt;204;Coptic;copte;Coptic;2006-06-21
 Cprt;403;Cypriot;syllabaire chypriote;Cypriot;2004-05-01
 Cyrl;220;Cyrillic;cyrillique;Cyrillic;2004-05-01
 Cyrs;221;Cyrillic (Old Church Slavonic variant);cyrillique (variante slavonne);;2004-05-01
 Deva;315;Devanagari (Nagari);dévanâgarî;Devanagari;2004-05-01
 Dsrt;250;Deseret (Mormon);déseret (mormon);Deseret;2004-05-01
 Dupl;755;Duployan shorthand, Duployan stenography;sténographie Duployé;;2010-07-18
 Egyd;070;Egyptian demotic;démotique égyptien;;2004-05-01
 Egyh;060;Egyptian hieratic;hiératique égyptien;;2004-05-01
 Egyp;050;Egyptian hieroglyphs;hiéroglyphes égyptiens;Egyptian_Hieroglyphs;2009-06-01
 Elba;226;Elbasan;elbasan;;2010-07-18
 Ethi;430;Ethiopic (Geʻez);éthiopien (geʻez, guèze);Ethiopic;2004-10-25
 Geor;240;Georgian (Mkhedruli);géorgien (mkhédrouli);Georgian;2004-05-29
 Geok;241;Khutsuri (Asomtavruli and Nuskhuri);khoutsouri (assomtavrouli et nouskhouri);Georgian;2012-10-16
 Glag;225;Glagolitic;glagolitique;Glagolitic;2006-06-21
 Goth;206;Gothic;gotique;Gothic;2004-05-01
 Gran;343;Grantha;grantha;;2009-11-11
 Grek;200;Greek;grec;Greek;2004-05-01
 Gujr;320;Gujarati;goudjarâtî (gujrâtî);Gujarati;2004-05-01
 Guru;310;Gurmukhi;gourmoukhî;Gurmukhi;2004-05-01
 Hang;286;Hangul (Hangŭl, Hangeul);hangûl (hangŭl, hangeul);Hangul;2004-05-29
 Hani;500;Han (Hanzi, Kanji, Hanja);idéogrammes han (sinogrammes);Han;2009-02-23
 Hano;371;Hanunoo (Hanunóo);hanounóo;Hanunoo;2004-05-29
 Hans;501;Han (Simplified variant);idéogrammes han (variante simplifiée);;2004-05-29
 Hant;502;Han (Traditional variant);idéogrammes han (variante traditionnelle);;2004-05-29
 Hatr;127;Hatran;hatrénien;;2012-11-01
 Hebr;125;Hebrew;hébreu;Hebrew;2004-05-01
 Hira;410;Hiragana;hiragana;Hiragana;2004-05-01
 Hluw;080;Anatolian Hieroglyphs (Luwian Hieroglyphs, Hittite Hieroglyphs);hiéroglyphes anatoliens (hiéroglyphes louvites, hiéroglyphes hittites);;2011-12-09
 Hmng;450;Pahawh Hmong;pahawh hmong;;2004-05-01
 Hrkt;412;Japanese syllabaries (alias for Hiragana + Katakana);syllabaires japonais (alias pour hiragana + katakana);Katakana_Or_Hiragana;2011-06-21
 Hung;176;Old Hungarian (Hungarian Runic);runes hongroises (ancien hongrois);;2012-10-16
 Inds;610;Indus (Harappan);indus;;2004-05-01
 Ital;210;Old Italic (Etruscan, Oscan, etc.);ancien italique (étrusque, osque, etc.);Old_Italic;2004-05-29
 Java;361;Javanese;javanais;Javanese;2009-06-01
 Jpan;413;Japanese (alias for Han + Hiragana + Katakana);japonais (alias pour han + hiragana + katakana);;2006-06-21
 Jurc;510;Jurchen;jurchen;;2010-12-21
 Kali;357;Kayah Li;kayah li;Kayah_Li;2007-07-02
 Kana;411;Katakana;katakana;Katakana;2004-05-01
 Khar;305;Kharoshthi;kharochthî;Kharoshthi;2006-06-21
 Khmr;355;Khmer;khmer;Khmer;2004-05-29
 Khoj;322;Khojki;khojkî;;2011-06-21
 Knda;345;Kannada;kannara (canara);Kannada;2004-05-29
 Kore;287;Korean (alias for Hangul + Han);coréen (alias pour hangûl + han);;2007-06-13
 Kpel;436;Kpelle;kpèllé;;2010-03-26
 Kthi;317;Kaithi;kaithî;Kaithi;2009-06-01
 Lana;351;Tai Tham (Lanna);taï tham (lanna);Tai_Tham;2009-06-01
 Laoo;356;Lao;laotien;Lao;2004-05-01
 Latf;217;Latin (Fraktur variant);latin (variante brisée);;2004-05-01
 Latg;216;Latin (Gaelic variant);latin (variante gaélique);;2004-05-01
 Latn;215;Latin;latin;Latin;2004-05-01
 Lepc;335;Lepcha (Róng);lepcha (róng);Lepcha;2007-07-02
 Limb;336;Limbu;limbou;Limbu;2004-05-29
 Lina;400;Linear A;linéaire A;;2004-05-01
 Linb;401;Linear B;linéaire B;Linear_B;2004-05-29
 Lisu;399;Lisu (Fraser);lisu (Fraser);Lisu;2009-06-01
 Loma;437;Loma;loma;;2010-03-26
 Lyci;202;Lycian;lycien;Lycian;2007-07-02
 Lydi;116;Lydian;lydien;Lydian;2007-07-02
 Mahj;314;Mahajani;mahâjanî;;2012-10-16
 Mand;140;Mandaic, Mandaean;mandéen;Mandaic;2010-07-23
 Mani;139;Manichaean;manichéen;;2007-07-15
 Maya;090;Mayan hieroglyphs;hiéroglyphes mayas;;2004-05-01
 Mend;438;Mende Kikakui;mendé kikakui;;2013-10-12
 Merc;101;Meroitic Cursive;cursif méroïtique;Meroitic_Cursive;2012-02-06
 Mero;100;Meroitic Hieroglyphs;hiéroglyphes méroïtiques;Meroitic_Hieroglyphs;2012-02-06
 Mlym;347;Malayalam;malayâlam;Malayalam;2004-05-01
 Modi;323;Modi, Moḍī;modî;;2013-10-12
 Moon;218;Moon (Moon code, Moon script, Moon type);écriture Moon;;2006-12-11
 Mong;145;Mongolian;mongol;Mongolian;2004-05-01
 Mroo;199;Mro, Mru;mro;;2010-12-21
 Mtei;337;Meitei Mayek (Meithei, Meetei);meitei mayek;Meetei_Mayek;2009-06-01
 Mult;323; Multani;multanî;;2012-11-01
 Mymr;350;Myanmar (Burmese);birman;Myanmar;2004-05-01
 Narb;106;Old North Arabian (Ancient North Arabian);nord-arabique;;2010-03-26
 Nbat;159;Nabataean;nabatéen;;2010-03-26
 Nkgb;420;Nakhi Geba ('Na-'Khi ²Ggŏ-¹baw, Naxi Geba);nakhi géba;;2009-02-23
 Nkoo;165;N’Ko;n’ko;Nko;2006-10-10
 Nshu;499;Nüshu;nüshu;;2010-12-21
 Ogam;212;Ogham;ogam;Ogham;2004-05-01
 Olck;261;Ol Chiki (Ol Cemet’, Ol, Santali);ol tchiki;Ol_Chiki;2007-07-02
 Orkh;175;Old Turkic, Orkhon Runic;orkhon;Old_Turkic;2009-06-01
 Orya;327;Oriya;oriyâ;Oriya;2004-05-01
 Osma;260;Osmanya;osmanais;Osmanya;2004-05-01
 Palm;126;Palmyrene;palmyrénien;;2010-03-26
 Pauc;263;Pau Cin Hau;paou chin haou;;2013-10-12
 Perm;227;Old Permic;ancien permien;;2004-05-01
 Phag;331;Phags-pa;’phags pa;Phags_Pa;2006-10-10
 Phli;131;Inscriptional Pahlavi;pehlevi des inscriptions;Inscriptional_Pahlavi;2009-06-01
 Phlp;132;Psalter Pahlavi;pehlevi des psautiers;;2007-11-26
 Phlv;133;Book Pahlavi;pehlevi des livres;;2007-07-15
 Phnx;115;Phoenician;phénicien;Phoenician;2006-10-10
 Plrd;282;Miao (Pollard);miao (Pollard);Miao;2012-02-06
 Prti;130;Inscriptional Parthian;parthe des inscriptions;Inscriptional_Parthian;2009-06-01
 Qaaa;900;Reserved for private use (start);réservé à l’usage privé (début);;2004-05-29
 Qabx;949;Reserved for private use (end);réservé à l’usage privé (fin);;2004-05-29
 Rjng;363;Rejang (Redjang, Kaganga);redjang (kaganga);Rejang;2009-02-23
 Roro;620;Rongorongo;rongorongo;;2004-05-01
 Runr;211;Runic;runique;Runic;2004-05-01
 Samr;123;Samaritan;samaritain;Samaritan;2009-06-01
 Sara;292;Sarati;sarati;;2004-05-29
 Sarb;105;Old South Arabian;sud-arabique, himyarite;Old_South_Arabian;2009-06-01
 Saur;344;Saurashtra;saurachtra;Saurashtra;2007-07-02
 Sgnw;095;SignWriting;SignÉcriture, SignWriting;;2006-10-10
 Shaw;281;Shavian (Shaw);shavien (Shaw);Shavian;2004-05-01
 Shrd;319;Sharada, Śāradā;charada, shard;Sharada;2012-02-06
 Sidd;302;Siddham, Siddhaṃ, Siddhamātṛkā;siddham;;2013-10-12
 Sind;318;Khudawadi, Sindhi;khoudawadî, sindhî;;2010-12-21
 Sinh;348;Sinhala;singhalais;Sinhala;2004-05-01
 Sora;398;Sora Sompeng;sora sompeng;Sora_Sompeng;2012-02-06
 Sund;362;Sundanese;sundanais;Sundanese;2007-07-02
 Sylo;316;Syloti Nagri;sylotî nâgrî;Syloti_Nagri;2006-06-21
 Syrc;135;Syriac;syriaque;Syriac;2004-05-01
 Syre;138;Syriac (Estrangelo variant);syriaque (variante estranghélo);;2004-05-01
 Syrj;137;Syriac (Western variant);syriaque (variante occidentale);;2004-05-01
 Syrn;136;Syriac (Eastern variant);syriaque (variante orientale);;2004-05-01
 Tagb;373;Tagbanwa;tagbanoua;Tagbanwa;2004-05-01
 Takr;321;Takri, Ṭākrī, Ṭāṅkrī;tâkrî;Takri;2012-02-06
 Tale;353;Tai Le;taï-le;Tai_Le;2004-10-25
 Talu;354;New Tai Lue;nouveau taï-lue;New_Tai_Lue;2006-06-21
 Taml;346;Tamil;tamoul;Tamil;2004-05-01
 Tang;520;Tangut;tangoute;;2010-12-21
 Tavt;359;Tai Viet;taï viêt;Tai_Viet;2009-06-01
 Telu;340;Telugu;télougou;Telugu;2004-05-01
 Teng;290;Tengwar;tengwar;;2004-05-01
 Tfng;120;Tifinagh (Berber);tifinagh (berbère);Tifinagh;2006-06-21
 Tglg;370;Tagalog (Baybayin, Alibata);tagal (baybayin, alibata);Tagalog;2009-02-23
 Thaa;170;Thaana;thâna;Thaana;2004-05-01
 Thai;352;Thai;thaï;Thai;2004-05-01
 Tibt;330;Tibetan;tibétain;Tibetan;2004-05-01
 Tirh;326;Tirhuta;tirhouta;;2011-12-09
 Ugar;040;Ugaritic;ougaritique;Ugaritic;2004-05-01
 Vaii;470;Vai;vaï;Vai;2007-07-02
 Visp;280;Visible Speech;parole visible;;2004-05-01
 Wara;262;Warang Citi (Varang Kshiti);warang citi;;2009-11-11
 Wole;480;Woleai;woléaï;;2010-12-21
 Xpeo;030;Old Persian;cunéiforme persépolitain;Old_Persian;2006-06-21
 Xsux;020;Cuneiform, Sumero-Akkadian;cunéiforme suméro-akkadien;Cuneiform;2006-10-10
 Yiii;460;Yi;yi;Yi;2004-05-01
 Zinh;994;Code for inherited script;codet pour écriture héritée;Inherited;2009-02-23
 Zmth;995;Mathematical notation;notation mathématique;;2007-11-26
 Zsym;996;Symbols;symboles;;2007-11-26
 Zxxx;997;Code for unwritten documents;codet pour les documents non écrits;;2011-06-21
 Zyyy;998;Code for undetermined script;codet pour écriture indéterminée;Common;2004-05-29
 Zzzz;999;Code for uncoded script;codet pour écriture non codée;Unknown;2006-10-10
@@ -1,474 +0,0 @@
 IdSubLanguage	ISO639	LanguageName	UploadEnabled	WebEnabled
 aar	aa	Afar, afar	0	0
 abk	ab	Abkhazian	0	0
 ace		Achinese	0	0
 ach		Acoli	0	0
 ada		Adangme	0	0
 ady		adyghé	0	0
 afa		Afro-Asiatic (Other)	0	0
 afh		Afrihili	0	0
 afr	af	Afrikaans	1	0
 ain		Ainu	0	0
 aka	ak	Akan	0	0
 akk		Akkadian	0	0
 alb	sq	Albanian	1	1
 ale		Aleut	0	0
 alg		Algonquian languages	0	0
 alt		Southern Altai	0	0
 amh	am	Amharic	0	0
 ang		English, Old (ca.450-1100)	0	0
 apa		Apache languages	0	0
 ara	ar	Arabic	1	1
 arc		Aramaic	0	0
 arg	an	Aragonese	0	0
 arm	hy	Armenian	1	0
 arn		Araucanian	0	0
 arp		Arapaho	0	0
 art		Artificial (Other)	0	0
 arw		Arawak	0	0
 asm	as	Assamese	0	0
 ast		Asturian, Bable	0	0
 ath		Athapascan languages	0	0
 aus		Australian languages	0	0
 ava	av	Avaric	0	0
 ave	ae	Avestan	0	0
 awa		Awadhi	0	0
 aym	ay	Aymara	0	0
 aze	az	Azerbaijani	0	0
 bad		Banda	0	0
 bai		Bamileke languages	0	0
 bak	ba	Bashkir	0	0
 bal		Baluchi	0	0
 bam	bm	Bambara	0	0
 ban		Balinese	0	0
 baq	eu	Basque	1	1
 bas		Basa	0	0
 bat		Baltic (Other)	0	0
 bej		Beja	0	0
 bel	be	Belarusian	0	0
 bem		Bemba	0	0
 ben	bn	Bengali	1	0
 ber		Berber (Other)	0	0
 bho		Bhojpuri	0	0
 bih	bh	Bihari	0	0
 bik		Bikol	0	0
 bin		Bini	0	0
 bis	bi	Bislama	0	0
 bla		Siksika	0	0
 bnt		Bantu (Other)	0	0
 bos	bs	Bosnian	1	0
 bra		Braj	0	0
 bre	br	Breton	1	0
 btk		Batak (Indonesia)	0	0
 bua		Buriat	0	0
 bug		Buginese	0	0
 bul	bg	Bulgarian	1	1
 bur	my	Burmese	1	0
 byn		Blin	0	0
 cad		Caddo	0	0
 cai		Central American Indian (Other)	0	0
 car		Carib	0	0
 cat	ca	Catalan	1	1
 cau		Caucasian (Other)	0	0
 ceb		Cebuano	0	0
 cel		Celtic (Other)	0	0
 cha	ch	Chamorro	0	0
 chb		Chibcha	0	0
 che	ce	Chechen	0	0
 chg		Chagatai	0	0
 chi	zh	Chinese	1	1
 chk		Chuukese	0	0
 chm		Mari	0	0
 chn		Chinook jargon	0	0
 cho		Choctaw	0	0
 chp		Chipewyan	0	0
 chr		Cherokee	0	0
 chu	cu	Church Slavic	0	0
 chv	cv	Chuvash	0	0
 chy		Cheyenne	0	0
 cmc		Chamic languages	0	0
 cop		Coptic	0	0
 cor	kw	Cornish	0	0
 cos	co	Corsican	0	0
 cpe		Creoles and pidgins, English based (Other)	0	0
 cpf		Creoles and pidgins, French-based (Other)	0	0
 cpp		Creoles and pidgins, Portuguese-based (Other)	0	0
 cre	cr	Cree	0	0
 crh		Crimean Tatar	0	0
 crp		Creoles and pidgins (Other)	0	0
 csb		Kashubian	0	0
 cus		Cushitic (Other)' couchitiques, autres langues	0	0
 cze	cs	Czech	1	1
 dak		Dakota	0	0
 dan	da	Danish	1	1
 dar		Dargwa	0	0
 day		Dayak	0	0
 del		Delaware	0	0
 den		Slave (Athapascan)	0	0
 dgr		Dogrib	0	0
 din		Dinka	0	0
 div	dv	Divehi	0	0
 doi		Dogri	0	0
 dra		Dravidian (Other)	0	0
 dua		Duala	0	0
 dum		Dutch, Middle (ca.1050-1350)	0	0
 dut	nl	Dutch	1	1
 dyu		Dyula	0	0
 dzo	dz	Dzongkha	0	0
 efi		Efik	0	0
 egy		Egyptian (Ancient)	0	0
 eka		Ekajuk	0	0
 elx		Elamite	0	0
 eng	en	English	1	1
 enm		English, Middle (1100-1500)	0	0
 epo	eo	Esperanto	1	0
 est	et	Estonian	1	1
 ewe	ee	Ewe	0	0
 ewo		Ewondo	0	0
 fan		Fang	0	0
 fao	fo	Faroese	0	0
 fat		Fanti	0	0
 fij	fj	Fijian	0	0
 fil		Filipino	0	0
 fin	fi	Finnish	1	1
 fiu		Finno-Ugrian (Other)	0	0
 fon		Fon	0	0
 fre	fr	French	1	1
 frm		French, Middle (ca.1400-1600)	0	0
 fro		French, Old (842-ca.1400)	0	0
 fry	fy	Frisian	0	0
 ful	ff	Fulah	0	0
 fur		Friulian	0	0
 gaa		Ga	0	0
 gay		Gayo	0	0
 gba		Gbaya	0	0
 gem		Germanic (Other)	0	0
 geo	ka	Georgian	1	1
 ger	de	German	1	1
 gez		Geez	0	0
 gil		Gilbertese	0	0
 gla	gd	Gaelic	0	0
 gle	ga	Irish	0	0
 glg	gl	Galician	1	1
 glv	gv	Manx	0	0
 gmh		German, Middle High (ca.1050-1500)	0	0
 goh		German, Old High (ca.750-1050)	0	0
 gon		Gondi	0	0
 gor		Gorontalo	0	0
 got		Gothic	0	0
 grb		Grebo	0	0
 grc		Greek, Ancient (to 1453)	0	0
 ell	el	Greek	1	1
 grn	gn	Guarani	0	0
 guj	gu	Gujarati	0	0
 gwi		Gwich´in	0	0
 hai		Haida	0	0
 hat	ht	Haitian	0	0
 hau	ha	Hausa	0	0
 haw		Hawaiian	0	0
 heb	he	Hebrew	1	1
 her	hz	Herero	0	0
 hil		Hiligaynon	0	0
 him		Himachali	0	0
 hin	hi	Hindi	1	1
 hit		Hittite	0	0
 hmn		Hmong	0	0
 hmo	ho	Hiri Motu	0	0
 hrv	hr	Croatian	1	1
 hun	hu	Hungarian	1	1
 hup		Hupa	0	0
 iba		Iban	0	0
 ibo	ig	Igbo	0	0
 ice	is	Icelandic	1	1
 ido	io	Ido	0	0
 iii	ii	Sichuan Yi	0	0
 ijo		Ijo	0	0
 iku	iu	Inuktitut	0	0
 ile	ie	Interlingue	0	0
 ilo		Iloko	0	0
 ina	ia	Interlingua (International Auxiliary Language Asso	0	0
 inc		Indic (Other)	0	0
 ind	id	Indonesian	1	1
 ine		Indo-European (Other)	0	0
 inh		Ingush	0	0
 ipk	ik	Inupiaq	0	0
 ira		Iranian (Other)	0	0
 iro		Iroquoian languages	0	0
 ita	it	Italian	1	1
 jav	jv	Javanese	0	0
 jpn	ja	Japanese	1	1
 jpr		Judeo-Persian	0	0
 jrb		Judeo-Arabic	0	0
 kaa		Kara-Kalpak	0	0
 kab		Kabyle	0	0
 kac		Kachin	0	0
 kal	kl	Kalaallisut	0	0
 kam		Kamba	0	0
 kan	kn	Kannada	0	0
 kar		Karen	0	0
 kas	ks	Kashmiri	0	0
 kau	kr	Kanuri	0	0
 kaw		Kawi	0	0
 kaz	kk	Kazakh	1	0
 kbd		Kabardian	0	0
 kha		Khasi	0	0
 khi		Khoisan (Other)	0	0
 khm	km	Khmer	1	1
 kho		Khotanese	0	0
 kik	ki	Kikuyu	0	0
 kin	rw	Kinyarwanda	0	0
 kir	ky	Kirghiz	0	0
 kmb		Kimbundu	0	0
 kok		Konkani	0	0
 kom	kv	Komi	0	0
 kon	kg	Kongo	0	0
 kor	ko	Korean	1	1
 kos		Kosraean	0	0
 kpe		Kpelle	0	0
 krc		Karachay-Balkar	0	0
 kro		Kru	0	0
 kru		Kurukh	0	0
 kua	kj	Kuanyama	0	0
 kum		Kumyk	0	0
 kur	ku	Kurdish	0	0
 kut		Kutenai	0	0
 lad		Ladino	0	0
 lah		Lahnda	0	0
 lam		Lamba	0	0
 lao	lo	Lao	0	0
 lat	la	Latin	0	0
 lav	lv	Latvian	1	0
 lez		Lezghian	0	0
 lim	li	Limburgan	0	0
 lin	ln	Lingala	0	0
 lit	lt	Lithuanian	1	0
 lol		Mongo	0	0
 loz		Lozi	0	0
 ltz	lb	Luxembourgish	1	0
 lua		Luba-Lulua	0	0
 lub	lu	Luba-Katanga	0	0
 lug	lg	Ganda	0	0
 lui		Luiseno	0	0
 lun		Lunda	0	0
 luo		Luo (Kenya and Tanzania)	0	0
 lus		lushai	0	0
 mac	mk	Macedonian	1	1
 mad		Madurese	0	0
 mag		Magahi	0	0
 mah	mh	Marshallese	0	0
 mai		Maithili	0	0
 mak		Makasar	0	0
 mal	ml	Malayalam	1	0
 man		Mandingo	0	0
 mao	mi	Maori	0	0
 map		Austronesian (Other)	0	0
 mar	mr	Marathi	0	0
 mas		Masai	0	0
 may	ms	Malay	1	1
 mdf		Moksha	0	0
 mdr		Mandar	0	0
 men		Mende	0	0
 mga		Irish, Middle (900-1200)	0	0
 mic		Mi'kmaq	0	0
 min		Minangkabau	0	0
 mis		Miscellaneous languages	0	0
 mkh		Mon-Khmer (Other)	0	0
 mlg	mg	Malagasy	0	0
 mlt	mt	Maltese	0	0
 mnc		Manchu	0	0
 mni		Manipuri	0	0
 mno		Manobo languages	0	0
 moh		Mohawk	0	0
 mol	mo	Moldavian	0	0
 mon	mn	Mongolian	1	0
 mos		Mossi	0	0
 mwl		Mirandese	0	0
 mul		Multiple languages	0	0
 mun		Munda languages	0	0
 mus		Creek	0	0
 mwr		Marwari	0	0
 myn		Mayan languages	0	0
 myv		Erzya	0	0
 nah		Nahuatl	0	0
 nai		North American Indian	0	0
 nap		Neapolitan	0	0
 nau	na	Nauru	0	0
 nav	nv	Navajo	0	0
 nbl	nr	Ndebele, South	0	0
 nde	nd	Ndebele, North	0	0
 ndo	ng	Ndonga	0	0
 nds		Low German	0	0
 nep	ne	Nepali	0	0
 new		Nepal Bhasa	0	0
 nia		Nias	0	0
 nic		Niger-Kordofanian (Other)	0	0
 niu		Niuean	0	0
 nno	nn	Norwegian Nynorsk	0	0
 nob	nb	Norwegian Bokmal	0	0
 nog		Nogai	0	0
 non		Norse, Old	0	0
 nor	no	Norwegian	1	1
 nso		Northern Sotho	0	0
 nub		Nubian languages	0	0
 nwc		Classical Newari	0	0
 nya	ny	Chichewa	0	0
 nym		Nyamwezi	0	0
 nyn		Nyankole	0	0
 nyo		Nyoro	0	0
 nzi		Nzima	0	0
 oci	oc	Occitan	1	1
 oji	oj	Ojibwa	0	0
 ori	or	Oriya	0	0
 orm	om	Oromo	0	0
 osa		Osage	0	0
 oss	os	Ossetian	0	0
 ota		Turkish, Ottoman (1500-1928)	0	0
 oto		Otomian languages	0	0
 paa		Papuan (Other)	0	0
 pag		Pangasinan	0	0
 pal		Pahlavi	0	0
 pam		Pampanga	0	0
 pan	pa	Panjabi	0	0
 pap		Papiamento	0	0
 pau		Palauan	0	0
 peo		Persian, Old (ca.600-400 B.C.)	0	0
 per	fa	Persian	1	1
 phi		Philippine (Other)	0	0
 phn		Phoenician	0	0
 pli	pi	Pali	0	0
 pol	pl	Polish	1	1
 pon		Pohnpeian	0	0
 por	pt	Portuguese	1	1
 pra		Prakrit languages	0	0
 pro		Provençal, Old (to 1500)	0	0
 pus	ps	Pushto	0	0
 que	qu	Quechua	0	0
 raj		Rajasthani	0	0
 rap		Rapanui	0	0
 rar		Rarotongan	0	0
 roa		Romance (Other)	0	0
 roh	rm	Raeto-Romance	0	0
 rom		Romany	0	0
 run	rn	Rundi	0	0
 rup		Aromanian	0	0
 rus	ru	Russian	1	1
 sad		Sandawe	0	0
 sag	sg	Sango	0	0
 sah		Yakut	0	0
 sai		South American Indian (Other)	0	0
 sal		Salishan languages	0	0
 sam		Samaritan Aramaic	0	0
 san	sa	Sanskrit	0	0
 sas		Sasak	0	0
 sat		Santali	0	0
 scc	sr	Serbian	1	1
 scn		Sicilian	0	0
 sco		Scots	0	0
 sel		Selkup	0	0
 sem		Semitic (Other)	0	0
 sga		Irish, Old (to 900)	0	0
 sgn		Sign Languages	0	0
 shn		Shan	0	0
 sid		Sidamo	0	0
 sin	si	Sinhalese	1	1
 sio		Siouan languages	0	0
 sit		Sino-Tibetan (Other)	0	0
 sla		Slavic (Other)	0	0
 slo	sk	Slovak	1	1
 slv	sl	Slovenian	1	1
 sma		Southern Sami	0	0
 sme	se	Northern Sami	0	0
 smi		Sami languages (Other)	0	0
 smj		Lule Sami	0	0
 smn		Inari Sami	0	0
 smo	sm	Samoan	0	0
 sms		Skolt Sami	0	0
 sna	sn	Shona	0	0
 snd	sd	Sindhi	0	0
 snk		Soninke	0	0
 sog		Sogdian	0	0
 som	so	Somali	0	0
 son		Songhai	0	0
 sot	st	Sotho, Southern	0	0
 spa	es	Spanish	1	1
 srd	sc	Sardinian	0	0
 srr		Serer	0	0
 ssa		Nilo-Saharan (Other)	0	0
 ssw	ss	Swati	0	0
 suk		Sukuma	0	0
 sun	su	Sundanese	0	0
 sus		Susu	0	0
 sux		Sumerian	0	0
 swa	sw	Swahili	1	0
 swe	sv	Swedish	1	1
 syr		Syriac	1	0
 tah	ty	Tahitian	0	0
 tai		Tai (Other)	0	0
 tam	ta	Tamil	1	0
 tat	tt	Tatar	0	0
 tel	te	Telugu	1	0
 tem		Timne	0	0
 ter		Tereno	0	0
 tet		Tetum	0	0
 tgk	tg	Tajik	0	0
 tgl	tl	Tagalog	1	1
 tha	th	Thai	1	1
 tib	bo	Tibetan	0	0
 tig		Tigre	0	0
 tir	ti	Tigrinya	0	0
 tiv		Tiv	0	0
 tkl		Tokelau	0	0
 tlh		Klingon	0	0
 tli		Tlingit	0	0
 tmh		Tamashek	0	0
 tog		Tonga (Nyasa)	0	0
 ton	to	Tonga (Tonga Islands)	0	0
 tpi		Tok Pisin	0	0
 tsi		Tsimshian	0	0
 tsn	tn	Tswana	0	0
 tso	ts	Tsonga	0	0
 tuk	tk	Turkmen	0	0
 tum		Tumbuka	0	0
 tup		Tupi languages	0	0
 tur	tr	Turkish	1	1
 tut		Altaic (Other)	0	0
 tvl		Tuvalu	0	0
 twi	tw	Twi	0	0
 tyv		Tuvinian	0	0
 udm		Udmurt	0	0
 uga		Ugaritic	0	0
 uig	ug	Uighur	0	0
 ukr	uk	Ukrainian	1	1
 umb		Umbundu	0	0
 und		Undetermined	0	0
 urd	ur	Urdu	1	0
 uzb	uz	Uzbek	0	0
 vai		Vai	0	0
 ven	ve	Venda	0	0
 vie	vi	Vietnamese	1	1
 vol	vo	Volapük	0	0
 vot		Votic	0	0
 wak		Wakashan languages	0	0
 wal		Walamo	0	0
 war		Waray	0	0
 was		Washo	0	0
 wel	cy	Welsh	0	0
 wen		Sorbian languages	0	0
 wln	wa	Walloon	0	0
 wol	wo	Wolof	0	0
 xal		Kalmyk	0	0
 xho	xh	Xhosa	0	0
 yao		Yao	0	0
 yap		Yapese	0	0
 yid	yi	Yiddish	0	0
 yor	yo	Yoruba	0	0
 ypk		Yupik languages	0	0
 zap		Zapotec	0	0
 zen		Zenaga	0	0
 zha	za	Zhuang	0	0
 znd		Zande	0	0
 zul	zu	Zulu	0	0
 zun		Zuni	0	0
 rum	ro	Romanian	1	1
 pob	pb	Brazilian	1	1
 mne		Montenegrin	1	0
@@ -1,85 +0,0 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 from __future__ import unicode_literals
 class Error(Exception):
    """Base class for all exceptions in babelfish"""
 class LanguageError(Error, AttributeError):
    """Base class for all language exceptions in babelfish

    It also derives from :class:`AttributeError`, so handlers for that built-in catch it.
    """
 class LanguageConvertError(LanguageError):
    """Exception raised by converters when :meth:`~babelfish.converters.LanguageConverter.convert` fails

    :param string alpha3: alpha3 code that failed conversion
    :param country: country code that failed conversion, if any
    :type country: string or None
    :param script: script code that failed conversion, if any
    :type script: string or None
    """
    def __init__(self, alpha3, country=None, script=None):
        # Populate ``args``: copying and unpickling rebuild the exception by calling the
        # class with ``args``, which fails when the required argument is missing from it
        super(LanguageConvertError, self).__init__(alpha3, country, script)
        self.alpha3 = alpha3
        self.country = country
        self.script = script
    def __str__(self):
        # Renders ``alpha3[-country][-script]``, leaving out the parts that are None
        s = self.alpha3
        if self.country is not None:
            s += '-' + self.country
        if self.script is not None:
            s += '-' + self.script
        return s
 class LanguageReverseError(LanguageError):
    """Exception raised by converters when :meth:`~babelfish.converters.LanguageReverseConverter.reverse` fails

    :param string code: code that failed reverse conversion
    """
    def __init__(self, code):
        # Populate ``args`` so the exception can be copied and pickled
        super(LanguageReverseError, self).__init__(code)
        self.code = code
    def __str__(self):
        return repr(self.code)
 class CountryError(Error, AttributeError):
    """Base class for all country exceptions in babelfish

    It also derives from :class:`AttributeError`, so handlers for that built-in catch it.
    """
 class CountryConvertError(CountryError):
    """Exception raised by converters when :meth:`~babelfish.converters.CountryConverter.convert` fails

    :param string alpha2: alpha2 code that failed conversion
    """
    def __init__(self, alpha2):
        # Populate ``args`` so the exception can be copied and pickled
        super(CountryConvertError, self).__init__(alpha2)
        self.alpha2 = alpha2
    def __str__(self):
        return self.alpha2
 class CountryReverseError(CountryError):
    """Exception raised by converters when :meth:`~babelfish.converters.CountryReverseConverter.reverse` fails

    :param string code: code that failed reverse conversion
    """
    def __init__(self, code):
        # Populate ``args`` so the exception can be copied and pickled
        super(CountryReverseError, self).__init__(code)
        self.code = code
    def __str__(self):
        return repr(self.code)
@@ -1,185 +0,0 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 from __future__ import unicode_literals
 from collections import namedtuple
 from functools import partial
 from pkg_resources import resource_stream  # @UnresolvedImport
 from .converters import ConverterManager
 from .country import Country
 from .exceptions import LanguageConvertError
 from .script import Script
 from . import basestr
 #: Set of every alpha3 code read from the ISO-639-3 table
 LANGUAGES = set()
 #: One :data:`IsoLanguage` row per line of the ISO-639-3 table, in file order
 LANGUAGE_MATRIX = []
 #: The namedtuple used in the :data:`LANGUAGE_MATRIX`
 IsoLanguage = namedtuple('IsoLanguage', ['alpha3', 'alpha3b', 'alpha3t', 'alpha2', 'scope', 'type', 'name', 'comment'])
 f = resource_stream('babelfish', 'data/iso-639-3.tab')
 try:
    # Discard the first line (presumably the column-header row; confirm against the data file).
    f.readline()
    for l in f:
        iso_language = IsoLanguage(*l.decode('utf-8').split('\t'))
        LANGUAGES.add(iso_language.alpha3)
        LANGUAGE_MATRIX.append(iso_language)
 finally:
    # Release the resource stream even when a row fails to decode or parse.
    f.close()
 class LanguageConverterManager(ConverterManager):
    """:class:`~babelfish.converters.ConverterManager` subclass declaring the language converters"""
    entry_point = 'babelfish.language_converters'
    # Each entry has the form ``name = module.path:ClassName``.
    internal_converters = [
        'alpha2 = babelfish.converters.alpha2:Alpha2Converter',
        'alpha3b = babelfish.converters.alpha3b:Alpha3BConverter',
        'alpha3t = babelfish.converters.alpha3t:Alpha3TConverter',
        'name = babelfish.converters.name:NameConverter',
        'scope = babelfish.converters.scope:ScopeConverter',
        'type = babelfish.converters.type:LanguageTypeConverter',
        'opensubtitles = babelfish.converters.opensubtitles:OpenSubtitlesConverter',
    ]
 #: Module-level manager instance used by :class:`Language` for all conversions
 language_converters = LanguageConverterManager()
 class LanguageMeta(type):
    """Metaclass backing :class:`Language`

    A class attribute that is not found normally and whose name begins with
    ``from`` (e.g. ``frommycode``) is answered with :meth:`Language.fromcode`
    pre-bound to the converter named by the rest of the attribute (``mycode``).

    """
    def __getattr__(cls, name):
        if not name.startswith('from'):
            # Not a dynamic constructor: fall back to the default lookup so the
            # usual AttributeError surfaces.
            return type.__getattribute__(cls, name)
        converter_name = name[len('from'):]
        return partial(cls.fromcode, converter=converter_name)
 class Language(LanguageMeta(str('LanguageBase'), (object,), {})):
    """A human language, optionally qualified by a country and a script

    The language part follows the ISO-639 standard; a
    :class:`~babelfish.country.Country` and/or a
    :class:`~babelfish.script.Script` can be attached to make it more specific.

    The :class:`Language` is extensible with custom converters (see :ref:`custom_converters`)

    :param string language: the language as a 3-letter ISO-639-3 code
    :param country: the country (if any) as a 2-letter ISO-3166 code or :class:`~babelfish.country.Country` instance
    :type country: string or :class:`~babelfish.country.Country` or None
    :param script: the script (if any) as a 4-letter ISO-15924 code or :class:`~babelfish.script.Script` instance
    :type script: string or :class:`~babelfish.script.Script` or None
    :param unknown: the unknown language as a three-letters ISO-639-3 code to use as fallback
    :type unknown: string or None
    :raise: ValueError if the language could not be recognized and `unknown` is ``None``

    """
    def __init__(self, language, country=None, script=None, unknown=None):
        # The fallback is only consulted when the requested code is not known.
        if language not in LANGUAGES and unknown is not None:
            language = unknown
        if language not in LANGUAGES:
            raise ValueError('%r is not a valid language' % language)
        self.alpha3 = language

        # Country and script are accepted as ready-made objects, raw codes or None.
        if country is None or isinstance(country, Country):
            self.country = country
        else:
            self.country = Country(country)

        if script is None or isinstance(script, Script):
            self.script = script
        else:
            self.script = Script(script)

    @classmethod
    def fromcode(cls, code, converter):
        """Build a :class:`Language` from `code`, using the named `converter` to
        :meth:`~babelfish.converters.LanguageReverseConverter.reverse` it

        :param string code: the code to reverse
        :param string converter: name of the :class:`~babelfish.converters.LanguageReverseConverter` to use
        :return: the corresponding :class:`Language` instance
        :rtype: :class:`Language`

        """
        # The reversed value is unpacked as the positional constructor arguments.
        constructor_args = language_converters[converter].reverse(code)
        return cls(*constructor_args)

    @classmethod
    def fromietf(cls, ietf):
        """Build a :class:`Language` from an IETF language code

        :param string ietf: the ietf code
        :return: the corresponding :class:`Language` instance
        :rtype: :class:`Language`

        """
        parts = ietf.split('-')
        primary = parts[0].lower()
        remaining = parts[1:]
        # A 2-letter primary subtag is an alpha2 code; anything else is tried as alpha3.
        if len(primary) == 2:
            language = cls.fromalpha2(primary)
        else:
            language = cls(primary)
        for position, subtag in enumerate(remaining, 1):
            if len(subtag) == 2:
                language.country = Country(subtag.upper())
            else:
                language.script = Script(subtag.capitalize())
            if language.script is not None:
                # The script closes the tag: nothing may follow it.
                leftover = remaining[position:]
                if leftover:
                    raise ValueError('Wrong IETF format. Unmatched subtags: %r' % leftover)
                break
        return language

    def __getstate__(self):
        return self.alpha3, self.country, self.script

    def __setstate__(self, state):
        self.alpha3, self.country, self.script = state

    def __getattr__(self, name):
        # Attributes not found normally are resolved through the converter
        # registered under the same name.
        alpha3 = self.alpha3
        country = None if self.country is None else self.country.alpha2
        script = None if self.script is None else self.script.code
        try:
            return language_converters[name].convert(alpha3, country, script)
        except KeyError:
            raise AttributeError(name)

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        if isinstance(other, basestr):
            # Strings are compared against the string form of this language.
            return str(self) == other
        if isinstance(other, Language):
            return (self.alpha3 == other.alpha3 and
                    self.country == other.country and
                    self.script == other.script)
        return False

    def __ne__(self, other):
        return not (self == other)

    def __bool__(self):
        # Only the 'und' language is falsy.
        return self.alpha3 != 'und'
    __nonzero__ = __bool__

    def __repr__(self):
        return '<Language [%s]>' % self

    def __str__(self):
        # Prefer the alpha2 code, falling back to alpha3 when no alpha2 conversion exists.
        try:
            pieces = [self.alpha2]
        except LanguageConvertError:
            pieces = [self.alpha3]
        if self.country is not None:
            pieces.append(str(self.country))
        if self.script is not None:
            pieces.append(str(self.script))
        return '-'.join(pieces)
@@ -1,76 +0,0 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 from __future__ import unicode_literals
 from collections import namedtuple
 from pkg_resources import resource_stream  # @UnresolvedImport
 from . import basestr
 #: Script code to script name mapping
 SCRIPTS = {}
 #: List of scripts in the ISO-15924 as namedtuple of code, number, name, french_name, pva and date
 SCRIPT_MATRIX = []
 #: The namedtuple used in the :data:`SCRIPT_MATRIX`
 IsoScript = namedtuple('IsoScript', ['code', 'number', 'name', 'french_name', 'pva', 'date'])
 f = resource_stream('babelfish', 'data/iso15924-utf8-20131012.txt')
 try:
    # Discard the first line unconditionally (presumably a header; confirm against the data file).
    f.readline()
    for l in f:
        l = l.decode('utf-8').strip()
        # Blank lines and '#' comment lines carry no record.
        if not l or l.startswith('#'):
            continue
        script = IsoScript._make(l.split(';'))
        SCRIPT_MATRIX.append(script)
        SCRIPTS[script.code] = script.name
 finally:
    # Release the resource stream even when a line fails to decode or parse.
    f.close()
 class Script(object):
    """A human writing system

    Instances wrap a script code that must be a key of :data:`SCRIPTS`
    (4-letter ISO-15924 codes).

    :param string script: 4-letter ISO-15924 script code
    :raise: ValueError if `script` is not a known script code

    """
    def __init__(self, script):
        if script in SCRIPTS:
            #: ISO-15924 4-letter script code
            self.code = script
        else:
            raise ValueError('%r is not a valid script' % script)

    @property
    def name(self):
        """English name of the script"""
        return SCRIPTS[self.code]

    def __getstate__(self):
        # The code alone is the pickled state.
        return self.code

    def __setstate__(self, state):
        self.code = state

    def __hash__(self):
        return hash(self.code)

    def __eq__(self, other):
        # A script compares equal to its own code string and to any Script
        # carrying the same code.
        if isinstance(other, basestr):
            other_code = other
        elif isinstance(other, Script):
            other_code = other.code
        else:
            return False
        return self.code == other_code

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        return '<Script [%s]>' % (self,)

    def __str__(self):
        return self.code
@@ -1,377 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 # Use of this source code is governed by the 3-clause BSD license
 # that can be found in the LICENSE file.
 #
 from __future__ import unicode_literals
 import re
 import sys
 import pickle
 from unittest import TestCase, TestSuite, TestLoader, TextTestRunner
 from pkg_resources import resource_stream  # @UnresolvedImport
 from babelfish import (LANGUAGES, Language, Country, Script, language_converters, country_converters,
    LanguageReverseConverter, LanguageConvertError, LanguageReverseError, CountryReverseError)
 # Compatibility shim: on Python 2.6 and older, define the assertion helpers
 # used by the tests below; on newer interpreters the mixin is an empty class.
 if sys.version_info[:2] <= (2, 6):
    # Maximum repr length before safe_repr() truncates (only when short=True).
    _MAX_LENGTH = 80
    def safe_repr(obj, short=False):
        """Return repr(obj), falling back to object.__repr__ if repr() raises.

        When `short` is true, results of _MAX_LENGTH characters or more are truncated.
        """
        try:
            result = repr(obj)
        except Exception:
            result = object.__repr__(obj)
        if not short or len(result) < _MAX_LENGTH:
            return result
        return result[:_MAX_LENGTH] + ' [truncated]...'
    class _AssertRaisesContext(object):
        """A context manager used to implement TestCase.assertRaises* methods."""
        def __init__(self, expected, test_case, expected_regexp=None):
            self.expected = expected
            self.failureException = test_case.failureException
            self.expected_regexp = expected_regexp
        def __enter__(self):
            return self
        def __exit__(self, exc_type, exc_value, tb):
            # No exception at all: the expectation failed.
            if exc_type is None:
                try:
                    exc_name = self.expected.__name__
                except AttributeError:
                    exc_name = str(self.expected)
                raise self.failureException(
                    "{0} not raised".format(exc_name))
            if not issubclass(exc_type, self.expected):
                # let unexpected exceptions pass through
                return False
            self.exception = exc_value  # store for later retrieval
            if self.expected_regexp is None:
                return True
            # Optionally check the exception text against the expected pattern.
            expected_regexp = self.expected_regexp
            if isinstance(expected_regexp, basestring):
                expected_regexp = re.compile(expected_regexp)
            if not expected_regexp.search(str(exc_value)):
                raise self.failureException('"%s" does not match "%s"' %
                         (expected_regexp.pattern, str(exc_value)))
            return True
    class _Py26FixTestCase(object):
        """Mixin providing assertion helpers for the test cases below.

        Relies on ``fail`` and ``_formatMessage`` supplied by the TestCase it is mixed into.
        """
        def assertIsNone(self, obj, msg=None):
            """Same as self.assertTrue(obj is None), with a nicer default message."""
            if obj is not None:
                standardMsg = '%s is not None' % (safe_repr(obj),)
                self.fail(self._formatMessage(msg, standardMsg))
        def assertIsNotNone(self, obj, msg=None):
            """Included for symmetry with assertIsNone."""
            if obj is None:
                standardMsg = 'unexpectedly None'
                self.fail(self._formatMessage(msg, standardMsg))
        def assertIn(self, member, container, msg=None):
            """Just like self.assertTrue(a in b), but with a nicer default message."""
            if member not in container:
                standardMsg = '%s not found in %s' % (safe_repr(member),
                                                      safe_repr(container))
                self.fail(self._formatMessage(msg, standardMsg))
        def assertNotIn(self, member, container, msg=None):
            """Just like self.assertTrue(a not in b), but with a nicer default message."""
            if member in container:
                standardMsg = '%s unexpectedly found in %s' % (safe_repr(member),
                                                            safe_repr(container))
                self.fail(self._formatMessage(msg, standardMsg))
        def assertIs(self, expr1, expr2, msg=None):
            """Just like self.assertTrue(a is b), but with a nicer default message."""
            if expr1 is not expr2:
                standardMsg = '%s is not %s' % (safe_repr(expr1),
                                                 safe_repr(expr2))
                self.fail(self._formatMessage(msg, standardMsg))
        def assertIsNot(self, expr1, expr2, msg=None):
            """Just like self.assertTrue(a is not b), but with a nicer default message."""
            if expr1 is expr2:
                standardMsg = 'unexpectedly identical: %s' % (safe_repr(expr1),)
                self.fail(self._formatMessage(msg, standardMsg))
 else:
    # Newer interpreters: nothing to add, so the mixin is empty.
    class _Py26FixTestCase(object):
        pass
 class TestScript(TestCase, _Py26FixTestCase):
    # Unit tests for Script: validation, equality, hashing and pickling.
    def test_wrong_script(self):
        self.assertRaises(ValueError, Script, 'Azer')

    def test_eq(self):
        first, second = Script('Latn'), Script('Latn')
        self.assertEqual(first, second)

    def test_ne(self):
        cyrillic, latin = Script('Cyrl'), Script('Latn')
        self.assertNotEqual(cyrillic, latin)

    def test_hash(self):
        # A script hashes like its code string.
        script_hash = hash(Script('Hira'))
        self.assertEqual(script_hash, hash('Hira'))

    def test_pickle(self):
        restored = pickle.loads(pickle.dumps(Script('Latn')))
        self.assertEqual(restored, Script('Latn'))
 class TestCountry(TestCase, _Py26FixTestCase):
    # Unit tests for Country: validation, equality, hashing, pickling and the name converter.
    def test_wrong_country(self):
        self.assertRaises(ValueError, Country, 'ZZ')

    def test_eq(self):
        first, second = Country('US'), Country('US')
        self.assertEqual(first, second)

    def test_ne(self):
        britain, usa = Country('GB'), Country('US')
        self.assertNotEqual(britain, usa)
        self.assertIsNotNone(Country('US'))

    def test_hash(self):
        # A country hashes like its alpha2 code string.
        country_hash = hash(Country('US'))
        self.assertEqual(country_hash, hash('US'))

    def test_pickle(self):
        for country in (Country('GB'), Country('US')):
            restored = pickle.loads(pickle.dumps(country))
            self.assertEqual(restored, country)

    def test_converter_name(self):
        usa = Country('US')
        self.assertEqual(usa.name, 'UNITED STATES')
        self.assertEqual(Country.fromname('UNITED STATES'), Country('US'))
        self.assertEqual(Country.fromcode('UNITED STATES', 'name'), Country('US'))
        # The lambda defers the dynamic ``fromname`` lookup into assertRaises.
        self.assertRaises(CountryReverseError, lambda: Country.fromname('ZZZZZ'))
        name_codes = country_converters['name'].codes
        self.assertEqual(len(name_codes), 249)
 class TestLanguage(TestCase, _Py26FixTestCase):
    # Unit tests for Language: construction, the built-in converters, IETF
    # parsing, comparison, hashing, pickling and converter registration.
    def test_languages(self):
        self.assertEqual(len(LANGUAGES), 7874)
    def test_wrong_language(self):
        self.assertRaises(ValueError, lambda: Language('zzz'))
    def test_unknown_language(self):
        self.assertEqual(Language('zzzz', unknown='und'), Language('und'))
    def test_converter_alpha2(self):
        self.assertEqual(Language('eng').alpha2, 'en')
        self.assertEqual(Language.fromalpha2('en'), Language('eng'))
        self.assertEqual(Language.fromcode('en', 'alpha2'), Language('eng'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha2('zz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha2)
        self.assertEqual(len(language_converters['alpha2'].codes), 184)
    def test_converter_alpha3b(self):
        self.assertEqual(Language('fra').alpha3b, 'fre')
        self.assertEqual(Language.fromalpha3b('fre'), Language('fra'))
        self.assertEqual(Language.fromcode('fre', 'alpha3b'), Language('fra'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3b('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3b)
        self.assertEqual(len(language_converters['alpha3b'].codes), 418)
    def test_converter_alpha3t(self):
        self.assertEqual(Language('fra').alpha3t, 'fra')
        self.assertEqual(Language.fromalpha3t('fra'), Language('fra'))
        self.assertEqual(Language.fromcode('fra', 'alpha3t'), Language('fra'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3t('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3t)
        self.assertEqual(len(language_converters['alpha3t'].codes), 418)
    def test_converter_name(self):
        self.assertEqual(Language('eng').name, 'English')
        self.assertEqual(Language.fromname('English'), Language('eng'))
        self.assertEqual(Language.fromcode('English', 'name'), Language('eng'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromname('Zzzzzzzzz'))
        self.assertEqual(len(language_converters['name'].codes), 7874)
    def test_converter_scope(self):
        self.assertEqual(language_converters['scope'].codes, set(['I', 'S', 'M']))
        self.assertEqual(Language('eng').scope, 'individual')
        self.assertEqual(Language('und').scope, 'special')
    def test_converter_type(self):
        self.assertEqual(language_converters['type'].codes, set(['A', 'C', 'E', 'H', 'L', 'S']))
        self.assertEqual(Language('eng').type, 'living')
        self.assertEqual(Language('und').type, 'special')
    def test_converter_opensubtitles(self):
        self.assertEqual(Language('fra').opensubtitles, Language('fra').alpha3b)
        self.assertEqual(Language('por', 'BR').opensubtitles, 'pob')
        self.assertEqual(Language.fromopensubtitles('fre'), Language('fra'))
        self.assertEqual(Language.fromopensubtitles('pob'), Language('por', 'BR'))
        self.assertEqual(Language.fromopensubtitles('pb'), Language('por', 'BR'))
        # Montenegrin is not recognized as an ISO language (yet?) but for now it is
        # unofficially accepted as Serbian from Montenegro
        self.assertEqual(Language.fromopensubtitles('mne'), Language('srp', 'ME'))
        self.assertEqual(Language.fromcode('pob', 'opensubtitles'), Language('por', 'BR'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromopensubtitles('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').opensubtitles)
        self.assertEqual(len(language_converters['opensubtitles'].codes), 607)
        # test with all the LANGUAGES from the opensubtitles api
        # downloaded from: http://www.opensubtitles.org/addons/export_languages.php
        f = resource_stream('babelfish', 'data/opensubtitles_languages.txt')
        try:
            # The first line is discarded before the tab-separated rows are read.
            f.readline()
            for line in f:
                idlang, alpha2, _, upload_enabled, web_enabled = line.decode('utf-8').strip().split('\t')
                if not int(upload_enabled) and not int(web_enabled):
                    # do not test LANGUAGES that are too esoteric / not widely available
                    continue
                self.assertEqual(Language.fromopensubtitles(idlang).opensubtitles, idlang)
                if alpha2:
                    self.assertEqual(Language.fromopensubtitles(idlang), Language.fromopensubtitles(alpha2))
        finally:
            # Close the stream even when one of the assertions above fails.
            f.close()
    def test_converter_opensubtitles_codes(self):
        for code in language_converters['opensubtitles'].from_opensubtitles.keys():
            self.assertIn(code, language_converters['opensubtitles'].codes)
    def test_fromietf_country_script(self):
        language = Language.fromietf('fra-FR-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertEqual(language.country, Country('FR'))
        self.assertEqual(language.script, Script('Latn'))
    def test_fromietf_country_no_script(self):
        language = Language.fromietf('fra-FR')
        self.assertEqual(language.alpha3, 'fra')
        self.assertEqual(language.country, Country('FR'))
        self.assertIsNone(language.script)
    def test_fromietf_no_country_no_script(self):
        # A bare language subtag must leave both country and script unset.
        language = Language.fromietf('fra')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertIsNone(language.script)
    def test_fromietf_no_country_script(self):
        language = Language.fromietf('fra-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertEqual(language.script, Script('Latn'))
    def test_fromietf_alpha2_language(self):
        language = Language.fromietf('fr-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertEqual(language.script, Script('Latn'))
    def test_fromietf_wrong_language(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('xyz-FR'))
    def test_fromietf_wrong_country(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('fra-YZ'))
    def test_fromietf_wrong_script(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('fra-FR-Wxyz'))
    def test_eq(self):
        self.assertEqual(Language('eng'), Language('eng'))
    def test_ne(self):
        self.assertNotEqual(Language('fra'), Language('eng'))
        self.assertIsNotNone(Language('fra'))
    def test_nonzero(self):
        self.assertFalse(bool(Language('und')))
        self.assertTrue(bool(Language('eng')))
    def test_language_hasattr(self):
        self.assertTrue(hasattr(Language('fra'), 'alpha3'))
        self.assertTrue(hasattr(Language('fra'), 'alpha2'))
        self.assertFalse(hasattr(Language('bej'), 'alpha2'))
    def test_country_hasattr(self):
        self.assertTrue(hasattr(Country('US'), 'name'))
        self.assertTrue(hasattr(Country('FR'), 'alpha2'))
        self.assertFalse(hasattr(Country('BE'), 'none'))
    def test_country(self):
        self.assertEqual(Language('por', 'BR').country, Country('BR'))
        self.assertEqual(Language('eng', Country('US')).country, Country('US'))
    def test_eq_with_country(self):
        self.assertEqual(Language('eng', 'US'), Language('eng', Country('US')))
    def test_ne_with_country(self):
        self.assertNotEqual(Language('eng', 'US'), Language('eng', Country('GB')))
    def test_script(self):
        self.assertEqual(Language('srp', script='Latn').script, Script('Latn'))
        self.assertEqual(Language('srp', script=Script('Cyrl')).script, Script('Cyrl'))
    def test_eq_with_script(self):
        self.assertEqual(Language('srp', script='Latn'), Language('srp', script=Script('Latn')))
    def test_ne_with_script(self):
        self.assertNotEqual(Language('srp', script='Latn'), Language('srp', script=Script('Cyrl')))
    def test_eq_with_country_and_script(self):
        self.assertEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Latn')))
    def test_ne_with_country_and_script(self):
        self.assertNotEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Cyrl')))
    def test_hash(self):
        # A language hashes like its string form.
        self.assertEqual(hash(Language('fra')), hash('fr'))
        self.assertEqual(hash(Language('ace')), hash('ace'))
        self.assertEqual(hash(Language('por', 'BR')), hash('pt-BR'))
        self.assertEqual(hash(Language('srp', script='Cyrl')), hash('sr-Cyrl'))
        self.assertEqual(hash(Language('eng', 'US', 'Latn')), hash('en-US-Latn'))
    def test_pickle(self):
        for lang in [Language('fra'),
                     Language('eng', 'US'),
                     Language('srp', script='Latn'),
                     Language('eng', 'US', 'Latn')]:
            self.assertEqual(pickle.loads(pickle.dumps(lang)), lang)
    def test_str(self):
        # str() output must round-trip through fromietf().
        self.assertEqual(Language.fromietf(str(Language('eng', 'US', 'Latn'))), Language('eng', 'US', 'Latn'))
        self.assertEqual(Language.fromietf(str(Language('fra', 'FR'))), Language('fra', 'FR'))
        self.assertEqual(Language.fromietf(str(Language('bel'))), Language('bel'))
    def test_register_converter(self):
        # Minimal two-way converter registered under the name 'test'.
        class TestConverter(LanguageReverseConverter):
            def __init__(self):
                self.to_test = {'fra': 'test1', 'eng': 'test2'}
                self.from_test = {'test1': 'fra', 'test2': 'eng'}
            def convert(self, alpha3, country=None, script=None):
                if alpha3 not in self.to_test:
                    raise LanguageConvertError(alpha3, country, script)
                return self.to_test[alpha3]
            def reverse(self, test):
                if test not in self.from_test:
                    raise LanguageReverseError(test)
                return (self.from_test[test], None)
        language = Language('fra')
        self.assertFalse(hasattr(language, 'test'))
        language_converters['test'] = TestConverter()
        self.assertTrue(hasattr(language, 'test'))
        self.assertIn('test', language_converters)
        self.assertEqual(Language('fra').test, 'test1')
        self.assertEqual(Language.fromtest('test2').alpha3, 'eng')
        # After unregistering, both the attribute and the constructor must be gone.
        del language_converters['test']
        self.assertNotIn('test', language_converters)
        self.assertRaises(KeyError, lambda: Language.fromtest('test1'))
        self.assertRaises(AttributeError, lambda: Language('fra').test)
 def suite():
    """Return a :class:`TestSuite` holding the tests of every test case class in this module."""
    combined = TestSuite()
    for case_class in (TestScript, TestCountry, TestLanguage):
        combined.addTest(TestLoader().loadTestsFromTestCase(case_class))
    return combined
 # When executed as a script, run the combined suite with the text runner.
 if __name__ == '__main__':
    TextTestRunner().run(suite())
@@ -1,14 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Extracts as much information as possible from a video file.
 """
 from . import monkeypatch as _monkeypatch
 from .api import guessit, GuessItApi
 from .options import ConfigurationException
 from .rules.common.quantity import Size
 from .__version__ import __version__
 # Executed once, at package import time.
 # NOTE(review): presumably patches the third-party rebulk library; confirm in the monkeypatch module.
 _monkeypatch.monkeypatch_rebulk()
@@ -1,180 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Entry point module
 """
 # pragma: no cover
 from __future__ import print_function
 import json
 import logging
 import os
 import sys
 import six
 from rebulk.__version__ import __version__ as __rebulk_version__
 from guessit import api
 from guessit.__version__ import __version__
 from guessit.jsonutils import GuessitEncoder
 from guessit.options import argument_parser, parse_options, load_config, merge_options
 try:
    from collections import OrderedDict
 except ImportError:  # pragma: no-cover
    from ordereddict import OrderedDict  # pylint:disable=import-error
 def guess_filename(filename, options):
    """
    Guess a single filename and print the result in the format selected by the options

    :param filename: filename to parse
    :type filename: str
    :param options: parsed options; the ``yaml``, ``json`` and ``show_property`` keys select the output
    :type options: dict
    :return: nothing, the result is printed
    :rtype: None
    """
    # The 'For:' banner is only shown for the default human-readable output.
    structured_output = options.get('yaml') or options.get('json') or options.get('show_property')
    if not structured_output:
        print('For:', filename)

    guess = api.guessit(filename, options)

    if options.get('show_property'):
        # Print just the requested property (empty string when absent).
        print(guess.get(options.get('show_property'), ''))
        return

    if options.get('json'):
        print(json.dumps(guess, cls=GuessitEncoder, ensure_ascii=False))
        return

    if not options.get('yaml'):
        print('GuessIt found:', json.dumps(guess, cls=GuessitEncoder, indent=4, ensure_ascii=False))
        return

    import yaml
    from guessit import yamlutils
    ystr = yaml.dump({filename: OrderedDict(guess)}, Dumper=yamlutils.CustomDumper, default_flow_style=False,
                     allow_unicode=True)
    # The first two dumped lines are rewritten with '? ' / ':' prefixes; the rest pass through.
    for index, yline in enumerate(ystr.splitlines()):
        if index == 0:
            print("? " + yline[:-1])
        elif index == 1:
            print(":" + yline[1:])
        else:
            print(yline)
 def display_properties(options):
    """
    Print the available properties as JSON, YAML or an indented text listing

    Property values are included only when the ``values`` option is set;
    otherwise only the property names are shown.
    """
    properties = api.properties(options)

    if options.get('json'):
        payload = properties if options.get('values') else list(properties.keys())
        print(json.dumps(payload, cls=GuessitEncoder, ensure_ascii=False))
        return

    if options.get('yaml'):
        import yaml
        from guessit import yamlutils
        payload = properties if options.get('values') else list(properties.keys())
        print(yaml.dump(payload, Dumper=yamlutils.CustomDumper, default_flow_style=False,
                        allow_unicode=True))
        return

    # Plain-text listing: names sorted alphabetically, values nested below each name.
    print('GuessIt properties:')
    for property_name in sorted(properties.keys()):
        property_values = properties.get(property_name)
        print(2 * ' ' + '[+] %s' % (property_name,))
        if not (property_values and options.get('values')):
            continue
        for property_value in property_values:
            print(4 * ' ' + '[!] %s' % (property_value,))
 def fix_argv_encoding():
    """
    Decode every sys.argv entry with the preferred locale encoding (Windows + Python 2 only)
    """
    if not (six.PY2 and os.name == 'nt'):
        return
    # pragma: no cover
    # see http://bugs.python.org/issue2128
    import locale
    for position, raw_argument in enumerate(sys.argv):
        sys.argv[position] = raw_argument.decode(locale.getpreferredencoding())
 def main(args=None):  # pylint:disable=too-many-branches
    """
    Main function for entry point

    Parses the options (from ``args`` when given), then prints the version
    banner, the property listing and/or the guesses for the requested
    filenames. The help text is printed when none of these actions ran.
    """
    fix_argv_encoding()

    options = parse_options() if args is None else parse_options(args)  # pragma: no cover

    config = load_config(options)
    options = merge_options(config, options)

    if options.get('verbose'):
        logging.basicConfig(stream=sys.stdout, format='%(message)s')
        logging.getLogger().setLevel(logging.DEBUG)

    # Stays True only when no action below produced any output.
    help_required = True

    if options.get('version'):
        banner = (
            '+-------------------------------------------------------+',
            '+                   GuessIt ' + __version__ + (28 - len(__version__)) * ' ' + '+',
            '+-------------------------------------------------------+',
            '+                   Rebulk ' + __rebulk_version__ + (29 - len(__rebulk_version__)) * ' ' + '+',
            '+-------------------------------------------------------+',
            '|      Please report any bug or feature request at      |',
            '|     https://github.com/guessit-io/guessit/issues.     |',
            '+-------------------------------------------------------+',
        )
        for banner_line in banner:
            print(banner_line)
        help_required = False

    if options.get('yaml'):
        # Drop the option when PyYAML cannot be imported.
        try:
            import yaml  # pylint:disable=unused-variable,unused-import
        except ImportError:  # pragma: no cover
            del options['yaml']
            print('PyYAML is not installed. \'--yaml\' option will be ignored ...', file=sys.stderr)

    if options.get('properties') or options.get('values'):
        display_properties(options)
        help_required = False

    # Gather filenames from the command line first, then from the input file.
    filenames = []
    if options.get('filename'):
        filenames.extend(options.get('filename'))
    if options.get('input_file'):
        # Python 2's open() has no ``encoding`` parameter.
        if six.PY2:
            input_file = open(options.get('input_file'), 'r')
        else:
            input_file = open(options.get('input_file'), 'r', encoding='utf-8')
        try:
            filenames.extend([line.strip() for line in input_file.readlines()])
        finally:
            input_file.close()

    # Empty entries (e.g. blank lines of the input file) are ignored.
    filenames = [name for name in filenames if name]

    for filename in filenames:
        help_required = False
        guess_filename(filename, options)

    if help_required:  # pragma: no cover
        argument_parser.print_help()
 # When executed as a script, run main() with the options taken from the command line.
 if __name__ == '__main__':  # pragma: no cover
    main()
@@ -1,7 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Version module
 """
 # pragma: no cover
 # Version string of the package.
 __version__ = '3.1.1.dev0'
@@ -1,263 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 API functions that can be used by external software
 """
 try:
    from collections import OrderedDict
 except ImportError:  # pragma: no-cover
    from ordereddict import OrderedDict  # pylint:disable=import-error
 import os
 import traceback
 import six
 from rebulk.introspector import introspect
 from .__version__ import __version__
 from .options import parse_options, load_config, merge_options
 from .rules import rebulk_builder
 class GuessitException(Exception):
    """
    Error raised when guessit cannot complete a guess because of an internal failure.

    The exception message is a report made of the guessit version, the input string, the
    options and the output of ``traceback.format_exc()`` at construction time. The input
    string and the options are also stored on the ``string`` and ``options`` attributes.
    """

    def __init__(self, string, options):
        template = ("An internal error has occured in guessit.\n"
                    "===================== Guessit Exception Report =====================\n"
                    "version=%s\n"
                    "string=%s\n"
                    "options=%s\n"
                    "--------------------------------------------------------------------\n"
                    "%s"
                    "--------------------------------------------------------------------\n"
                    "Please report at "
                    "https://github.com/guessit-io/guessit/issues.\n"
                    "====================================================================")
        # format_exc() is evaluated here, so the report carries the exception currently being handled.
        details = (__version__, str(string), str(options), traceback.format_exc())
        super(GuessitException, self).__init__(template % details)
        self.string = string
        self.options = options
 def configure(options=None, rules_builder=rebulk_builder, force=False):
    """
    Configure the module-level default API instance.

    Delegates to ``default_api.configure``. Nothing is returned by this function.

    :param options: options used to load the configuration
    :type options: dict
    :param rules_builder: callable used to build the rebulk rules
    :type rules_builder:
    :param force: forwarded as the ``force`` argument of ``default_api.configure``
    :type force: bool
    :return:
    """
    settings = {'rules_builder': rules_builder, 'force': force}
    default_api.configure(options, **settings)
 def guessit(string, options=None):
    """
    Guess properties from a string using the module-level default API instance.

    :param string: the filename or release name
    :type string: str
    :param options: options forwarded to ``default_api.guessit``
    :type options: str|dict
    :return: whatever ``default_api.guessit`` returns for this input
    :rtype:
    """
    api = default_api
    guess = api.guessit(string, options)
    return guess
 def properties(options=None):
    """
    List the properties, with their possible values, that can be guessed.

    :param options: options forwarded to ``default_api.properties``
    :type options: str|dict
    :return: whatever ``default_api.properties`` returns
    :rtype:
    """
    api = default_api
    available = api.properties(options)
    return available
 def suggested_expected(titles, options=None):
    """
    Suggest titles that could be used as `expected_title`, based on the given titles.

    Delegates to ``default_api.suggested_expected``.

    :param titles: the filenames or release names to inspect
    :type titles: list|set|dict
    :param options: options forwarded to the default API
    :type options: str|dict
    :return: the suggested titles
    :rtype: list of str
    """
    api = default_api
    suggestions = api.suggested_expected(titles, options)
    return suggestions
 class GuessItApi(object):
    """
    An api class that can be configured with custom Rebulk configuration.

    The instance keeps the last loaded configuration and the last built rebulk object, and
    only reloads or rebuilds them when the relevant options change or when forced.
    """

    def __init__(self):
        """Default constructor."""
        # Rebulk object returned by the rules builder; None until configure() has run.
        self.rebulk = None
        # Configuration dict produced by the last configure() call.
        self.config = None
        # Options that were in effect when configuration files were last loaded.
        self.load_config_options = None
        # Merged advanced configuration that self.rebulk was built from.
        self.advanced_config = None

    @classmethod
    def _fix_encoding(cls, value):
        """
        Recursively normalise the string type of a value.

        Lists and dicts (keys and values) are rebuilt with converted content. Under Python 2
        text is encoded to UTF-8 bytes; under Python 3 bytes are decoded as ASCII. Any other
        value is returned unchanged.

        :param value: value to convert
        :return: the converted value
        """
        if isinstance(value, list):
            return [cls._fix_encoding(item) for item in value]
        if isinstance(value, dict):
            return {cls._fix_encoding(k): cls._fix_encoding(v) for k, v in value.items()}
        if six.PY2 and isinstance(value, six.text_type):
            return value.encode('utf-8')
        if six.PY3 and isinstance(value, six.binary_type):
            return value.decode('ascii')
        return value

    @classmethod
    def _has_same_properties(cls, dic1, dic2, values):
        """
        Tell whether two dicts hold equal values for every key listed in ``values``.

        Keys missing from a dict are compared as ``None``.

        :param dic1: first dict
        :param dic2: second dict
        :param values: keys to compare
        :return: True when all listed keys compare equal
        :rtype: bool
        """
        return all(dic1.get(value) == dic2.get(value) for value in values)

    def configure(self, options=None, rules_builder=rebulk_builder, force=False, sanitize_options=True):
        """
        Load configuration files and initialize rebulk rules if required.

        Configuration files are reloaded when nothing has been loaded yet, when ``force`` is
        set, or when one of the ``config``, ``no_user_config`` or ``no_default_config`` options
        differs from the previous load. Rebulk is rebuilt when ``force`` is set, when it has not
        been built yet, or when the merged advanced configuration changed.

        :param options:
        :type options: str|dict
        :param rules_builder: callable receiving the advanced configuration and returning rebulk
        :type rules_builder:
        :param force: reload the configuration and rebuild rebulk unconditionally
        :type force: bool
        :param sanitize_options: parse and re-encode ``options`` first; callers passing False
            must provide already parsed options
        :type sanitize_options: bool
        :return: the configuration in effect
        :rtype: dict
        """
        if sanitize_options:
            options = parse_options(options, True)
            options = self._fix_encoding(options)

        must_reload = (self.config is None or self.load_config_options is None or force or
                       not self._has_same_properties(self.load_config_options,
                                                     options,
                                                     ['config', 'no_user_config', 'no_default_config']))
        if must_reload:
            config = load_config(options)
            config = self._fix_encoding(config)
            self.load_config_options = options
        else:
            config = self.config

        advanced_config = merge_options(config.get('advanced_config'), options.get('advanced_config'))

        should_build_rebulk = force or not self.rebulk or not self.advanced_config or \
                              self.advanced_config != advanced_config

        if should_build_rebulk:
            self.advanced_config = advanced_config
            self.rebulk = rules_builder(advanced_config)

        self.config = config
        return self.config

    def guessit(self, string, options=None):  # pylint: disable=too-many-branches
        """
        Retrieves all matches from string as a dict

        :param string: the filename or release name
        :type string: str|Path
        :param options:
        :type options: str|dict
        :return: the matches converted with ``matches.to_dict``
        :rtype:
        :raise GuessitException: when any ``Exception`` occurs while guessing
        """
        try:
            from pathlib import Path
            if isinstance(string, Path):
                try:
                    # Handle path-like object
                    string = os.fspath(string)
                except AttributeError:
                    # os.fspath is not available: fall back to the string form of the path.
                    string = str(string)
        except ImportError:
            # pathlib is not available: nothing to convert.
            pass

        try:
            options = parse_options(options, True)
            options = self._fix_encoding(options)
            config = self.configure(options, sanitize_options=False)
            options = merge_options(config, options)
            # Flags remembering that the input type was converted, so that match values
            # are converted back to the caller's original string type.
            result_decode = False
            result_encode = False

            if six.PY2:
                if isinstance(string, six.text_type):
                    string = string.encode("utf-8")
                    result_decode = True
                elif isinstance(string, six.binary_type):
                    string = six.binary_type(string)
            if six.PY3:
                if isinstance(string, six.binary_type):
                    string = string.decode('ascii')
                    result_encode = True
                elif isinstance(string, six.text_type):
                    string = six.text_type(string)

            matches = self.rebulk.matches(string, options)
            if result_decode:
                for match in matches:
                    if isinstance(match.value, six.binary_type):
                        match.value = match.value.decode("utf-8")
            if result_encode:
                for match in matches:
                    if isinstance(match.value, six.text_type):
                        match.value = match.value.encode("ascii")
            return matches.to_dict(options.get('advanced', False), options.get('single_value', False),
                                   options.get('enforce_list', False))
        except Exception:
            # Only real errors are wrapped: KeyboardInterrupt and SystemExit derive from
            # BaseException and must propagate unchanged instead of being reported as a bug.
            raise GuessitException(string, options)

    def properties(self, options=None):
        """
        Grab properties and values that can be generated.

        Property names and their values are both sorted by their text representation.

        :param options:
        :type options:
        :return: ordered mapping of property name to list of possible values
        :rtype:
        """
        options = parse_options(options, True)
        options = self._fix_encoding(options)
        config = self.configure(options, sanitize_options=False)
        options = merge_options(config, options)
        unordered = introspect(self.rebulk, options).properties
        ordered = OrderedDict()
        for k in sorted(unordered.keys(), key=six.text_type):
            ordered[k] = list(sorted(unordered[k], key=six.text_type))
        # Let the rebulk object adjust the result when it provides such a hook.
        if hasattr(self.rebulk, 'customize_properties'):
            ordered = self.rebulk.customize_properties(ordered)
        return ordered

    def suggested_expected(self, titles, options=None):
        """
        Return a list of suggested titles to be used as `expected_title` based on the list of titles

        A title is suggested unless its guess contains exactly two entries, one of them
        being ``title``.

        :param titles: the filename or release name
        :type titles: list|set|dict
        :param options:
        :type options: str|dict
        :return:
        :rtype: list of str
        """
        suggested = []
        for title in titles:
            guess = self.guessit(title, options)
            if len(guess) != 2 or 'title' not in guess:
                suggested.append(title)

        return suggested
 # Module-level instance used by the configure/guessit/properties/suggested_expected functions.
 default_api = GuessItApi()
@@ -1,27 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Backports
 """
 # pragma: no-cover
 # pylint: disabled
 def cmp_to_key(mycmp):
    """
    functools.cmp_to_key backport

    :param mycmp: comparison callable taking two objects and returning a number that is
        negative, zero or positive
    :return: a class whose instances wrap one object and compare through ``mycmp``
    """
    def compare(lhs, rhs):
        """Apply the comparison function to the objects wrapped by two keys."""
        return mycmp(lhs.obj, rhs.obj)

    class KeyClass(object):
        """Key wrapping a single object; every rich comparison is answered by ``mycmp``."""

        def __init__(self, obj, *args):  # pylint: disable=unused-argument
            self.obj = obj

        def __eq__(self, rhs):
            return compare(self, rhs) == 0

        def __ne__(self, rhs):
            return compare(self, rhs) != 0

        def __lt__(self, rhs):
            return compare(self, rhs) < 0

        def __le__(self, rhs):
            return compare(self, rhs) <= 0

        def __gt__(self, rhs):
            return compare(self, rhs) > 0

        def __ge__(self, rhs):
            return compare(self, rhs) >= 0

    return KeyClass
@@ -1,586 +0,0 @@
 {
  "expected_title": [
    "OSS 117",
    "This is Us"
  ],
  "allowed_countries": [
    "au",
    "gb",
    "us"
  ],
  "allowed_languages": [
    "ca",
    "cs",
    "de",
    "en",
    "es",
    "fr",
    "he",
    "hi",
    "hu",
    "it",
    "ja",
    "ko",
    "mul",
    "nl",
    "no",
    "pl",
    "pt",
    "ro",
    "ru",
    "sv",
    "te",
    "uk",
    "und"
  ],
  "advanced_config": {
    "common_words": [
      "ca",
      "cat",
      "de",
      "he",
      "it",
      "no",
      "por",
      "rum",
      "se",
      "st",
      "sub"
    ],
    "groups": {
      "starting": "([{",
      "ending": ")]}"
    },
    "audio_codec": {
      "audio_channels": {
        "1.0": [
          "1ch",
          "mono"
        ],
        "2.0": [
          "2ch",
          "stereo",
          "re:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)"
        ],
        "5.1": [
          "5ch",
          "6ch",
          "re:(5[\\W_][01](?:ch)?)(?=[^\\d]|$)",
          "re:(6[\\W_]0(?:ch)?)(?=[^\\d]|$)"
        ],
        "7.1": [
          "7ch",
          "8ch",
          "re:(7[\\W_][01](?:ch)?)(?=[^\\d]|$)"
        ]
      }
    },
    "container": {
      "subtitles": [
        "srt",
        "idx",
        "sub",
        "ssa",
        "ass"
      ],
      "info": [
        "nfo"
      ],
      "videos": [
        "3g2",
        "3gp",
        "3gp2",
        "asf",
        "avi",
        "divx",
        "flv",
        "iso",
        "m4v",
        "mk2",
        "mk3d",
        "mka",
        "mkv",
        "mov",
        "mp4",
        "mp4a",
        "mpeg",
        "mpg",
        "ogg",
        "ogm",
        "ogv",
        "qt",
        "ra",
        "ram",
        "rm",
        "ts",
        "vob",
        "wav",
        "webm",
        "wma",
        "wmv"
      ],
      "torrent": [
        "torrent"
      ],
      "nzb": [
        "nzb"
      ]
    },
    "country": {
      "synonyms": {
        "ES": [
          "españa"
        ],
        "GB": [
          "UK"
        ],
        "BR": [
          "brazilian",
          "bra"
        ],
        "CA": [
          "québec",
          "quebec",
          "qc"
        ],
        "MX": [
          "Latinoamérica",
          "latin america"
        ]
      }
    },
    "episodes": {
      "season_max_range": 100,
      "episode_max_range": 100,
      "max_range_gap": 1,
      "season_markers": [
        "s"
      ],
      "season_ep_markers": [
        "x"
      ],
      "disc_markers": [
        "d"
      ],
      "episode_markers": [
        "xe",
        "ex",
        "ep",
        "e",
        "x"
      ],
      "range_separators": [
        "-",
        "~",
        "to",
        "a"
      ],
      "discrete_separators": [
        "+",
        "&",
        "and",
        "et"
      ],
      "season_words": [
        "season",
        "saison",
        "seizoen",
        "seasons",
        "saisons",
        "tem",
        "temp",
        "temporada",
        "temporadas",
        "stagione"
      ],
      "episode_words": [
        "episode",
        "episodes",
        "eps",
        "ep",
        "episodio",
        "episodios",
        "capitulo",
        "capitulos"
      ],
      "of_words": [
        "of",
        "sur"
      ],
      "all_words": [
        "All"
      ]
    },
    "language": {
      "synonyms": {
        "ell": [
          "gr",
          "greek"
        ],
        "spa": [
          "esp",
          "español",
          "espanol"
        ],
        "fra": [
          "français",
          "vf",
          "vff",
          "vfi",
          "vfq"
        ],
        "swe": [
          "se"
        ],
        "por_BR": [
          "po",
          "pb",
          "pob",
          "ptbr",
          "br",
          "brazilian"
        ],
        "deu_CH": [
          "swissgerman",
          "swiss german"
        ],
        "nld_BE": [
          "flemish"
        ],
        "cat": [
          "català",
          "castellano",
          "espanol castellano",
          "español castellano"
        ],
        "ces": [
          "cz"
        ],
        "ukr": [
          "ua"
        ],
        "zho": [
          "cn"
        ],
        "jpn": [
          "jp"
        ],
        "hrv": [
          "scr"
        ],
        "mul": [
          "multi",
          "dl"
        ]
      },
      "subtitle_affixes": [
        "sub",
        "subs",
        "esub",
        "esubs",
        "subbed",
        "custom subbed",
        "custom subs",
        "custom sub",
        "customsubbed",
        "customsubs",
        "customsub",
        "soft subtitles",
        "soft subs"
      ],
      "subtitle_prefixes": [
        "st",
        "vost",
        "subforced",
        "fansub",
        "hardsub",
        "legenda",
        "legendas",
        "legendado",
        "subtitulado",
        "soft",
        "subtitles"
      ],
      "subtitle_suffixes": [
        "subforced",
        "fansub",
        "hardsub"
      ],
      "language_affixes": [
        "dublado",
        "dubbed",
        "dub"
      ],
      "language_prefixes": [
        "true"
      ],
      "language_suffixes": [
        "audio"
      ],
      "weak_affixes": [
        "v",
        "audio",
        "true"
      ]
    },
    "part": {
      "prefixes": [
        "pt",
        "part"
      ]
    },
    "release_group": {
      "forbidden_names": [
        "bonus",
        "by",
        "for",
        "par",
        "pour",
        "rip"
      ],
      "ignored_seps": "[]{}()"
    },
    "screen_size": {
      "frame_rates": [
        "23.976",
        "24",
        "25",
        "29.970",
        "30",
        "48",
        "50",
        "60",
        "120"
      ],
      "min_ar": 1.333,
      "max_ar": 1.898,
      "interlaced": [
        "360",
        "480",
        "576",
        "900",
        "1080"
      ],
      "progressive": [
        "360",
        "480",
        "540",
        "576",
        "900",
        "1080",
        "368",
        "720",
        "1440",
        "2160",
        "4320"
      ]
    },
    "website": {
      "safe_tlds": [
        "com",
        "net",
        "org"
      ],
      "safe_subdomains": [
        "www"
      ],
      "safe_prefixes": [
        "co",
        "com",
        "net",
        "org"
      ],
      "prefixes": [
        "from"
      ]
    },
    "streaming_service": {
      "A&E": [
        "AE",
        "A&E"
      ],
      "ABC": "AMBC",
      "ABC Australia": "AUBC",
      "Al Jazeera English": "AJAZ",
      "AMC": "AMC",
      "Amazon Prime": [
        "AMZN",
        "Amazon",
        "re:Amazon-?Prime"
      ],
      "Adult Swim": [
        "AS",
        "re:Adult-?Swim"
      ],
      "America's Test Kitchen": "ATK",
      "Animal Planet": "ANPL",
      "AnimeLab": "ANLB",
      "AOL": "AOL",
      "ARD": "ARD",
      "BBC iPlayer": [
        "iP",
        "re:BBC-?iPlayer"
      ],
      "BravoTV": "BRAV",
      "Canal+": "CNLP",
      "Cartoon Network": "CN",
      "CBC": "CBC",
      "CBS": "CBS",
      "CNBC": "CNBC",
      "Comedy Central": [
        "CC",
        "re:Comedy-?Central"
      ],
      "Channel 4": "4OD",
      "CHRGD": "CHGD",
      "Cinemax": "CMAX",
      "Country Music Television": "CMT",
      "Comedians in Cars Getting Coffee": "CCGC",
      "Crunchy Roll": [
        "CR",
        "re:Crunchy-?Roll"
      ],
      "Crackle": "CRKL",
      "CSpan": "CSPN",
      "CTV": "CTV",
      "CuriosityStream": "CUR",
      "CWSeed": "CWS",
      "Daisuki": "DSKI",
      "DC Universe": "DCU",
      "Deadhouse Films": "DHF",
      "DramaFever": [
        "DF",
        "DramaFever"
      ],
      "Digiturk Diledigin Yerde": "DDY",
      "Discovery": [
        "DISC",
        "Discovery"
      ],
      "Disney": [
        "DSNY",
        "Disney"
      ],
      "DIY Network": "DIY",
      "Doc Club": "DOCC",
      "DPlay": "DPLY",
      "E!": "ETV",
      "ePix": "EPIX",
      "El Trece": "ETTV",
      "ESPN": "ESPN",
      "Esquire": "ESQ",
      "Family": "FAM",
      "Family Jr": "FJR",
      "Food Network": "FOOD",
      "Fox": "FOX",
      "Freeform": "FREE",
      "FYI Network": "FYI",
      "Global": "GLBL",
      "GloboSat Play": "GLOB",
      "Hallmark": "HLMK",
      "HBO Go": [
        "HBO",
        "re:HBO-?Go"
      ],
      "HGTV": "HGTV",
      "History": [
        "HIST",
        "History"
      ],
      "Hulu": "HULU",
      "Investigation Discovery": "ID",
      "IFC": "IFC",
      "iTunes": "iTunes",
      "ITV": "ITV",
      "Knowledge Network": "KNOW",
      "Lifetime": "LIFE",
      "Motor Trend OnDemand": "MTOD",
      "MBC": [
        "MBC",
        "MBCVOD"
      ],
      "MSNBC": "MNBC",
      "MTV": "MTV",
      "National Geographic": [
        "NATG",
        "re:National-?Geographic"
      ],
      "NBA TV": [
        "NBA",
        "re:NBA-?TV"
      ],
      "NBC": "NBC",
      "Netflix": [
        "NF",
        "Netflix"
      ],
      "NFL": "NFL",
      "NFL Now": "NFLN",
      "NHL GameCenter": "GC",
      "Nickelodeon": [
        "NICK",
        "Nickelodeon"
      ],
      "Norsk Rikskringkasting": "NRK",
      "OnDemandKorea": [
        "ODK",
        "OnDemandKorea"
      ],
      "PBS": "PBS",
      "PBS Kids": "PBSK",
      "Playstation Network": "PSN",
      "Pluzz": "PLUZ",
      "RTE One": "RTE",
      "SBS (AU)": "SBS",
      "SeeSo": [
        "SESO",
        "SeeSo"
      ],
      "Shomi": "SHMI",
      "Spike": "SPIK",
      "Spike TV": [
        "SPKE",
        "re:Spike-?TV"
      ],
      "Sportsnet": "SNET",
      "Sprout": "SPRT",
      "Stan": "STAN",
      "Starz": "STZ",
      "Sveriges Television": "SVT",
      "SwearNet": "SWER",
      "Syfy": "SYFY",
      "TBS": "TBS",
      "TFou": "TFOU",
      "The CW": [
        "CW",
        "re:The-?CW"
      ],
      "TLC": "TLC",
      "TubiTV": "TUBI",
      "TV3 Ireland": "TV3",
      "TV4 Sweeden": "TV4",
      "TVING": "TVING",
      "TV Land": [
        "TVL",
        "re:TV-?Land"
      ],
      "UFC": "UFC",
      "UKTV": "UKTV",
      "Univision": "UNIV",
      "USA Network": "USAN",
      "Velocity": "VLCT",
      "VH1": "VH1",
      "Viceland": "VICE",
      "Viki": "VIKI",
      "Vimeo": "VMEO",
      "VRV": "VRV",
      "W Network": "WNET",
      "WatchMe": "WME",
      "WWE Network": "WWEN",
      "Xbox Video": "XBOX",
      "Yahoo": "YHOO",
      "YouTube Red": "RED",
      "ZDF": "ZDF"
    }
  }
 }
@@ -1,22 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 JSON Utils
 """
 import json
 from six import text_type
 from rebulk.match import Match
 class GuessitEncoder(json.JSONEncoder):
    """
    JSON encoder used to serialise a guessit response.
    """

    def default(self, o):  # pylint:disable=method-hidden
        """
        Convert an object the standard encoder cannot serialise.

        A ``Match`` is replaced by its ``advanced`` attribute. Any other object is converted
        to text, using its ``name`` attribute when it has one.
        """
        if isinstance(o, Match):
            return o.advanced
        # Babelfish languages/countries long name
        source = o.name if hasattr(o, 'name') else o
        # pragma: no cover
        return text_type(source)
@@ -1,34 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Monkeypatch initialisation functions
 """
 try:
    from collections import OrderedDict
 except ImportError:  # pragma: no-cover
    from ordereddict import OrderedDict  # pylint:disable=import-error
 from rebulk.match import Match
 def monkeypatch_rebulk():
    """Monkeypatch rebulk classes by adding an ``advanced`` property to ``Match``."""

    def match_advanced(self):
        """
        Build advanced dict from match

        The dict holds ``value``, then ``raw`` when it is truthy, then ``start`` and ``end``.

        :param self:
        :return:
        """
        details = OrderedDict()
        details['value'] = self.value
        if self.raw:
            details['raw'] = self.raw
        for bound in ('start', 'end'):
            details[bound] = getattr(self, bound)
        return details

    Match.advanced = property(match_advanced)
@@ -1,295 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Options
 """
 import copy
 import json
 import os
 import pkgutil
 import shlex
 from argparse import ArgumentParser
 import six
 def build_argument_parser():
    """
    Create the command-line argument parser.

    Options are organised in the "Naming", "Input", "Output", "Configuration" and
    "Information" groups. Every optional argument is declared with ``default=None``.

    :return: the argument parser
    :rtype: ArgumentParser
    """
    def add_value(group, *flags, **settings):
        """Declare an option taking a single value."""
        group.add_argument(*flags, default=None, **settings)

    def add_switch(group, *flags, **settings):
        """Declare a ``store_true`` option."""
        group.add_argument(*flags, action='store_true', default=None, **settings)

    def add_repeatable(group, *flags, **settings):
        """Declare an ``append`` option that can be given several times."""
        group.add_argument(*flags, action='append', default=None, **settings)

    parser = ArgumentParser()
    parser.add_argument(dest='filename', nargs='*', help='Filename or release name to guess')

    naming = parser.add_argument_group("Naming")
    add_value(naming, '-t', '--type', dest='type',
              help='The suggested file type: movie, episode. If undefined, type will be guessed.')
    add_switch(naming, '-n', '--name-only', dest='name_only',
               help='Parse files as name only, considering "/" and "\\" like other separators.')
    add_switch(naming, '-Y', '--date-year-first', dest='date_year_first',
               help='If short date is found, consider the first digits as the year.')
    add_switch(naming, '-D', '--date-day-first', dest='date_day_first',
               help='If short date is found, consider the second digits as the day.')
    add_repeatable(naming, '-L', '--allowed-languages', dest='allowed_languages',
                   help='Allowed language (can be used multiple times)')
    add_repeatable(naming, '-C', '--allowed-countries', dest='allowed_countries',
                   help='Allowed country (can be used multiple times)')
    add_switch(naming, '-E', '--episode-prefer-number', dest='episode_prefer_number',
               help='Guess "serie.213.avi" as the episode 213. Without this option, '
                    'it will be guessed as season 2, episode 13')
    add_repeatable(naming, '-T', '--expected-title', dest='expected_title',
                   help='Expected title to parse (can be used multiple times)')
    add_repeatable(naming, '-G', '--expected-group', dest='expected_group',
                   help='Expected release group (can be used multiple times)')
    add_repeatable(naming, '--includes', dest='includes',
                   help='List of properties to be detected')
    add_repeatable(naming, '--excludes', dest='excludes',
                   help='List of properties to be ignored')

    source = parser.add_argument_group("Input")
    add_value(source, '-f', '--input-file', dest='input_file',
              help='Read filenames from an input text file. File should use UTF-8 charset.')

    output = parser.add_argument_group("Output")
    add_switch(output, '-v', '--verbose', dest='verbose',
               help='Display debug output')
    add_value(output, '-P', '--show-property', dest='show_property',
              help='Display the value of a single property (title, series, video_codec, year, ...)')
    add_switch(output, '-a', '--advanced', dest='advanced',
               help='Display advanced information for filename guesses, as json output')
    add_switch(output, '-s', '--single-value', dest='single_value',
               help='Keep only first value found for each property')
    add_switch(output, '-l', '--enforce-list', dest='enforce_list',
               help='Wrap each found value in a list even when property has a single value')
    add_switch(output, '-j', '--json', dest='json',
               help='Display information for filename guesses as json output')
    add_switch(output, '-y', '--yaml', dest='yaml',
               help='Display information for filename guesses as yaml output')

    configuration = parser.add_argument_group("Configuration")
    add_repeatable(configuration, '-c', '--config', dest='config',
                   help='Filepath to configuration file. Configuration file contains the same '
                        'options as those from command line options, but option names have "-" characters '
                        'replaced with "_". This configuration will be merged with default and user '
                        'configuration files.')
    add_switch(configuration, '--no-user-config', dest='no_user_config',
               help='Disable user configuration. If not defined, guessit tries to read configuration files '
                    'at ~/.guessit/options.(json|yml|yaml) and ~/.config/guessit/options.(json|yml|yaml)')
    add_switch(configuration, '--no-default-config', dest='no_default_config',
               help='Disable default configuration. This should be done only if you are providing a full '
                    'configuration through user configuration or --config option. If no "advanced_config" '
                    'is provided by another configuration file, it will still be loaded from default '
                    'configuration.')

    information = parser.add_argument_group("Information")
    add_switch(information, '-p', '--properties', dest='properties',
               help='Display properties that can be guessed.')
    add_switch(information, '-V', '--values', dest='values',
               help='Display property values that can be guessed.')
    add_switch(information, '--version', dest='version',
               help='Display the guessit version.')

    return parser
 def parse_options(options=None, api=False):
    """
    Turn the given options into an options dict.

    A string is split with ``shlex`` and parsed as command-line arguments. ``None`` gives an
    empty dict when ``api`` is true, otherwise the process command-line arguments are parsed.
    A dict is returned as is. Any other value is handed to the argument parser as a list of
    arguments.

    :param options: option string, argument list, dict or None
    :type options:
    :param api: whether the call comes from the API rather than the command line
    :type api: boolean
    :return: the options
    :rtype:
    """
    if isinstance(options, six.string_types):
        return vars(argument_parser.parse_args(shlex.split(options)))
    if options is None:
        return {} if api else vars(argument_parser.parse_args())
    if isinstance(options, dict):
        return options
    return vars(argument_parser.parse_args(options))
 # Parser instance built once at import time and used by parse_options().
 argument_parser = build_argument_parser()
 class ConfigurationException(Exception):
    """
    Error raised for problems related to a configuration file.
    """
 def load_config(options):
    """
    Build the configuration from the configuration files that are defined and present.

    Sources are merged in this order: the packaged ``config/options.json`` (unless
    ``no_default_config`` is set), the existing user configuration files (unless
    ``no_user_config`` is set), then the files listed in the ``config`` option.

    :param options: parsed options
    :type options:
    :return: the merged configuration, always containing ``advanced_config``
    :rtype:
    """
    def read_packaged_defaults():
        """Parse the default options shipped as package data."""
        raw = pkgutil.get_data('guessit', 'config/options.json').decode('utf-8')
        return json.loads(raw)

    configurations = []
    if not options.get('no_default_config'):
        configurations.append(read_packaged_defaults())

    config_files = []
    if not options.get('no_user_config'):
        home_directory = os.path.expanduser("~")
        cwd = os.getcwd()
        try:
            import yaml  # pylint:disable=unused-variable,unused-import
        except ImportError:
            yaml_supported = False
        else:
            yaml_supported = True
        candidates = get_options_file_locations(home_directory, cwd, yaml_supported)
        config_files = [candidate for candidate in candidates if os.path.exists(candidate)]

    custom_config_files = options.get('config')
    if custom_config_files:
        config_files = config_files + custom_config_files

    for config_file in config_files:
        loaded = load_config_file(config_file)
        if loaded:
            configurations.append(loaded)

    config = merge_options(*configurations) if configurations else {}

    if 'advanced_config' not in config:
        # Guessit doesn't work without advanced_config, so we use default if no configuration files provides it.
        config['advanced_config'] = read_packaged_defaults()['advanced_config']

    return config
 def merge_options(*options):
    """
    Merge several options dicts into a single new dict.

    The first dict is deep-copied, then each following dict is merged on top of it. A
    following dict may carry a ``pristine`` entry: ``True`` discards everything merged so
    far, any other truthy value is iterated as the keys to discard before merging.

    :param options: options dicts, from lowest to highest priority; falsy entries are skipped
    :type options:
    :return: the merged options
    :rtype:
    """
    merged = {}
    if not options:
        return merged

    base = options[0]
    if base:
        merged.update(copy.deepcopy(base))

    for overlay in options[1:]:
        if not overlay:
            continue
        pristine = overlay.get('pristine')
        if pristine is True:
            merged = {}
        elif pristine:
            for to_reset in pristine:
                merged.pop(to_reset, None)
        for option, value in overlay.items():
            merge_option_value(option, value, merged)
    return merged
 def merge_option_value(option, value, merged):
    """
    Merge a single option value into ``merged``, in place.

    ``None`` values and the ``pristine`` option are ignored. When the option already holds a
    list, the items of ``value`` not yet in it are appended. When it already holds a dict,
    it is replaced by the result of ``merge_options`` on the existing dict and ``value``.
    Otherwise the option is set to ``value``, lists being copied.

    :param option: option name
    :param value: new value for the option
    :param merged: dict receiving the value
    :return:
    """
    if value is None or option == 'pristine':
        return

    current = merged[option] if option in merged else None
    if isinstance(current, list):
        for item in value:
            if item not in current:
                current.append(item)
    elif isinstance(current, dict):
        merged[option] = merge_options(current, value)
    else:
        merged[option] = list(value) if isinstance(value, list) else value
 def load_config_file(filepath):
    """
    Load a configuration as an options dict.

    Format of the file is given with filepath extension: ``.json`` files are parsed as JSON,
    ``.yaml`` and ``.yml`` files as YAML. When the extension is none of these, ``filepath``
    itself is tried as an inline JSON document.

    :param filepath: path of the configuration file, or an inline JSON document
    :type filepath:
    :return: the loaded options
    :rtype:
    :raise ConfigurationException: when a YAML file is given but PyYAML is not installed, or
        when the input is neither a supported file nor valid JSON
    """
    if filepath.endswith('.json'):
        with open(filepath) as config_file_data:
            return json.load(config_file_data)
    if filepath.endswith(('.yaml', '.yml')):
        try:
            import yaml
        except ImportError:  # pragma: no cover
            raise ConfigurationException('Configuration file extension is not supported. '
                                         'PyYAML should be installed to support "%s" file' % (
                                             filepath,))
        with open(filepath) as config_file_data:
            # safe_load builds plain data only. A bare yaml.load() can construct arbitrary
            # Python objects on older PyYAML and fails on PyYAML 6, where a Loader is required.
            return yaml.safe_load(config_file_data)

    try:
        # Try to load input as JSON
        return json.loads(filepath)
    except (TypeError, ValueError):
        # Not an inline JSON document: report it as unsupported below.
        pass

    raise ConfigurationException('Configuration file extension is not supported for "%s" file.' % (filepath,))
 def get_options_file_locations(homedir, cwd, yaml_supported=False):
    """
    List every path where an options file is looked for.

    Candidates are ``<homedir>/.guessit/options``, ``<homedir>/.config/guessit/options`` and
    ``<cwd>/guessit.options``, each with the ``json`` extension, followed by ``yaml`` and
    ``yml`` when ``yaml_supported`` is true.

    :param homedir: user home directory
    :type homedir: basestring
    :param cwd: current working directory
    :type cwd: basestring
    :param yaml_supported: whether YAML file names are included
    :type yaml_supported: bool
    :return: candidate paths, grouped by directory
    :rtype: list
    """
    extensions = ['json', 'yaml', 'yml'] if yaml_supported else ['json']
    targets = (
        (os.path.join(homedir, '.guessit'), 'options'),
        (os.path.join(homedir, '.config', 'guessit'), 'options'),
        (cwd, 'guessit.options'),
    )
    return [os.path.join(directory, stem + '.' + extension)
            for directory, stem in targets
            for extension in extensions]
@@ -1,35 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Utils for re module
 """
 from rebulk.remodule import re
 def build_or_pattern(patterns, name=None, escape=False):
    """
    Build a regular expression group that matches any of the given patterns.

    :param patterns: alternatives to combine
    :type patterns: iterable of str
    :param name: when given, the group is a named group ``(?P<name>...)`` instead of ``(?:...)``
    :type name: str
    :param escape: when true, each alternative is escaped and wrapped in its own ``(?:...)``
    :type escape: bool
    :return: the alternation pattern; a lone ``)`` when ``patterns`` is empty
    :rtype: str
    """
    alternatives = [('(?:%s)' % re.escape(pattern)) if escape else pattern for pattern in patterns]
    if not alternatives:
        # No opening token is ever emitted when there is nothing to alternate.
        return ')'
    opening = '(?P<' + name + '>' if name else '(?:'
    return opening + '|'.join(alternatives) + ')'
@@ -1,99 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Rebulk object default builder
 """
 from rebulk import Rebulk
 from .markers.path import path
 from .markers.groups import groups
 from .properties.episodes import episodes
 from .properties.container import container
 from .properties.source import source
 from .properties.video_codec import video_codec
 from .properties.audio_codec import audio_codec
 from .properties.screen_size import screen_size
 from .properties.website import website
 from .properties.date import date
 from .properties.title import title
 from .properties.episode_title import episode_title
 from .properties.language import language
 from .properties.country import country
 from .properties.release_group import release_group
 from .properties.streaming_service import streaming_service
 from .properties.other import other
 from .properties.size import size
 from .properties.bit_rate import bit_rate
 from .properties.edition import edition
 from .properties.cds import cds
 from .properties.bonus import bonus
 from .properties.film import film
 from .properties.part import part
 from .properties.crc import crc
 from .properties.mimetype import mimetype
 from .properties.type import type_
 from .processors import processors
 def rebulk_builder(config):
    """
    Default builder for main Rebulk object used by api.

    Creates an empty ``Rebulk`` and registers, one after the other, the rebulk returned by every
    marker, property and processor builder. Each builder receives its own section of ``config``
    (an empty dict when the section is absent).

    :param config: configuration, indexed by builder name (must expose ``get``)
    :type config: dict
    :return: Main Rebulk object
    :rtype: Rebulk
    """
    def _config(name):
        # Configuration section of a single builder; {} when nothing is configured for it.
        return config.get(name, {})
    rebulk = Rebulk()
    # Shared by the language and country builders, which take it as a second argument.
    common_words = frozenset(_config('common_words'))
    # NOTE(review): registration order presumably matters to rebulk - keep it unless confirmed.
    rebulk.rebulk(path(_config('path')))
    rebulk.rebulk(groups(_config('groups')))
    rebulk.rebulk(episodes(_config('episodes')))
    rebulk.rebulk(container(_config('container')))
    rebulk.rebulk(source(_config('source')))
    rebulk.rebulk(video_codec(_config('video_codec')))
    rebulk.rebulk(audio_codec(_config('audio_codec')))
    rebulk.rebulk(screen_size(_config('screen_size')))
    rebulk.rebulk(website(_config('website')))
    rebulk.rebulk(date(_config('date')))
    rebulk.rebulk(title(_config('title')))
    rebulk.rebulk(episode_title(_config('episode_title')))
    rebulk.rebulk(language(_config('language'), common_words))
    rebulk.rebulk(country(_config('country'), common_words))
    rebulk.rebulk(release_group(_config('release_group')))
    rebulk.rebulk(streaming_service(_config('streaming_service')))
    rebulk.rebulk(other(_config('other')))
    rebulk.rebulk(size(_config('size')))
    rebulk.rebulk(bit_rate(_config('bit_rate')))
    rebulk.rebulk(edition(_config('edition')))
    rebulk.rebulk(cds(_config('cds')))
    rebulk.rebulk(bonus(_config('bonus')))
    rebulk.rebulk(film(_config('film')))
    rebulk.rebulk(part(_config('part')))
    rebulk.rebulk(crc(_config('crc')))
    rebulk.rebulk(processors(_config('processors')))
    rebulk.rebulk(mimetype(_config('mimetype')))
    rebulk.rebulk(type_(_config('type')))
    def customize_properties(properties):
        """
        Customize default rebulk properties

        Replaces the ``count`` entry by ``season_count`` and ``episode_count`` entries holding
        the same value. ``properties`` is modified in place and must contain ``count``.
        """
        count = properties['count']
        del properties['count']
        properties['season_count'] = count
        properties['episode_count'] = count
        return properties
    # Attached as an attribute of the returned object.
    rebulk.customize_properties = customize_properties
    return rebulk
@@ -1,15 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Common module
 """
 import re
 # Characters treated as separators between tags/words. Raw string: it holds two backslashes.
 seps = r' [](){}+*|=-_~#/\\.,;:'  # list of tags/words separators
 # Same separators without the bracket characters '[](){}' (removed as one contiguous run).
 seps_no_groups = seps.replace('[](){}', '')
 # Same separators without the slash and backslash characters.
 seps_no_fs = seps.replace('/', '').replace('\\', '')
 title_seps = r'-+/\|'  # separators for title
 # (abbreviation, replacement) pairs: the replacement is a character class of seps_no_fs.
 dash = (r'-', r'['+re.escape(seps_no_fs)+']')  # abbreviation used by many rebulk objects.
 alt_dash = (r'@', r'['+re.escape(seps_no_fs)+']')  # abbreviation used by many rebulk objects.
@@ -1,75 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Comparators
 """
 try:
    from functools import cmp_to_key
 except ImportError:
    from ...backports import cmp_to_key
 def marker_comparator_predicate(match):
    """
    Default predicate selecting the matches that count when markers are compared.

    :param match: match to test; its ``private``, ``name``, ``tags`` and ``value`` are read
    :return: False for private matches, ``proper_count`` and ``title`` matches, ``container``
        matches tagged ``extension`` and ``other`` matches valued ``Rip``; True otherwise
    :rtype: bool
    """
    if match.private:
        return False
    if match.name in ('proper_count', 'title'):
        return False
    if match.name == 'container' and 'extension' in match.tags:
        return False
    if match.name == 'other' and match.value == 'Rip':
        return False
    return True
 def marker_weight(matches, marker, predicate):
    """
    Compute the comparator weight of a marker.

    The weight is the number of distinct match names returned by ``matches.range`` for the
    marker span, filtered by ``predicate``.

    :param matches: matches container, queried through ``range``
    :param marker: marker whose ``span`` is inspected
    :param predicate: filter forwarded to ``matches.range``
    :return: number of distinct match names
    :rtype: int
    """
    inside_marker = matches.range(*marker.span, predicate=predicate)
    distinct_names = {found.name for found in inside_marker}
    return len(distinct_names)
 def marker_comparator(matches, markers, predicate):
    """
    Build a ``cmp``-style comparator ordering markers from the most valuable to the least.

    The marker with the higher weight (see ``marker_weight``) comes first. When weights are
    equal, the marker located later in ``markers`` comes first.

    :param matches: matches container used to weigh the markers
    :type matches:
    :param markers: sequence of all markers; ``index`` is used to break ties
    :param predicate: match filter forwarded to ``marker_weight``
    :return: comparator function taking two markers
    :rtype: callable
    """
    def comparator(marker1, marker2):
        """
        Negative when marker1 must sort first, positive when marker2 must.
        """
        second_weight = marker_weight(matches, marker2, predicate)
        first_weight = marker_weight(matches, marker1, predicate)
        weight_delta = second_weight - first_weight
        if weight_delta != 0:
            return weight_delta
        # Same weight: give preference to the rightmost path.
        return markers.index(marker2) - markers.index(marker1)
    return comparator
 def marker_sorted(markers, matches, predicate=marker_comparator_predicate):
    """
    Return the markers sorted from the most valuable to the least.

    :param markers: markers to sort
    :type markers:
    :param matches: matches container used to weigh the markers
    :type matches:
    :param predicate: filter selecting the matches taken into account
    :return: a new sorted list of markers
    :rtype: list
    """
    comparator = marker_comparator(matches, markers, predicate=predicate)
    sort_key = cmp_to_key(comparator)
    return sorted(markers, key=sort_key)
@@ -1,125 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Date
 """
 from dateutil import parser
 from rebulk.remodule import re
 # Character class of the separators accepted between date components.
 _dsep = r'[-/ \.]'
 # Same separators, plus 'x'.
 _dsep_bis = r'[-/ \.x]'
 # Date patterns, tried in this order by search_date. Group 1 is always the whole date text;
 # the following groups are the components handed to the date parser.
 date_regexps = [
    # 8 digits surrounded by separators
    re.compile(r'%s((\d{8}))%s' % (_dsep, _dsep), re.IGNORECASE),
    # 6 digits surrounded by separators
    re.compile(r'%s((\d{6}))%s' % (_dsep, _dsep), re.IGNORECASE),
    # 2 digits, 1-2 digits, 1-2 digits
    re.compile(r'(?:^|[^\d])((\d{2})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE),
    # 1-2 digits, 1-2 digits, 2 digits
    re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE),
    # 4 digits, 1-2 digits, 1-2 digits ('x' also accepted as first separator)
    re.compile(r'(?:^|[^\d])((\d{4})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep_bis, _dsep), re.IGNORECASE),
    # 1-2 digits, 1-2 digits, 4 digits ('x' also accepted as last separator)
    re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{4}))(?:$|[^\d])' % (_dsep, _dsep_bis), re.IGNORECASE),
    # 1-2 digits with optional ordinal suffix, a 3-10 letter word, 4 digits
    re.compile(r'(?:^|[^\d])((\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4}))(?:$|[^\d])' % (_dsep, _dsep),
               re.IGNORECASE)]
 def valid_year(year):
    """
    Tell whether a number is a plausible year, i.e. lies in the range 1920 (included) to
    2030 (excluded).

    :param year: number to check
    :type year: int
    :rtype: bool
    """
    return year >= 1920 and year < 2030
 def _is_int(string):
    """
    Tell whether ``int()`` accepts the input string.

    :param string: text to check
    :type string: str
    :return: True when the conversion succeeds, False when it raises ``ValueError``
    :rtype: bool
    """
    try:
        int(string)
    except ValueError:
        return False
    else:
        return True
 def _guess_day_first_parameter(groups):  # pylint:disable=inconsistent-return-statements
    """
    Guess the ``day_first`` option from the matched date components when it is not defined.

    It helps to solve issues with python dateutils 2.5.3 parser changes.

    :param groups: components matched for the date, as strings
    :type groups: sequence of str
    :return: False when the date starts with a year, True when it ends with one, None when
        nothing can be decided
    :rtype: bool
    """
    first, last = groups[0], groups[-1]
    first_is_int = _is_int(first)
    # A long (4 digits) year in first position: the day cannot come first.
    if first_is_int and valid_year(int(first[:4])):
        return False
    last_is_int = _is_int(last)
    # A long year in last position: the day comes first.
    if last_is_int and valid_year(int(last[-4:])):
        return True
    # A short (2 digits) value above 31 can only be a year.
    if first_is_int and int(first[:2]) > 31:
        return False
    if last_is_int and int(last[-2:]) > 31:
        return True
    return None
 def search_date(string, year_first=None, day_first=None):  # pylint:disable=inconsistent-return-statements
    """Looks for date patterns, and if found return the date and group span.
    Assumes there are sentinels at the beginning and end of the string that
    always allow matching a non-digit delimiting the date.
    Year can be defined on two digit only. It will return the nearest possible
    date from today.
    >>> search_date(' This happened on 2002-04-22. ')
    (18, 28, datetime.date(2002, 4, 22))
    >>> search_date(' And this on 17-06-1998. ')
    (13, 23, datetime.date(1998, 6, 17))
    >>> search_date(' no date in here ')

    :param string: text to search
    :param year_first: forwarded to the date parser as ``yearfirst``; both values are tried when None
    :param day_first: forwarded to the date parser as ``dayfirst``; guessed from the match when None
    :return: ``(start, end, date)`` for the first plausible date, or None when no date is found
    """
    # Patterns are tried in declaration order; the first one giving a plausible date wins.
    for date_re in date_regexps:
        search_match = date_re.search(string)
        if not search_match:
            continue
        # Group 1 is the whole date text, the remaining groups are its components.
        start, end = search_match.start(1), search_match.end(1)
        groups = search_match.groups()[1:]
        match = '-'.join(groups)
        # str.join never returns None, so this guard does not trigger.
        if match is None:
            continue
        # NOTE(review): day_first is reassigned here, so a value set for one pattern is kept
        # for the following patterns of the loop.
        if year_first and day_first is None:
            day_first = False
        if day_first is None:
            day_first = _guess_day_first_parameter(groups)
        # If day_first/year_first is undefined, parse is made using both possible values.
        yearfirst_opts = [False, True]
        if year_first is not None:
            yearfirst_opts = [year_first]
        dayfirst_opts = [True, False]
        if day_first is not None:
            dayfirst_opts = [day_first]
        kwargs_list = ({'dayfirst': d, 'yearfirst': y}
                       for d in dayfirst_opts for y in yearfirst_opts)
        for kwargs in kwargs_list:
            try:
                date = parser.parse(match, **kwargs)
            except (ValueError, TypeError):  # pragma: no cover
                # see https://bugs.launchpad.net/dateutil/+bug/1247643
                date = None
            # check date plausibility
            if date and valid_year(date.year):  # pylint:disable=no-member
                return start, end, date.date()  # pylint:disable=no-member
@@ -1,53 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Expected property factory
 """
 import re
 from rebulk import Rebulk
 from rebulk.utils import find_all
 from . import dash, seps
 def build_expected_function(context_key):
    """
    Creates a expected property function

    :param context_key: key of the context entry holding the expected values
    :type context_key:
    :return: functional pattern looking for the expected values in the input string
    :rtype: callable
    """
    def expected(input_string, context):
        """
        Expected property functional pattern.

        Each expected value is either a regular expression (prefixed with ``re:``) or a plain
        text searched case-insensitively, any separator being treated as a space.

        :param input_string: string to search
        :type input_string:
        :param context: context providing the expected values under ``context_key``
        :type context:
        :return: spans for regular expression hits, ``start``/``end``/``value`` dicts for text hits
        :rtype: list
        """
        ret = []
        for search in context.get(context_key):
            if search.startswith('re:'):
                search = search[3:]
                # Spaces become '-', which the dash abbreviation turns into a separator class.
                search = search.replace(' ', '-')
                matches = Rebulk().regex(search, abbreviations=[dash], flags=re.IGNORECASE) \
                    .matches(input_string, context)
                for match in matches:
                    ret.append(match.span)
            else:
                # Keep the configured text as the reported value.
                value = search
                # NOTE(review): input_string is rebound here, so the following expected values
                # (including 're:' ones) are searched in the separator-normalized string.
                for sep in seps:
                    input_string = input_string.replace(sep, ' ')
                    search = search.replace(sep, ' ')
                for start in find_all(input_string, search, ignore_case=True):
                    ret.append({'start': start, 'end': start + len(search), 'value': value})
        return ret
    return expected
@@ -1,136 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Formatters
 """
 from rebulk.formatters import formatters
 from rebulk.remodule import re
 from . import seps
 # Separators that cleanup() must not turn into spaces.
 _excluded_clean_chars = ',:;-/\\'
 # Every separator of seps that is not excluded above, in seps order.
 clean_chars = ""
 for sep in seps:
    if sep not in _excluded_clean_chars:
        clean_chars += sep
 def _potential_before(i, input_string):
    """
    Check if the character at position i can be a potential single char separator considering what's before it.

    The character qualifies when it is a separator preceded by exactly one non-separator
    character, itself preceded by a separator or by the beginning of the string.

    The beginning of the string counts as a boundary, the same way ``_potential_after`` treats
    the end of the string. Position ``i - 2`` is only read when it exists: a negative index
    would wrap around and test the last character of the string instead.

    :param i: position of the character to check
    :type i: int
    :param input_string: string holding the character
    :type input_string: str
    :return: True if the character may be a single char separator
    :rtype: bool
    """
    if i < 1 or input_string[i] not in seps or input_string[i - 1] in seps:
        return False
    return i < 2 or input_string[i - 2] in seps
 def _potential_after(i, input_string):
    """
    Check if the character at position i can be a potential single char separator considering what's after it.

    It qualifies when fewer than two characters follow it, or when the character two
    positions further is the same and the character in between is not a separator.

    :param i: position of the character to check
    :type i: int
    :param input_string: string holding the character
    :type input_string: str
    :return: True if the character may be a single char separator
    :rtype: bool
    """
    if i + 2 >= len(input_string):
        return True
    if input_string[i + 2] != input_string[i]:
        return False
    return input_string[i + 1] not in seps
 def cleanup(input_string):
    """
    Replace the separators listed in clean_chars by spaces, strip separators from both ends
    and collapse runs of spaces.

    Separators are restored when they separate single characters (Marvel's Agents of S.H.I.E.L.D.).

    :param input_string: string to clean
    :type input_string: str
    :return: cleaned string
    :rtype: str
    """
    clean_string = input_string
    for char in clean_chars:
        clean_string = clean_string.replace(char, ' ')
    # Restore input separator if they separate single characters.
    # Useful for Marvel's Agents of S.H.I.E.L.D.
    # https://github.com/guessit-io/guessit/issues/278
    indices = [i for i, letter in enumerate(clean_string) if letter in seps]
    # Original separator characters that were restored; they are not stripped at the end.
    dots = set()
    if indices:
        clean_list = list(clean_string)
        # Candidates are checked against the original string, not the cleaned one.
        potential_indices = []
        for i in indices:
            if _potential_before(i, input_string) and _potential_after(i, input_string):
                potential_indices.append(i)
        # Only keep candidates having another candidate two positions before or after.
        replace_indices = []
        for potential_index in potential_indices:
            if potential_index - 2 in potential_indices or potential_index + 2 in potential_indices:
                replace_indices.append(potential_index)
        if replace_indices:
            for replace_index in replace_indices:
                dots.add(input_string[replace_index])
                clean_list[replace_index] = input_string[replace_index]
            clean_string = ''.join(clean_list)
    clean_string = strip(clean_string, ''.join([c for c in seps if c not in dots]))
    clean_string = re.sub(' +', ' ', clean_string)
    return clean_string
 def strip(input_string, chars=seps):
    """
    Strip separators from input_string

    :param input_string: string to strip
    :type input_string: str
    :param chars: characters removed from both ends; defaults to seps
    :type chars: str
    :return: the stripped string
    :rtype: str
    """
    return input_string.strip(chars)
 def raw_cleanup(raw):
    """
    Normalize a raw value so that it can be compared with other raw values.

    The value is lower-cased, then passed through the ``cleanup`` and ``strip`` formatters
    combined with ``formatters``.

    :param raw: raw value
    :type raw: str
    :return: normalized value
    :rtype:
    """
    normalize = formatters(cleanup, strip)
    lowered = raw.lower()
    return normalize(lowered)
 def reorder_title(title, articles=('the',), separators=(',', ', ')):
    """
    Move a trailing article back to the beginning of the title.

    A title ending with a separator followed by an article (compared case-insensitively) is
    rewritten as ``<article> <rest of the title>``; the article keeps its original case.

    :param title: title to reorder
    :type title: str
    :param articles: lower-case articles to look for
    :type articles: iterable of str
    :param separators: separators allowed between the title and the article
    :type separators: iterable of str
    :return: reordered title, or the title unchanged when it has no trailing article
    :rtype: str
    """
    lowered = title.lower()
    candidates = ((separator, separator + article) for article in articles for separator in separators)
    for separator, suffix in candidates:
        tail_size = len(suffix)
        if lowered[-tail_size:] != suffix:
            continue
        # The article is what remains of the tail once the separator is skipped.
        article_part = title[len(separator) - tail_size:]
        return article_part + ' ' + title[:-tail_size]
    return title
@@ -1,165 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 parse numeral from various formats
 """
 from rebulk.remodule import re
 # Regular expression fragment: 1 to 4 digits.
 digital_numeral = r'\d{1,4}'
 # Regular expression fragment for a roman numeral; the lookahead requires at least one letter.
 roman_numeral = r'(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})'
 # Word lists: the position of a word in its list is the number it stands for (0 to 20).
 english_word_numeral_list = [
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
    'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
 ]
 french_word_numeral_list = [
    'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
 ]
 # Alternative French spellings (no accent, feminine 'une', no hyphen).
 french_alt_word_numeral_list = [
    'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
 ]
 def __build_word_numeral(*args):
    """
    Build the word numeral regexp from word lists.

    Every word of every list becomes one alternative of a single non-capturing group, in the
    order given.

    :param args: word lists
    :type args: lists of str
    :return: regular expression fragment
    :rtype: str
    :raises TypeError: when no word at all is given
    """
    words = [word for word_list in args for word in word_list]
    # Without any word there is no opening token to complete: the concatenation below fails.
    opening = r'(?:(?=\w+)' if words else None
    return opening + '|'.join(words) + ')'
 # Alternation of every known English and French number word.
 word_numeral = __build_word_numeral(english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list)
 # Regular expression fragment matching a numeral written with digits, roman letters or words.
 numeral = '(?:' + digital_numeral + '|' + roman_numeral + '|' + word_numeral + ')'
 # Roman symbols with their value, from the largest to the smallest: __parse_roman consumes
 # them in this order.
 __romanNumeralMap = (
    ('M', 1000),
    ('CM', 900),
    ('D', 500),
    ('CD', 400),
    ('C', 100),
    ('XC', 90),
    ('L', 50),
    ('XL', 40),
    ('X', 10),
    ('IX', 9),
    ('V', 5),
    ('IV', 4),
    ('I', 1)
 )
 # Anchored version of roman_numeral, used to validate a whole value.
 __romanNumeralPattern = re.compile('^' + roman_numeral + '$')
 def __parse_roman(value):
    """
    Convert a Roman numeral to an integer.

    :param value: Roman numeral, in upper case
    :type value: string
    :return: the numeric value
    :rtype: int
    :raises ValueError: when the value is not a valid Roman numeral
    """
    if __romanNumeralPattern.search(value) is None:
        raise ValueError('Invalid Roman numeral: %s' % value)
    total = 0
    position = 0
    # Symbols are consumed from left to right, largest values first.
    for symbol, amount in __romanNumeralMap:
        width = len(symbol)
        while value[position:position + width] == symbol:
            total += amount
            position += width
    return total
 def __parse_word(value):
    """
    Convert a number word to an integer.

    The word is searched, case-insensitively, in the English then the French word lists; its
    position in the first list containing it is the result.

    :param value: word to parse
    :type value: string
    :return: the numeric value
    :rtype: int
    :raises ValueError: when the word is in none of the lists
    """
    lowered = value.lower()
    for word_list in (english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list):
        if lowered in word_list:
            return word_list.index(lowered)
    raise ValueError  # pragma: no cover
 # Captures the first run of digits of a string, ignoring the non-digits around it.
 _clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')
 def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
    """
    Parse a numeric value into integer.

    The enabled parsers are tried in this order: digits, roman numeral, number word. The first
    one that succeeds gives the result.

    :param value: Value to parse. Can be an integer, roman numeral or word.
    :type value: string
    :param int_enabled: try to parse the value as digits
    :type int_enabled: bool
    :param roman_enabled: try to parse the value as a roman numeral
    :type roman_enabled: bool
    :param word_enabled: try to parse the value as a number word
    :type word_enabled: bool
    :param clean: tolerate surrounding text: the first run of digits is used, and every
        whitespace-separated word is tried as a roman numeral or number word
    :type clean: bool
    :return: Numeric value
    :rtype: int
    :raises ValueError: if no enabled parser can parse the value
    """
    # pylint: disable=too-many-branches
    if int_enabled:
        try:
            if clean:
                match = _clean_re.match(value)
                if match:
                    clean_value = match.group(1)
                    return int(clean_value)
            return int(value)
        except ValueError:
            pass
    if roman_enabled:
        try:
            if clean:
                for word in value.split():
                    try:
                        # Words are upper-cased here; the whole-value fallback below is not.
                        return __parse_roman(word.upper())
                    except ValueError:
                        pass
            return __parse_roman(value)
        except ValueError:
            pass
    if word_enabled:
        try:
            if clean:
                for word in value.split():
                    try:
                        return __parse_word(word)
                    except ValueError:  # pragma: no cover
                        pass
            return __parse_word(value)  # pragma: no cover
        except ValueError:  # pragma: no cover
            pass
    raise ValueError('Invalid numeral: ' + value)   # pragma: no cover
@@ -1,27 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Pattern utility functions
 """
 def is_disabled(context, name):
    """Whether a specific pattern is disabled.

    The context object might define an inclusion list (includes) or an exclusion list (excludes).
    A pattern is disabled when it is found in the exclusion list, or when a non-empty inclusion
    list is defined and the pattern is not in it.

    :param context: mapping that may hold ``excludes`` and ``includes``; may be empty or None
    :param name: name of the pattern
    :return: a truthy value when the pattern is disabled. When no inclusion list applies, the
        falsy ``includes`` value itself (e.g. None or an empty list) is returned.
    """
    if not context:
        return False
    if name in (context.get('excludes') or ()):
        return True
    includes = context.get('includes')
    if not includes:
        return includes
    return name not in includes
@@ -1,106 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Quantities: Size
 """
 import re
 from abc import abstractmethod
 import six
 from ..common import seps
 class Quantity(object):
    """
    A magnitude together with its units.

    Subclasses provide ``parse_units`` to normalize the units text.
    """
    # A number (integer or decimal) followed by optional non-digit units.
    parser_re = re.compile(r'(?P<magnitude>\d+(?:[.]\d+)?)(?P<units>[^\d]+)?')
    def __init__(self, magnitude, units):
        self.magnitude = magnitude
        self.units = units
    @classmethod
    @abstractmethod
    def parse_units(cls, value):
        """
        Turn the units text into the proper unit notation.
        """
        raise NotImplementedError
    @classmethod
    def fromstring(cls, string):
        """
        Build a quantity from its text form.

        :param string: text starting with a number, optionally followed by units
        :return: the quantity; its magnitude is an int unless the number has a decimal part
        """
        parsed = cls.parser_re.match(string)
        raw_magnitude = parsed.group('magnitude')
        # The pattern only allows digits with an optional '.digits' part.
        magnitude = float(raw_magnitude) if '.' in raw_magnitude else int(raw_magnitude)
        return cls(magnitude, cls.parse_units(parsed.group('units')))
    def __hash__(self):
        return hash(str(self))
    def __eq__(self, other):
        # A quantity compares equal to its own text form.
        if isinstance(other, six.string_types):
            return str(self) == other
        if isinstance(other, self.__class__):
            return self.magnitude == other.magnitude and self.units == other.units
        return NotImplemented
    def __ne__(self, other):
        return not self == other
    def __repr__(self):
        return '<%s [%s]>' % (self.__class__.__name__, self)
    def __str__(self):
        return '%s%s' % (self.magnitude, self.units)
 class Size(Quantity):
    """
    A size quantity.
    e.g.: 1.1GB, 300MB
    """
    @classmethod
    def parse_units(cls, value):
        """
        Strip the separators around the units and upper-case them.
        """
        trimmed = value.strip(seps)
        return trimmed.upper()
 class BitRate(Quantity):
    """
    A bit rate quantity.
    e.g.: 320Kbps, 1.5Mbps
    """
    @classmethod
    def parse_units(cls, value):
        """
        Strip the separators around the units, capitalize them and write 'bits'/'bit' as 'bps'.
        """
        capitalized = value.strip(seps).capitalize()
        # 'bits' must be replaced before 'bit', otherwise a stray 's' would remain.
        return capitalized.replace('bits', 'bps').replace('bit', 'bps')
 class FrameRate(Quantity):
    """
    Represent frame rate.
    e.g.: 24fps, 60fps
    """
    @classmethod
    def parse_units(cls, value):
        # The parsed units text is ignored: the units are always 'fps'.
        return 'fps'
@@ -1,74 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Validators
 """
 from functools import partial
 from rebulk.validators import chars_before, chars_after, chars_surround
 from . import seps
 # rebulk character validators with seps bound as their first argument.
 seps_before = partial(chars_before, seps)
 seps_after = partial(chars_after, seps)
 seps_surround = partial(chars_surround, seps)
 def int_coercable(string):
    """
    Tell whether ``int()`` accepts the given string.

    :param string: text to check
    :type string: str
    :return: True when the conversion succeeds, False when it raises ``ValueError``
    :rtype: bool
    """
    try:
        int(string)
    except ValueError:
        return False
    else:
        return True
 def and_(*validators):
    """
    Combine validators: the result accepts a string only when every validator accepts it.

    :param validators: validator functions, each taking the string to validate
    :type validators: callables
    :return: the combined validator
    :rtype: callable
    """
    def composed(string):
        """
        Run the validators in order and stop at the first one that rejects the string.

        :param string: string to validate
        :type string:
        :return: True when no validator rejects the string
        :rtype: bool
        """
        return all(validator(string) for validator in validators)
    return composed
 def or_(*validators):
    """
    Combine validators: the result accepts a string as soon as one validator accepts it.

    :param validators: validator functions, each taking the string to validate
    :type validators: callables
    :return: the combined validator
    :rtype: callable
    """
    def composed(string):
        """
        Run the validators in order and stop at the first one that accepts the string.

        :param string: string to validate
        :type string:
        :return: True when at least one validator accepts the string
        :rtype: bool
        """
        return any(validator(string) for validator in validators)
    return composed
@@ -1,34 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Words utils
 """
 from collections import namedtuple
 from . import seps
 # A word found in a string: its (start, end) span and its text.
 _Word = namedtuple('_Word', ['span', 'value'])
 def iter_words(string):
    """
    Iterate over the words of a string, i.e. the runs of characters between separators.

    :param string: string to split
    :type string: str
    :return: the words, each with its ``span`` (end excluded) and its ``value``
    :rtype: iterable[_Word]
    """
    word_start = None
    for position, char in enumerate(string):
        # Only ASCII characters are tested against seps: unicode characters always belong to words.
        if ord(char) < 128 and char in seps:
            if word_start is not None:
                yield _Word(span=(word_start, position), value=string[word_start:position])
                word_start = None
        elif word_start is None:
            word_start = position
    if word_start is not None:
        # The string ends inside a word.
        yield _Word(span=(word_start, len(string)), value=string[word_start:len(string)])
@@ -1,5 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Markers
 """
@@ -1,52 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Groups markers (...), [...] and {...}
 """
 from rebulk import Rebulk
 def groups(config):
    """
    Builder for rebulk object.

    The created object marks, under the name ``group``, every span enclosed by a configured
    opening character and the closing character found at the same position in ``ending``.

    :param config: rule configuration; ``starting`` and ``ending`` hold the opening and closing
        characters, a pair sharing the same position in both strings
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk.defaults(name="group", marker=True)
    starting = config['starting']
    ending = config['ending']
    def mark_groups(input_string):
        """
        Functional pattern to mark groups (...), [...] and {...}.

        :param input_string: string to scan
        :return: (start, end) spans of the groups, brackets included, in closing order
        """
        # One stack of pending opening positions per configured group type, so that any
        # number of opening characters is supported.
        openings = [[] for _ in starting]
        ret = []
        for i, char in enumerate(input_string):
            start_type = starting.find(char)
            if start_type > -1:
                openings[start_type].append(i)
            end_type = ending.find(char)
            # A closing character without a pending opening of the same type is ignored.
            if -1 < end_type < len(openings) and openings[end_type]:
                ret.append((openings[end_type].pop(), i + 1))
        return ret
    rebulk.functional(mark_groups)
    return rebulk
@@ -1,47 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Path markers
 """
 from rebulk import Rebulk
 from rebulk.utils import find_all
 def path(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    The created object marks, under the name ``path``, every element of the input path.

    :param config: rule configuration (not used)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk.defaults(name="path", marker=True)
    def mark_path(input_string, context):
        """
        Functional pattern to mark path elements.

        :param input_string: string to scan
        :param context: when its ``name_only`` entry is true, the whole string is one element
        :return: (start, end) spans of the elements located between '/' and '\\' characters
        """
        if context.get('name_only', False):
            return [(0, len(input_string))]
        # Positions of every path separator, plus virtual ones just before and after the string.
        boundaries = list(find_all(input_string, '/'))
        boundaries.extend(find_all(input_string, '\\'))
        boundaries.extend((-1, len(input_string)))
        boundaries.sort()
        return [(before + 1, after) for before, after in zip(boundaries, boundaries[1:])]
    rebulk.functional(mark_path)
    return rebulk
@@ -1,20 +0,0 @@
 """
 Match processors
 """
 from guessit.rules.common import seps
 def strip(match, chars=seps):
    """
    Strip given characters from match.

    The match span is narrowed from both ends while it starts or ends with one of ``chars``.
    Narrowing never goes past the other end of the span, so a match made only of such
    characters ends up empty instead of reading outside of its own span.

    :param chars: characters to strip; defaults to seps
    :param match: match to strip; its ``start`` and ``end`` are modified in place
    :return: False when the match is falsy once stripped, None otherwise
    """
    while match.start < match.end and match.input_string[match.start] in chars:
        match.start += 1
    while match.end > match.start and match.input_string[match.end - 1] in chars:
        match.end -= 1
    if not match:
        return False
    return None
@@ -1,259 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Processors
 """
 from collections import defaultdict
 import copy
 import six
 from rebulk import Rebulk, Rule, CustomRule, POST_PROCESS, PRE_PROCESS, AppendMatch, RemoveMatch
 from .common import seps_no_groups
 from .common.formatters import cleanup
 from .common.comparators import marker_sorted
 from .common.date import valid_year
 from .common.words import iter_words
 class EnlargeGroupMatches(CustomRule):
    """
    Enlarge matches that start and/or end a group so that their span includes the brackets.
    """
    priority = PRE_PROCESS
    def when(self, matches, context):
        """
        Collect matches starting one character after a 'group' marker start and matches ending one
        character before a 'group' marker end.

        :return: (starting, ending) lists, or False when both are empty
        """
        at_group_start = []
        at_group_end = []
        for marker in matches.markers.named('group'):
            at_group_start.extend(matches.starting(marker.start + 1))
            at_group_end.extend(matches.ending(marker.end - 1))
        if not at_group_start and not at_group_end:
            return False
        return at_group_start, at_group_end
    def then(self, matches, when_response, context):
        """
        Move the outer bound of every collected match by one character while shifting the raw
        bound the opposite way.
        """
        at_group_start, at_group_end = when_response
        # Each match is removed before being mutated and appended again afterwards
        # (presumably so the container indexes it under its new bounds - TODO confirm).
        for enlarged in at_group_start:
            matches.remove(enlarged)
            enlarged.start -= 1
            enlarged.raw_start += 1
            matches.append(enlarged)
        for enlarged in at_group_end:
            matches.remove(enlarged)
            enlarged.end += 1
            enlarged.raw_end -= 1
            matches.append(enlarged)
 class EquivalentHoles(Rule):
    """
    Creates equivalent matches for holes that have the same value as an existing match (case insensitive).

    For every 'path' marker, the holes found inside that file part are compared with the string
    values of the named matches. A hole whose value equals a match value, ignoring case, is renamed
    after that match, tagged 'equivalent' and returned. Matches tagged 'equivalent-ignore' are skipped.
    """
    priority = POST_PROCESS
    consequence = AppendMatch
    def when(self, matches, context):
        new_matches = []
        for filepath in marker_sorted(matches.markers.named('path'), matches):
            holes = matches.holes(start=filepath.start, end=filepath.end, formatter=cleanup)
            for name in matches.names:
                # Iterate over a copy: consumed holes are removed from ``holes`` below.
                for hole in list(holes):
                    for current_match in matches.named(name):
                        # Only string values can be compared case-insensitively.
                        if isinstance(current_match.value, six.string_types) and \
                                        hole.value.lower() == current_match.value.lower():
                            if 'equivalent-ignore' in current_match.tags:
                                continue
                            # Align both the hole and the existing match on the preferred spelling.
                            new_value = _preferred_string(hole.value, current_match.value)
                            if hole.value != new_value:
                                hole.value = new_value
                            if current_match.value != new_value:
                                current_match.value = new_value
                            hole.name = name
                            hole.tags = ['equivalent']
                            new_matches.append(hole)
                            # NOTE(review): there is no break after a hit, so the same hole can be
                            # appended again for another equal match; hence the membership guard.
                            if hole in holes:
                                holes.remove(hole)
        return new_matches
 class RemoveAmbiguous(Rule):
    """
    When several matches share a name but carry different values, keep those found in the first
    filepart (in ``sort_function`` order) that contains the name, plus any later match carrying one
    of the kept values.
    """
    priority = POST_PROCESS
    consequence = RemoveMatch
    def __init__(self, sort_function=marker_sorted, predicate=None):
        """
        :param sort_function: callable(markers, matches) giving the order in which fileparts are visited
        :param predicate: optional filter passed to ``matches.range`` to select the matches to consider
        """
        super(RemoveAmbiguous, self).__init__()
        self.sort_function = sort_function
        self.predicate = predicate
    def when(self, matches, context):
        """
        :return: matches whose name was already seen in an earlier filepart with other values
        """
        ordered_fileparts = self.sort_function(matches.markers.named('path'), matches)
        names_seen_before = set()
        kept_values = defaultdict(list)
        ambiguous = []
        for filepart in ordered_fileparts:
            names_here = set()
            for match in matches.range(filepart.start, filepart.end, predicate=self.predicate):
                names_here.add(match.name)
                known = kept_values[match.name]
                if match.value in known:
                    # Same value as a kept match: never removed.
                    continue
                if match.name in names_seen_before:
                    ambiguous.append(match)
                else:
                    known.append(match.value)
            # Names only count as "previous" once the whole filepart has been scanned.
            names_seen_before.update(names_here)
        return ambiguous
 class RemoveLessSpecificSeasonEpisode(RemoveAmbiguous):
    """
    When several season/episode matches carry different values,
    keep the one tagged 'SxxExx' or the one in the rightmost filepart.
    """
    def __init__(self, name):
        """
        :param name: name of the matches this rule disambiguates
        """
        def has_name(match):
            return match.name == name
        def is_explicit(match):
            return has_name(match) and 'SxxExx' in match.tags
        def reversed_fileparts(markers, matches):
            # Markers are handed to marker_sorted in reverse order.
            return marker_sorted(list(reversed(markers)), matches, is_explicit)
        super(RemoveLessSpecificSeasonEpisode, self).__init__(
            sort_function=reversed_fileparts,
            predicate=has_name)
 def _preferred_string(value1, value2):  # pylint:disable=too-many-return-statements
    """
    Retrieves preferred title from both values.
    :param value1:
    :type value1: str
    :param value2:
    :type value2: str
    :return: The preferred title
    :rtype: str
    """
    if value1 == value2:
        return value1
    if value1.istitle() and not value2.istitle():
        return value1
    if not value1.isupper() and value2.isupper():
        return value1
    if not value1.isupper() and value1[0].isupper() and not value2[0].isupper():
        return value1
    if _count_title_words(value1) > _count_title_words(value2):
        return value1
    return value2
 def _count_title_words(value):
    """
    Count how many words of value are title-cased.

    :param value: string to inspect
    :type value: str
    :return: number of words for which ``istitle()`` is true
    :rtype: int
    """
    return sum(1 for word in iter_words(value) if word.value.istitle())
 class SeasonYear(Rule):
    """
    If a season is a valid year and no year was found, create a year match from it.
    """
    priority = POST_PROCESS
    consequence = AppendMatch
    def when(self, matches, context):
        """
        :return: shallow copies of the year-like season matches, renamed to 'year'
        """
        if matches.named('year'):
            return []
        derived = []
        for season in matches.named('season'):
            if not valid_year(season.value):
                continue
            as_year = copy.copy(season)
            as_year.name = 'year'
            derived.append(as_year)
        return derived
 class YearSeason(Rule):
    """
    If a year and an episode are found but no season, create a season match from the year.
    """
    priority = POST_PROCESS
    consequence = AppendMatch
    def when(self, matches, context):
        """
        :return: shallow copies of the year matches, renamed to 'season'
        """
        if matches.named('season') or not matches.named('episode'):
            return []
        derived = []
        for year in matches.named('year'):
            as_season = copy.copy(year)
            as_season.name = 'season'
            derived.append(as_season)
        return derived
 class Processors(CustomRule):
    """
    Empty rule, present only so that post-processing rules can be ordered properly.
    """
    priority = POST_PROCESS
    def when(self, matches, context):
        """Do nothing; implicitly returns None."""
    def then(self, matches, when_response, context):  # pragma: no cover
        """Do nothing."""
 class StripSeparators(CustomRule):
    """
    Strip separators from the raw bounds of matches.
    Keep separators if they are from acronyms, like in ".S.H.I.E.L.D."
    """
    priority = POST_PROCESS
    def when(self, matches, context):
        # Always hands the matches over to then().
        return matches
    def then(self, matches, when_response, context):  # pragma: no cover
        for match in matches:
            # One pass per element of match.span (presumably two: start and end - TODO confirm),
            # each pass dropping at most one leading separator.
            for _ in range(0, len(match.span)):
                # A leading separator is kept when the third raw character is a separator too
                # (separator, letter, separator: the acronym shape).
                if match.raw[0] in seps_no_groups and (len(match.raw) < 3 or match.raw[2] not in seps_no_groups):
                    match.raw_start += 1
            # Same treatment for the trailing side, looking at the third character from the end.
            for _ in reversed(range(0, len(match.span))):
                if match.raw[-1] in seps_no_groups and (len(match.raw) < 3 or match.raw[-3] not in seps_no_groups):
                    match.raw_end -= 1
 def processors(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the pre/post-processing rules.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # Registration order is kept exactly as declared here.
    ordered_rules = (
        EnlargeGroupMatches,
        EquivalentHoles,
        RemoveLessSpecificSeasonEpisode('season'),
        RemoveLessSpecificSeasonEpisode('episode'),
        RemoveAmbiguous,
        SeasonYear,
        YearSeason,
        Processors,
        StripSeparators,
    )
    return Rebulk().rules(*ordered_rules)
@@ -1,5 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Properties
 """
@@ -1,235 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 audio_codec, audio_profile and audio_channels property
 """
 from rebulk import Rebulk, Rule, RemoveMatch
 from rebulk.remodule import re
 from ..common import dash
 from ..common.pattern import is_disabled
 from ..common.validators import seps_before, seps_after
 audio_properties = ['audio_codec', 'audio_profile', 'audio_channels']
 def audio_codec(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers the audio_codec, audio_profile and audio_channels patterns, then the rules
    validating them. Despite the pylint marker, ``config`` is read: its 'audio_channels' entry
    supplies additional channel patterns.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()\
        .regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])\
        .string_defaults(ignore_case=True)
    def audio_codec_priority(match1, match2):
        """
        Gives priority to audio_codec

        Returns the audio_profile/audio_channels match of the pair when the other one is an
        audio_codec (presumably the returned match is the one discarded - confirm against rebulk),
        '__default__' otherwise.

        :param match1:
        :type match1:
        :param match2:
        :type match2:
        :return:
        :rtype:
        """
        if match1.name == 'audio_codec' and match2.name in ['audio_profile', 'audio_channels']:
            return match2
        if match1.name in ['audio_profile', 'audio_channels'] and match2.name == 'audio_codec':
            return match1
        return '__default__'
    # --- audio_codec patterns ---
    rebulk.defaults(name='audio_codec',
                    conflict_solver=audio_codec_priority,
                    disabled=lambda context: is_disabled(context, 'audio_codec'))
    rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
    rebulk.string("MP2", value="MP2")
    rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='Dolby Digital')
    rebulk.regex('Dolby-?Atmos', 'Atmos', value='Dolby Atmos')
    rebulk.string("AAC", value="AAC")
    rebulk.string('EAC3', 'DDP', 'DD+', value='Dolby Digital Plus')
    rebulk.string("Flac", value="FLAC")
    rebulk.string("DTS", value="DTS")
    # 'DTS' followed by an optional dash and 'MA' is reported as DTS-HD (lookahead, 'MA' not consumed).
    rebulk.regex('DTS-?HD', 'DTS(?=-?MA)', value='DTS-HD',
                 conflict_solver=lambda match, other: other if other.name == 'audio_codec' else '__default__')
    rebulk.regex('True-?HD', value='Dolby TrueHD')
    rebulk.string('Opus', value='Opus')
    rebulk.string('Vorbis', value='Vorbis')
    rebulk.string('PCM', value='PCM')
    rebulk.string('LPCM', value='LPCM')
    # --- audio_profile patterns: the second tag names the codec value checked by the profile rules ---
    rebulk.defaults(clear=True,
                    name='audio_profile',
                    disabled=lambda context: is_disabled(context, 'audio_profile'))
    rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD'])
    rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD'])
    rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS'])
    rebulk.string('HE', value='High Efficiency', tags=['audio_profile.rule', 'AAC'])
    rebulk.string('LC', value='Low Complexity', tags=['audio_profile.rule', 'AAC'])
    rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital'])
    rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital'])
    # --- audio_channels patterns ---
    rebulk.defaults(clear=True,
                    name="audio_channels",
                    disabled=lambda context: is_disabled(context, 'audio_channels'))
    # Dot-less forms are tagged 'weak-audio_channels'; AudioChannelsValidatorRule filters on that tag.
    rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')
    # Configured channel patterns: items prefixed with 're:' are regular expressions, others plain strings.
    for value, items in config.get('audio_channels').items():
        for item in items:
            if item.startswith('re:'):
                rebulk.regex(item[3:], value=value, children=True)
            else:
                rebulk.string(item, value=value)
    rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
                 AudioChannelsValidatorRule)
    return rebulk
 class AudioValidatorRule(Rule):
    """
    Remove audio properties that are neither surrounded by separators nor adjacent to another
    audio property.
    """
    priority = 64
    consequence = RemoveMatch
    def when(self, matches, context):
        """
        :return: audio matches lacking, on at least one side, both a separator and an audio neighbour
        """
        def is_audio(match):
            return match.name in audio_properties
        rejected = []
        for audio in matches.range(predicate=is_audio):
            # Left side: either a separator or another audio match on the previous character.
            left_ok = seps_before(audio) or matches.range(audio.start - 1, audio.start, is_audio)
            if not left_ok:
                rejected.append(audio)
                continue
            # Right side: same check on the following character.
            right_ok = seps_after(audio) or matches.range(audio.end, audio.end + 1, is_audio)
            if not right_ok:
                rejected.append(audio)
        return rejected
 class AudioProfileRule(Rule):
    """
    Abstract rule validating audio profiles against the codec they belong to.
    """
    priority = 64
    dependency = AudioValidatorRule
    consequence = RemoveMatch
    def __init__(self, codec):
        """
        :param codec: audio_codec value (also used as profile tag) handled by this rule
        """
        super(AudioProfileRule, self).__init__()
        self.codec = codec
    def enabled(self, context):
        return not is_disabled(context, 'audio_profile')
    def when(self, matches, context):
        """
        :return: profiles without a matching codec, plus the matches conflicting with valid profiles
        """
        def is_rule_profile(match):
            return 'audio_profile.rule' in match.tags and self.codec in match.tags
        def is_own_codec(match):
            return match.name == 'audio_codec' and match.value == self.codec
        to_remove = []
        for profile in matches.named('audio_profile', is_rule_profile):
            # Look for the codec on the same span first, then before, then after the profile.
            codec = (matches.at_span(profile.span, is_own_codec, 0)
                     or matches.previous(profile, is_own_codec)
                     or matches.next(profile, is_own_codec))
            if codec:
                to_remove.extend(matches.conflicting(profile))
            else:
                to_remove.append(profile)
        return to_remove
 class DtsHDRule(AudioProfileRule):
    """
    Rule to validate DTS-HD profile

    Specialises AudioProfileRule for the 'DTS-HD' codec value.
    """
    def __init__(self):
        # The codec string must equal both the audio_codec value and the profile tag.
        super(DtsHDRule, self).__init__('DTS-HD')
 class DtsRule(AudioProfileRule):
    """
    Rule to validate DTS profile

    Specialises AudioProfileRule for the 'DTS' codec value.
    """
    def __init__(self):
        # The codec string must equal both the audio_codec value and the profile tag.
        super(DtsRule, self).__init__('DTS')
 class AacRule(AudioProfileRule):
    """
    Rule to validate AAC profile

    Specialises AudioProfileRule for the 'AAC' codec value.
    """
    def __init__(self):
        # The codec string must equal both the audio_codec value and the profile tag.
        super(AacRule, self).__init__('AAC')
 class DolbyDigitalRule(AudioProfileRule):
    """
    Rule to validate Dolby Digital profile

    Specialises AudioProfileRule for the 'Dolby Digital' codec value.
    """
    def __init__(self):
        # The codec string must equal both the audio_codec value and the profile tag.
        super(DolbyDigitalRule, self).__init__('Dolby Digital')
 class HqConflictRule(Rule):
    """
    Solve the conflict between an HQ reported as 'other' and an HQ reported as audio_profile.
    """
    dependency = [DtsHDRule, DtsRule, AacRule, DolbyDigitalRule]
    consequence = RemoveMatch
    def enabled(self, context):
        return not is_disabled(context, 'audio_profile')
    def when(self, matches, context):
        """
        :return: 'other' matches sharing their span with a 'High Quality' audio_profile match
        """
        def is_high_quality(match):
            return match.value == 'High Quality'
        hq_spans = [profile.span for profile in matches.named('audio_profile', is_high_quality)]
        def overlaps_hq_profile(match):
            return match.span in hq_spans
        return matches.named('other', overlaps_hq_profile)
 class AudioChannelsValidatorRule(Rule):
    """
    Remove weak audio_channels matches that are not directly preceded by an audio codec.
    """
    priority = 128
    consequence = RemoveMatch
    def enabled(self, context):
        return not is_disabled(context, 'audio_channels')
    def when(self, matches, context):
        """
        :return: 'weak-audio_channels' matches with no audio_codec on the character before them
        """
        def is_codec(match):
            return match.name == 'audio_codec'
        return [channels for channels in matches.tagged('weak-audio_channels')
                if not matches.range(channels.start - 1, channels.start, is_codec)]
@@ -1,74 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 video_bit_rate and audio_bit_rate properties
 """
 import re
 from rebulk import Rebulk
 from rebulk.rules import Rule, RemoveMatch, RenameMatch
 from ..common import dash, seps
 from ..common.pattern import is_disabled
 from ..common.quantity import BitRate
 from ..common.validators import seps_surround
 def bit_rate(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Every bit rate is first registered as 'audio_bit_rate'; BitRateTypeRule later renames or
    removes those matches.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # The whole builder is disabled only when both bit rate properties are disabled.
    rebulk = Rebulk(disabled=lambda context: (is_disabled(context, 'audio_bit_rate')
                                              and is_disabled(context, 'video_bit_rate')))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='audio_bit_rate', validator=seps_surround)
    # Integer or decimal amount followed by a k/m/g unit ending in 'bps', 'bit' or 'bits'.
    # The conflict solver returns this match against a non-weak audio_channels match and the other
    # match in every other case (presumably the returned match is discarded - confirm against rebulk).
    rebulk.regex(r'\d+-?[kmg]b(ps|its?)', r'\d+\.\d+-?[kmg]b(ps|its?)',
                 conflict_solver=(
                     lambda match, other: match
                     if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
                     else other
                 ),
                 formatter=BitRate.fromstring, tags=['release-group-prefix'])
    rebulk.rules(BitRateTypeRule)
    return rebulk
 class BitRateTypeRule(Rule):
    """
    Convert audio bit rate guess into video bit rate.

    An audio_bit_rate match is converted when it directly follows a source, screen_size or
    video_codec match, unless it is also directly followed by an audio_codec and its value is
    in Kbps or below 10 Mbps.
    """
    # First element of the when() response is renamed, second one is removed.
    consequence = [RenameMatch('video_bit_rate'), RemoveMatch]
    def when(self, matches, context):
        to_rename = []
        to_remove = []
        if is_disabled(context, 'audio_bit_rate'):
            # Audio bit rates are not wanted at all: drop every guess.
            to_remove.extend(matches.named('audio_bit_rate'))
        else:
            video_bit_rate_disabled = is_disabled(context, 'video_bit_rate')
            for match in matches.named('audio_bit_rate'):
                previous = matches.previous(match, index=0,
                                            predicate=lambda m: m.name in ('source', 'screen_size', 'video_codec'))
                # "Directly follows": no hole holding anything but separators in between.
                if previous and not matches.holes(previous.end, match.start, predicate=lambda m: m.value.strip(seps)):
                    after = matches.next(match, index=0, predicate=lambda m: m.name == 'audio_codec')
                    if after and not matches.holes(match.end, after.start, predicate=lambda m: m.value.strip(seps)):
                        bitrate = match.value
                        # Small bit rate right before an audio codec: leave it as an audio bit rate.
                        if bitrate.units == 'Kbps' or (bitrate.units == 'Mbps' and bitrate.magnitude < 10):
                            continue
                    if video_bit_rate_disabled:
                        to_remove.append(match)
                    else:
                        to_rename.append(match)
        if to_rename or to_remove:
            return to_rename, to_remove
        return False
@@ -1,56 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 bonus property
 """
 from rebulk.remodule import re
 from rebulk import Rebulk, AppendMatch, Rule
 from .title import TitleFromPosition
 from ..common.formatters import cleanup
 from ..common.pattern import is_disabled
 from ..common.validators import seps_surround
 def bonus(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers the 'bonus' pattern and the rule looking for the bonus title.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'bonus'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    # 'x' followed by digits; the captured digits are converted with int, and the whole
    # 'x<digits>' parent match must be surrounded by separators.
    # The conflict solver returns this match against a video_codec or episode match not tagged
    # 'weak-episode' (presumably the returned match is discarded - confirm against rebulk).
    rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
                 validator={'__parent__': seps_surround},
                 validate_all=True,
                 conflict_solver=lambda match, conflicting: match
                 if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags
                 else '__default__')
    rebulk.rules(BonusTitleRule)
    return rebulk
 class BonusTitleRule(Rule):
    """
    Find the bonus title located after the bonus number.
    """
    dependency = TitleFromPosition
    consequence = AppendMatch
    properties = {'bonus_title': [None]}
    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        """
        :return: the first hole after the bonus number, renamed 'bonus_title', or None
        """
        bonus_number = matches.named('bonus', lambda match: not match.private, index=0)
        if not bonus_number:
            return None
        # Path marker containing the bonus number; the search is bounded by its end.
        filepath = matches.markers.at_match(bonus_number, lambda marker: marker.name == 'path', 0)
        title_hole = matches.holes(bonus_number.end, filepath.end + 1, formatter=cleanup, index=0)
        if not title_hole or not title_hole.value:
            return None
        title_hole.name = 'bonus_title'
        return title_hole
@@ -1,41 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 cd and cd_count properties
 """
 from rebulk.remodule import re
 from rebulk import Rebulk
 from ..common import dash
 from ..common.pattern import is_disabled
 def cds(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object detecting the cd and cd_count properties.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def in_cd_range(match):
        # Accepted values lie strictly between 0 and 100.
        return 0 < match.value < 100
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'cd'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    # "cd<N>" optionally followed by "of<M>".
    rebulk.regex(r'cd-?(?P<cd>\d+)(?:-?of-?(?P<cd_count>\d+))?',
                 validator={'cd': in_cd_range, 'cd_count': in_cd_range},
                 formatter={'cd': int, 'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties={'cd': [None], 'cd_count': [None]})
    # "<M>cd" / "<M>cds": only the count is known.
    rebulk.regex(r'(?P<cd_count>\d+)-?cds?',
                 validator={'cd': in_cd_range, 'cd_count': in_cd_range},
                 formatter={'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties={'cd': [None], 'cd_count': [None]})
    return rebulk
@@ -1,61 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 container property
 """
 from rebulk.remodule import re
 from rebulk import Rebulk
 from ..common import seps
 from ..common.pattern import is_disabled
 from ..common.validators import seps_surround
 from ...reutils import build_or_pattern
 def container(config):
    """
    Build the Rebulk object detecting the container property.

    Containers are registered twice: as file extensions anchored at the end of the string, then as
    plain keywords surrounded by separators.

    :param config: rule configuration; provides the 'subtitles', 'info', 'videos', 'torrent' and
                   'nzb' extension lists
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def extension_conflict_solver(match, other):
        if other.name in ('source', 'video_codec') or \
                other.name == 'container' and 'extension' not in other.tags:
            return other
        return '__default__'
    def keyword_conflict_solver(match, other):
        if other.name in ('source', 'video_codec') or \
                other.name == 'container' and 'extension' in other.tags:
            return match
        return '__default__'
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'container'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name='container',
                    formatter=lambda value: value.strip(seps),
                    tags=['extension'],
                    conflict_solver=extension_conflict_solver)
    subtitles = config['subtitles']
    info = config['info']
    videos = config['videos']
    torrent = config['torrent']
    nzb = config['nzb']
    # Extension form: ".<ext>" at the very end of the string, one pattern per family.
    families = ((subtitles, 'subtitle'), (info, 'info'), (videos, 'video'),
                (torrent, 'torrent'), (nzb, 'nzb'))
    for extensions, family in families:
        rebulk.regex(r'\.'+build_or_pattern(extensions)+'$', exts=extensions, tags=['extension', family])
    # Keyword form: bare container name surrounded by separators, lower-cased.
    rebulk.defaults(clear=True,
                    name='container',
                    validator=seps_surround,
                    formatter=lambda s: s.lower(),
                    conflict_solver=keyword_conflict_solver)
    # 'sub' and 'ass' are left out of the keyword form.
    rebulk.string(*[sub for sub in subtitles if sub not in ('sub', 'ass')], tags=['subtitle'])
    rebulk.string(*videos, tags=['video'])
    rebulk.string(*torrent, tags=['torrent'])
    rebulk.string(*nzb, tags=['nzb'])
    return rebulk
@@ -1,114 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 country property
 """
 # pylint: disable=no-member
 import babelfish
 from rebulk import Rebulk
 from ..common.pattern import is_disabled
 from ..common.words import iter_words
 def country(config, common_words):
    """
    Builder for rebulk object.

    Also registers a GuessitCountryConverter, built from ``config['synonyms']``, under the
    'guessit' key of ``babelfish.country_converters``.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'country'))
    rebulk = rebulk.defaults(name='country')
    def find_countries(string, context=None):
        """
        Find countries in given string.

        Only countries listed in the 'allowed_countries' context entry are reported.
        """
        allowed_countries = context.get('allowed_countries') if context else None
        return CountryFinder(allowed_countries, common_words).find(string)
    # The pattern is disabled when the context holds no allowed countries.
    rebulk.functional(find_countries,
                      #  Prefer language and any other property over country if not US or GB.
                      conflict_solver=lambda match, other: match
                      if other.name != 'language' or match.value not in (babelfish.Country('US'),
                                                                         babelfish.Country('GB'))
                      else other,
                      properties={'country': [None]},
                      disabled=lambda context: not context.get('allowed_countries'))
    # Module-level side effect: this registration outlives the returned Rebulk object.
    babelfish.country_converters['guessit'] = GuessitCountryConverter(config['synonyms'])
    return rebulk
 class GuessitCountryConverter(babelfish.CountryReverseConverter):  # pylint: disable=missing-docstring
    def __init__(self, synonyms):
        # Lower-cased synonym -> alpha2 code; later entries override earlier ones.
        self.guessit_exceptions = {synonym.lower(): alpha2
                                   for alpha2, synlist in synonyms.items()
                                   for synonym in synlist}
    @property
    def codes(self):  # pylint: disable=missing-docstring
        name_codes = babelfish.country_converters['name'].codes
        country_names = frozenset(babelfish.COUNTRIES.values())
        synonym_codes = frozenset(self.guessit_exceptions)
        return name_codes | country_names | synonym_codes
    def convert(self, alpha2):
        # Great Britain is reported as 'UK'.
        return 'UK' if alpha2 == 'GB' else str(babelfish.Country(alpha2))
    def reverse(self, name):  # pylint:disable=arguments-differ
        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        lowered = name.lower()
        if lowered in self.guessit_exceptions:
            return self.guessit_exceptions[lowered]
        # Then try the name as an alpha2 code...
        try:
            return babelfish.Country(name.upper()).alpha2
        except ValueError:
            pass
        # ...and finally as a country name.
        from_name = babelfish.Country.fromname
        try:
            return from_name(name).alpha2
        except babelfish.CountryReverseError:
            pass
        raise babelfish.CountryReverseError(name)
 class CountryFinder(object):
    """Helper class to search and return country matches."""
    def __init__(self, allowed_countries, common_words):
        """
        :param allowed_countries: country names or alpha2 codes to report (None or empty: none)
        :param common_words: words that must never be interpreted as a country
        """
        self.allowed_countries = {name.lower() for name in allowed_countries or []}
        self.common_words = common_words
    def find(self, string):
        """
        Return all matches for country.

        :param string: input string to scan
        :return: generator of (start, end, {'value': country}) tuples; spans refer to ``string``
        """
        # Words are searched in a stripped copy of the input, so their spans are relative to that
        # copy. Remember how many leading characters were dropped to report spans that refer to
        # the original string.
        offset = len(string) - len(string.lstrip())
        for word_match in iter_words(string.strip().lower()):
            word = word_match.value
            if word.lower() in self.common_words:
                continue
            try:
                country_object = babelfish.Country.fromguessit(word)
                if (country_object.name.lower() in self.allowed_countries or
                        country_object.alpha2.lower() in self.allowed_countries):
                    yield self._to_rebulk_match(word_match, country_object, offset)
            except babelfish.Error:
                # Not a country: ignore the word.
                continue
    @classmethod
    def _to_rebulk_match(cls, word, value, offset=0):
        """
        Convert a word match into a functional pattern result.

        :param word: word match exposing a ``span`` pair
        :param value: value to attach to the result
        :param offset: amount added to both ends of the span
        :return: (start, end, {'value': value})
        """
        return word.span[0] + offset, word.span[1] + offset, {'value': value}
@@ -1,90 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 crc and uuid properties
 """
 from rebulk.remodule import re
 from rebulk import Rebulk
 from ..common.pattern import is_disabled
 from ..common.validators import seps_surround
 def crc(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers the 'crc32' pattern and the 'uuid' functional pattern; both must be surrounded by
    separators.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'crc32'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)
    # Exactly eight hexadecimal characters. The solver returns the other match when it is an
    # episode or a season (presumably the returned match is discarded - confirm against rebulk).
    rebulk.regex('(?:[a-fA-F]|[0-9]){8}', name='crc32',
                 conflict_solver=lambda match, other: other
                 if other.name in ['episode', 'season']
                 else '__default__')
    # Unlike crc32, the uuid solver returns its own match against an episode or a season.
    rebulk.functional(guess_idnumber, name='uuid',
                      conflict_solver=lambda match, other: match
                      if other.name in ['episode', 'season']
                      else '__default__')
    return rebulk
 _DIGIT = 0
 _LETTER = 1
 _OTHER = 2
 _idnum = re.compile(r'(?P<uuid>[a-zA-Z0-9-]{20,})')  # 1.0, (0, 0))
 def guess_idnumber(string):
    """
    Find spans that look like identifiers (hash-like runs of letters, digits and dashes).

    A candidate is a run of at least 20 such characters. It is kept only when its characters switch
    often between digit/letter/other and when consecutive letters differ often.

    :param string: input string to scan
    :type string: str
    :return: list of (start, end) spans
    :rtype: list
    """
    # pylint:disable=invalid-name
    def kind_of(char):
        if char in '0123456789':
            return _DIGIT
        if char in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
            return _LETTER
        return _OTHER
    spans = []
    for found in _idnum.finditer(string):
        candidate = found.group('uuid')
        kinds = [kind_of(char) for char in candidate]
        letters = [char for char, kind in zip(candidate, kinds) if kind == _LETTER]
        # Number of positions whose kind differs from the previous one; the run is assumed to be
        # preceded by a letter.
        kind_switches = sum(1 for before, after in zip([_LETTER] + kinds, kinds) if before != after)
        # Number of letters differing from the previous letter; the first letter always counts.
        letter_switches = sum(1 for before, after in zip([None] + letters, letters) if before != after)
        # only keep the candidate as probable if it alternates often between
        # char types (more likely for hash values than for common words)
        switch_ratio = float(kind_switches) / len(candidate)
        letters_ratio = (float(letter_switches) / len(letters)) if len(letters) > 0 else 1
        if switch_ratio > 0.4 and letters_ratio > 0.4:
            spans.append(found.span())
    return spans
@@ -1,84 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 date and year properties
 """
 from rebulk import Rebulk, RemoveMatch, Rule
 from ..common.date import search_date, valid_year
 from ..common.pattern import is_disabled
 from ..common.validators import seps_surround
 def date(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object that detects ``year`` and ``date`` properties.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def year_disabled(context):
        """Whether the year property is disabled for this context."""
        return is_disabled(context, 'year')
    def year_conflict_solver(match, other):
        """Yield to a strictly shorter episode/season match, else use the default."""
        if other.name in ('episode', 'season') and len(other.raw) < len(match.raw):
            return other
        return '__default__'
    def year_validator(match):
        """Accept a year surrounded by separators whose value passes valid_year."""
        return seps_surround(match) and valid_year(match.value)
    def date_disabled(context):
        """Whether the date property is disabled for this context."""
        return is_disabled(context, 'date')
    def date_conflict_solver(match, other):
        """Yield to episode, season and crc32 matches, else use the default."""
        if other.name in ('episode', 'season', 'crc32'):
            return other
        return '__default__'
    def date_functional(string, context):
        """
        Search for date in the string and retrieves match
        :param string:
        :return:
        """
        found = search_date(string, context.get('date_year_first'), context.get('date_day_first'))
        if not found:
            return None
        return found[0], found[1], {'value': found[2]}
    rebulk = Rebulk().defaults(validator=seps_surround)
    rebulk.regex(r"\d{4}", name="year", formatter=int,
                 disabled=year_disabled,
                 conflict_solver=year_conflict_solver,
                 validator=year_validator)
    rebulk.functional(date_functional, name="date", properties={'date': [None]},
                      disabled=date_disabled,
                      conflict_solver=date_conflict_solver)
    rebulk.rules(KeepMarkedYearInFilepart)
    return rebulk
 class KeepMarkedYearInFilepart(Rule):
    """
    Select the surplus year matches to remove in each filepart.

    When a filepart has years both inside and outside group markers, only the
    first year inside a marker is kept. When no year is inside a marker, only
    the second year is kept, so the first one does not override the title.
    """
    priority = 64
    consequence = RemoveMatch
    def enabled(self, context):
        year_disabled = is_disabled(context, 'year')
        return not year_disabled
    def when(self, matches, context):
        to_remove = []
        if len(matches.named('year')) <= 1:
            return to_remove
        for filepart in matches.markers.named('path'):
            years = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year')
            if len(years) <= 1:
                continue
            marked = []
            unmarked = []
            for year in years:
                in_group = matches.markers.at_match(year, lambda marker: marker.name == 'group')
                target = marked if in_group else unmarked
                target.append(year)
            if not marked:
                # Drop the first year (left for the title) and everything after the second.
                to_remove.append(unmarked[0])
                to_remove.extend(unmarked[2:])
            elif unmarked:
                # Keep only the first year found inside a marker.
                to_remove.extend(unmarked)
                to_remove.extend(marked[1:])
        return to_remove
@@ -1,52 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 edition property
 """
 from rebulk.remodule import re
 from rebulk import Rebulk
 from ..common import dash
 from ..common.pattern import is_disabled
 from ..common.validators import seps_surround
 def edition(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object that detects the ``edition`` property.

    Each pattern registered below maps one or more spellings to a canonical
    edition value (for example ``'Collector'`` or ``"Director's Cut"``).

    :param config: rule configuration (not used by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # The whole builder is switched off when 'edition' is disabled in the context.
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'edition'))
    # Regex patterns are case-insensitive and use the `dash` abbreviation;
    # string patterns ignore case.
    # NOTE(review): `dash` presumably lets '-' in the patterns below stand for
    # any separator -- confirm against ..common.dash.
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    # Defaults for the patterns below: name 'edition', validated with seps_surround.
    rebulk.defaults(name='edition', validator=seps_surround)
    rebulk.regex('collector', "collector'?s?-edition", 'edition-collector', value='Collector')
    # When the other match is an episode_details 'Special', that match is
    # returned from the conflict solver instead of this edition match.
    rebulk.regex('special-edition', 'edition-special', value='Special',
                 conflict_solver=lambda match, other: other
                 if other.name == 'episode_details' and other.value == 'Special'
                 else '__default__')
    # Short forms; 'se' is additionally tagged 'has-neighbor'.
    rebulk.string('se', value='Special', tags='has-neighbor')
    rebulk.string('ddc', value="Director's Definitive Cut")
    rebulk.regex('criterion-edition', 'edition-criterion', 'CC', value='Criterion')
    rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe')
    rebulk.regex('limited', 'limited-edition', value='Limited', tags=['has-neighbor', 'release-group-prefix'])
    rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical')
    rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
                 value="Director's Cut")
    rebulk.regex('extended', 'extended-?cut', 'extended-?version',
                 value='Extended', tags=['has-neighbor', 'release-group-prefix'])
    rebulk.regex('alternat(e|ive)(?:-?Cut)?', value='Alternative Cut', tags=['has-neighbor', 'release-group-prefix'])
    # Single words whose edition value is the word itself.
    for value in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
        rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
    rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])
    rebulk.regex('imax', 'imax-edition', value='IMAX')
    rebulk.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan')
    rebulk.regex('ultimate-edition', value='Ultimate')
    # Combined spellings produce a list of two edition values.
    rebulk.regex("ultimate-collector'?s?-edition", value=['Ultimate', 'Collector'])
    rebulk.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection', value=['Ultimate', 'Fan'])
    return rebulk
@@ -1,300 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Episode title
 """
 from collections import defaultdict
 from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PROCESS
 from ..common import seps, title_seps
 from ..common.formatters import cleanup
 from ..common.pattern import is_disabled
 from ..common.validators import or_
 from ..properties.title import TitleFromPosition, TitleBaseRule
 from ..properties.type import TypeProcessor
 def episode_title(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the episode title rules.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # Match names passed to the rules below as ``previous_names``.
    preceding_names = ('episode', 'episode_count', 'season', 'season_count',
                       'date', 'title', 'year')
    def episode_title_disabled(context):
        """Whether the episode_title property is disabled for this context."""
        return is_disabled(context, 'episode_title')
    builder = Rebulk(disabled=episode_title_disabled)
    return builder.rules(
        RemoveConflictsWithEpisodeTitle(preceding_names),
        EpisodeTitleFromPosition(preceding_names),
        AlternativeTitleReplace(preceding_names),
        TitleToEpisodeTitle,
        Filepart3EpisodeTitle,
        Filepart2EpisodeTitle,
        RenameEpisodeTitleWhenMovieType)
 class RemoveConflictsWithEpisodeTitle(Rule):
    """
    Drop 'part' and 'year' matches that would get in the way of episode_title parsing.

    A match is dropped when it sits between a match named in ``previous_names``
    and one named in ``next_names`` and is separated from them by a hole with
    a value in the same group ('part' matches need such a hole after them).
    """
    priority = 64
    consequence = RemoveMatch
    def __init__(self, previous_names):
        super(RemoveConflictsWithEpisodeTitle, self).__init__()
        self.affected_names = ('part', 'year')
        self.affected_if_holes_after = ('part', )
        self.previous_names = previous_names
        self.next_names = ('streaming_service', 'screen_size', 'source',
                           'video_codec', 'audio_codec', 'other', 'container')
    def when(self, matches, context):
        conflicts = []
        for filepart in matches.markers.named('path'):
            candidates = matches.range(filepart.start, filepart.end,
                                       predicate=lambda m: m.name in self.affected_names)
            for match in candidates:
                # Closest public match before the candidate must be a "previous" name.
                before = matches.range(filepart.start, match.start,
                                       predicate=lambda m: not m.private, index=-1)
                if not before or before.name not in self.previous_names:
                    continue
                # Closest public match after the candidate must be a "next" name.
                after = matches.range(match.end, filepart.end,
                                      predicate=lambda m: not m.private, index=0)
                if not after or after.name not in self.next_names:
                    continue
                group = matches.markers.at_match(match, predicate=lambda m: m.name == 'group', index=0)
                def has_value_in_same_group(current_match, current_group=group):
                    """Return true if current match has value and belongs to the current group."""
                    stripped = current_match.value.strip(seps)
                    return stripped and (
                        current_group == matches.markers.at_match(current_match,
                                                                  predicate=lambda mm: mm.name == 'group',
                                                                  index=0)
                    )
                holes_before = matches.holes(before.end, match.start, predicate=has_value_in_same_group)
                holes_after = matches.holes(match.end, after.start, predicate=has_value_in_same_group)
                if not (holes_before or holes_after):
                    continue
                if not holes_after and match.name in self.affected_if_holes_after:
                    continue
                conflicts.append(match)
                if match.parent:
                    conflicts.append(match.parent)
        return conflicts
 class TitleToEpisodeTitle(Rule):
    """
    When several distinct title values exist, turn the titles that have an
    episode match before them into episode_title.
    """
    dependency = TitleFromPosition
    def when(self, matches, context):
        titles = matches.named('title')
        distinct_values = {title.value for title in titles}
        # Nothing to convert unless at least two different title values exist.
        if len(distinct_values) < 2:
            return []
        return [title for title in titles
                if matches.previous(title, lambda match: match.name == 'episode')]
    def then(self, matches, when_response, context):
        for renamed in when_response:
            # Remove, rename, then append the match again under its new name.
            matches.remove(renamed)
            renamed.name = 'episode_title'
            matches.append(renamed)
 class EpisodeTitleFromPosition(TitleBaseRule):
    """
    Add an episode_title match to the existing matches, based on position.
    Runs after TitleToEpisodeTitle, and only when no episode_title exists yet.
    """
    dependency = TitleToEpisodeTitle
    def __init__(self, previous_names):
        super(EpisodeTitleFromPosition, self).__init__('episode_title', ['title'])
        self.previous_names = previous_names
    def hole_filter(self, hole, matches):
        preceding = matches.previous(hole,
                                     lambda previous: previous.named(*self.previous_names),
                                     0)
        if preceding:
            return preceding
        # Otherwise fall back to the crc32 matches.
        return matches.named('crc32')
    def filepart_filter(self, filepart, matches):
        # Only the filepart where a title was found is accepted.
        titles_in_part = matches.range(filepart.start, filepart.end, lambda match: match.name == 'title')
        return bool(titles_in_part)
    def should_remove(self, match, matches, filepart, hole, context):
        if match.name != 'episode_details':
            return super(EpisodeTitleFromPosition, self).should_remove(match, matches, filepart, hole, context)
        return False
    def when(self, matches, context):
        if not matches.named('episode_title'):
            return super(EpisodeTitleFromPosition, self).when(matches, context)
        return None
 class AlternativeTitleReplace(Rule):
    """
    Turn the first alternative_title into episode_title when the title chained
    before it follows one of the ``previous_names`` matches, or when a crc32
    match exists.
    """
    dependency = EpisodeTitleFromPosition
    consequence = RenameMatch
    def __init__(self, previous_names):
        super(AlternativeTitleReplace, self).__init__()
        self.previous_names = previous_names
    def when(self, matches, context):
        # An episode_title already exists: nothing to replace.
        if matches.named('episode_title'):
            return None
        alternative_title = matches.range(predicate=lambda match: match.name == 'alternative_title', index=0)
        if not alternative_title:
            return None
        main_title = matches.chain_before(alternative_title.start, seps=seps,
                                          predicate=lambda match: 'title' in match.tags, index=0)
        if not main_title:
            return None
        preceding = matches.previous(main_title,
                                     lambda previous: previous.named(*self.previous_names),
                                     0)
        if preceding or matches.named('crc32'):
            return alternative_title
        return None
    def then(self, matches, when_response, context):
        replaced = when_response
        matches.remove(replaced)
        replaced.name = 'episode_title'
        # The tag is checked by RenameEpisodeTitleWhenMovieType.
        replaced.tags.append('alternative-replaced')
        matches.append(replaced)
 class RenameEpisodeTitleWhenMovieType(Rule):
    """
    Rename every episode_title to alternative_title when no 'type' match has
    the value 'episode' and at least one episode_title lacks the
    'alternative-replaced' tag.
    """
    priority = POST_PROCESS
    dependency = TypeProcessor
    consequence = RenameMatch
    def when(self, matches, context):
        untagged = matches.named('episode_title', lambda m: 'alternative-replaced' not in m.tags)
        if not untagged:
            return None
        if matches.named('type', lambda m: m.value == 'episode'):
            return None
        return matches.named('episode_title')
    def then(self, matches, when_response, context):
        for renamed in when_response:
            matches.remove(renamed)
            renamed.name = 'alternative_title'
            matches.append(renamed)
 class Filepart3EpisodeTitle(Rule):
    """
    Handle paths made of at least 3 fileparts structured like this:
    Serie name/S01/E01-episode_title.mkv
    AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC
    Serie name/S01/episode_title-E01.mkv
    AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC
    When CCCC holds an episode and BBB holds a season,
    the title is taken from the first hole of AAAA.
    """
    consequence = AppendMatch('title')
    def when(self, matches, context):
        # A filepart title was already tagged: nothing more to do.
        if matches.tagged('filepart-title'):
            return None
        fileparts = matches.markers.named('path')
        if len(fileparts) < 3:
            return None
        subdirectory, directory, filename = fileparts[-3], fileparts[-2], fileparts[-1]
        episode_number = matches.range(filename.start, filename.end, lambda match: match.name == 'episode', 0)
        if not episode_number:
            return None
        season = matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0)
        if not season:
            return None
        hole = matches.holes(subdirectory.start, subdirectory.end,
                             ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
                             formatter=cleanup, seps=title_seps, predicate=lambda match: match.value,
                             index=0)
        if not hole:
            return None
        return hole
 class Filepart2EpisodeTitle(Rule):
    """
    Handle paths made of at least 2 fileparts structured like this:
    Serie name S01/E01-episode_title.mkv
    AAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBB
    When BBBB holds an episode and AAA holds a season,
    the title is taken from the first hole of AAAA.
    or
    Serie name/S01E01-episode_title.mkv
    AAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBB
    When BBBB holds both season and episode,
    the title is taken from the first hole of AAAA.
    """
    consequence = AppendMatch('title')
    def when(self, matches, context):
        # A filepart title was already tagged: nothing more to do.
        if matches.tagged('filepart-title'):
            return None
        fileparts = matches.markers.named('path')
        if len(fileparts) < 2:
            return None
        directory, filename = fileparts[-2], fileparts[-1]
        episode_number = matches.range(filename.start, filename.end, lambda match: match.name == 'episode', 0)
        if not episode_number:
            return None
        # The season is looked up in the directory first, then in the filename.
        season = (matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0) or
                  matches.range(filename.start, filename.end, lambda match: match.name == 'season', 0))
        if not season:
            return None
        hole = matches.holes(directory.start, directory.end,
                             ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
                             formatter=cleanup, seps=title_seps,
                             predicate=lambda match: match.value, index=0)
        if not hole:
            return None
        hole.tags.append('filepart-title')
        return hole
@@ -1,912 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 episode, season, disc, episode_count, season_count and episode_details properties
 """
 import copy
 from collections import defaultdict
 from rebulk import Rebulk, RemoveMatch, Rule, AppendMatch, RenameMatch
 from rebulk.match import Match
 from rebulk.remodule import re
 from rebulk.utils import is_iterable
 from guessit.rules import match_processors
 from guessit.rules.common.numeral import parse_numeral, numeral
 from .title import TitleFromPosition
 from ..common import dash, alt_dash, seps, seps_no_fs
 from ..common.formatters import strip
 from ..common.pattern import is_disabled
 from ..common.validators import seps_surround, int_coercable, and_
 from ...reutils import build_or_pattern
 def episodes(config):
    """
    Builder for rebulk object.
    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    def is_season_episode_disabled(context):
        """Whether season and episode rules should be enabled."""
        return is_disabled(context, 'episode') or is_disabled(context, 'season')
    def episodes_season_chain_breaker(matches):
        """
        Break chains if there's more than 100 offset between two neighbor values.
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        eps = matches.named('episode')
        if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > episode_max_range:
            return True
        seasons = matches.named('season')
        if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > season_max_range:
            return True
        return False
    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns
        :param match:
        :param other:
        :return:
        """
        if match.name != other.name:
            if match.name == 'episode' and other.name == 'year':
                return match
            if match.name in ('season', 'episode'):
                if other.name in ('video_codec', 'audio_codec', 'container', 'date'):
                    return match
                if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
                        and not match.initiator.children.named(match.name + 'Marker')) or (
                            other.name == 'screen_size' and not int_coercable(other.raw)):
                    return match
                if other.name in ('season', 'episode') and match.initiator != other.initiator:
                    if (match.initiator.name in ('weak_episode', 'weak_duplicate')
                            and other.initiator.name in ('weak_episode', 'weak_duplicate')):
                        return '__default__'
                    for current in (match, other):
                        if 'weak-episode' in current.tags or 'x' in current.initiator.raw.lower():
                            return current
        return '__default__'
    def ordering_validator(match):
        """
        Validator for season list. They should be in natural order to be validated.
        episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
        or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)
        """
        values = match.children.to_dict()
        if 'season' in values and is_iterable(values['season']):
            # Season numbers must be in natural order to be validated.
            if not list(sorted(values['season'])) == values['season']:
                return False
        if 'episode' in values and is_iterable(values['episode']):
            # Season numbers must be in natural order to be validated.
            if not list(sorted(values['episode'])) == values['episode']:
                return False
        def is_consecutive(property_name):
            """
            Check if the property season or episode has valid consecutive values.
            :param property_name:
            :type property_name:
            :return:
            :rtype:
            """
            previous_match = None
            valid = True
            for current_match in match.children.named(property_name):
                if previous_match:
                    match.children.previous(current_match,
                                            lambda m: m.name == property_name + 'Separator')
                    separator = match.children.previous(current_match,
                                                        lambda m: m.name == property_name + 'Separator', 0)
                    if separator:
                        if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                            if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
                                valid = False
                        if separator.raw in strong_discrete_separators:
                            valid = True
                            break
                previous_match = current_match
            return valid
        return is_consecutive('episode') and is_consecutive('season')
    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators
        :param match:
        :type match:
        :return:
        :rtype:
        """
        if int_coercable(match.raw):
            return True
        return seps_surround(match)
    season_words = config['season_words']
    episode_words = config['episode_words']
    of_words = config['of_words']
    all_words = config['all_words']
    season_markers = config['season_markers']
    season_ep_markers = config['season_ep_markers']
    disc_markers = config['disc_markers']
    episode_markers = config['episode_markers']
    range_separators = config['range_separators']
    weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
    strong_discrete_separators = config['discrete_separators']
    discrete_separators = strong_discrete_separators + weak_discrete_separators
    episode_max_range = config['episode_max_range']
    season_max_range = config['season_max_range']
    max_range_gap = config['max_range_gap']
    rebulk = Rebulk() \
        .regex_defaults(flags=re.IGNORECASE) \
        .string_defaults(ignore_case=True) \
        .chain_defaults(chain_breaker=episodes_season_chain_breaker) \
        .defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
                  formatter={'season': int, 'episode': int, 'version': int, 'count': int},
                  children=True,
                  private_parent=True,
                  conflict_solver=season_episode_conflict_solver,
                  abbreviations=[alt_dash])
    # S01E02, 01x02, S01S02S03
    rebulk.chain(
        tags=['SxxExx'],
        validate_all=True,
        validator={'__parent__': and_(seps_surround, ordering_validator)},
        disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
               build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)')\
        .repeater('+') \
        .regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*')
    rebulk.chain(tags=['SxxExx'],
                 validate_all=True,
                 validator={'__parent__': and_(seps_surround, ordering_validator)},
                 disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)').repeater('+') \
    rebulk.chain(tags=['SxxExx'],
                 validate_all=True,
                 validator={'__parent__': and_(seps_surround, ordering_validator)},
                 disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)') \
        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*')
    rebulk.chain(tags=['SxxExx'],
                 validate_all=True,
                 validator={'__parent__': and_(seps_surround, ordering_validator)},
                 disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)') \
        .regex('(?P<other>Extras)', name='other', value='Extras', tags=['no-release-group-prefix']).repeater('?') \
        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
                                name='seasonSeparator',
                                escape=True) +
               r'(?P<season>\d+)').repeater('*')
    # episode_details property
    for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
        rebulk.string(episode_detail,
                      private_parent=False,
                      children=False,
                      value=episode_detail,
                      name='episode_details',
                      disabled=lambda context: is_disabled(context, 'episode_details'))
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
                    validate_all=True,
                    validator={'__parent__': and_(seps_surround, ordering_validator)},
                    children=True,
                    private_parent=True,
                    conflict_solver=season_episode_conflict_solver)
    rebulk.chain(validate_all=True,
                 conflict_solver=season_episode_conflict_solver,
                 formatter={'season': parse_numeral, 'count': parse_numeral},
                 validator={'__parent__': and_(seps_surround, ordering_validator),
                            'season': validate_roman,
                            'count': validate_roman},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
        .defaults(formatter={'season': parse_numeral, 'count': parse_numeral},
                  validator={'season': validate_roman, 'count': validate_roman},
                  conflict_solver=season_episode_conflict_solver) \
        .regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
                                        name='seasonSeparator', escape=True) +
               r'@?(?P<season>\d+)').repeater('*')
    rebulk.defaults(abbreviations=[dash])
    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))
    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 validator={'episode': validate_roman},
                 formatter={'episode': parse_numeral},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))
    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 formatter={'other': lambda match: 'Complete'},
                 disabled=lambda context: is_disabled(context, 'season'))
    # 12, 13
    rebulk.chain(tags=['weak-episode'],
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None, tags=['weak-episode']) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})', abbreviations=None).repeater('*')
    # 012, 013
    rebulk.chain(tags=['weak-episode'],
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None, tags=['weak-episode']) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
    # 112, 113
    rebulk.chain(tags=['weak-episode'],
                 name='weak_episode',
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None, tags=['weak-episode'], name='weak_episode') \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})', abbreviations=None).repeater('*')
    # 1, 2, 3
    rebulk.chain(tags=['weak-episode'],
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
        .defaults(validator=None, tags=['weak-episode']) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
    # e112, e113, 1e18, 3e19
    rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
    # ep 112, ep113, ep112, ep113
    rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
    # cap 112, cap 112_114
    rebulk.chain(tags=['see-pattern'],
                 disabled=is_season_episode_disabled) \
        .defaults(validator=None, tags=['see-pattern']) \
        .regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')
    # 102, 0102
    rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
                 name='weak_duplicate',
                 conflict_solver=season_episode_conflict_solver,
                 disabled=lambda context: (context.get('episode_prefer_number', False) or
                                           context.get('type') == 'movie') or is_season_episode_disabled(context)) \
        .defaults(tags=['weak-episode', 'weak-duplicate'],
                  name='weak_duplicate',
                  validator=None,
                  conflict_solver=season_episode_conflict_solver) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})', abbreviations=None).repeater('*')
    rebulk.regex(r'v(?P<version>\d+)',
                 formatter=int,
                 disabled=lambda context: is_disabled(context, 'version'))
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])
    # TODO: List of words
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 formatter=int,
                 pre_match_processor=match_processors.strip,
                 disabled=lambda context: is_disabled(context, 'episode'))
    rebulk.regex(r'Minisodes?',
                 children=False,
                 private_parent=False,
                 name='episode_format',
                 value="Minisode",
                 disabled=lambda context: is_disabled(context, 'episode_format'))
    rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
                 SeePatternRange(range_separators + ['_']),
                 EpisodeNumberSeparatorRange(range_separators),
                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
                 EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak(episode_words),
                 RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)
    return rebulk
 class WeakConflictSolver(Rule):
    """
    Rule to decide whether weak-episode or weak-duplicate matches should be kept.
    If an anime is detected:
        - weak-duplicate matches should be removed
        - weak-episode matches should be tagged as anime
    Otherwise:
        - weak-episode matches are removed unless they're part of an episode range match.
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]
    def enabled(self, context):
        """Enable the rule unless the context type is 'movie'."""
        return context.get('type') != 'movie'
    @classmethod
    def is_anime(cls, matches):
        """Return True if it seems to be an anime.
        Anime characteristics:
            - version, crc32 matches
            - screen_size inside brackets
            - release_group at start and inside brackets
        """
        # Any version or crc32 match is taken as sufficient evidence.
        if matches.named('version') or matches.named('crc32'):
            return True
        for group in matches.markers.named('group'):
            # A screen_size match located inside a group marker.
            if matches.range(group.start, group.end, predicate=lambda m: m.name == 'screen_size'):
                return True
            # A group marker starting where a path marker starts, whose first hole
            # has exactly the same raw text as the group itself.
            if matches.markers.starting(group.start, predicate=lambda m: m.name == 'path'):
                hole = matches.holes(group.start, group.end, index=0)
                if hole and hole.raw == group.raw:
                    return True
        return False
    def when(self, matches, context):
        """
        Collect the matches to drop and the re-tagged copies to add.

        :return: ``(to_remove, to_append)`` when either list is non-empty, otherwise False
        """
        to_remove = []
        to_append = []
        # The anime decision is made once for the whole input, not per path part.
        anime_detected = self.is_anime(matches)
        for filepart in matches.markers.named('path'):
            # Matches produced by the chains named 'weak_episode' / 'weak_duplicate'.
            weak_matches = matches.range(filepart.start, filepart.end, predicate=(
                lambda m: m.initiator.name == 'weak_episode'))
            weak_dup_matches = matches.range(filepart.start, filepart.end, predicate=(
                lambda m: m.initiator.name == 'weak_duplicate'))
            if anime_detected:
                if weak_matches:
                    to_remove.extend(weak_dup_matches)
                    # Replace every episode not coming from weak_duplicate by a copy
                    # carrying an additional 'anime' tag.
                    for match in matches.range(filepart.start, filepart.end, predicate=(
                            lambda m: m.name == 'episode' and m.initiator.name != 'weak_duplicate')):
                        episode = copy.copy(match)
                        episode.tags = episode.tags + ['anime']
                        to_append.append(episode)
                        to_remove.append(match)
            elif weak_dup_matches:
                # weak_episode episodes whose initiator also holds an episodeSeparator child.
                episodes_in_range = matches.range(filepart.start, filepart.end, predicate=(
                    lambda m:
                    m.name == 'episode' and m.initiator.name == 'weak_episode'
                    and m.initiator.children.named('episodeSeparator')
                ))
                if not episodes_in_range and not matches.range(filepart.start, filepart.end,
                                                               predicate=lambda m: 'SxxExx' in m.tags):
                    to_remove.extend(weak_matches)
                else:
                    # Re-add the range episodes with their tags cleared.
                    for match in episodes_in_range:
                        episode = copy.copy(match)
                        episode.tags = []
                        to_append.append(episode)
                        to_remove.append(match)
                # NOTE: to_append is shared by all path parts, so entries added while
                # handling an earlier part also trigger this removal.
                if to_append:
                    to_remove.extend(weak_dup_matches)
        if to_remove or to_append:
            return to_remove, to_append
        return False
 class CountValidator(Rule):
    """
    Validate ``count`` matches and rename them to ``episode_count`` or ``season_count``.

    A count is attributed to the closest preceding episode or season match;
    counts with no such predecessor are removed.
    """
    priority = 64
    consequence = [RemoveMatch, RenameMatch('episode_count'), RenameMatch('season_count')]
    properties = {'episode_count': [None], 'season_count': [None]}
    def when(self, matches, context):
        """
        Sort count matches into three buckets, one per consequence.

        :return: ``(to_remove, episode_counts, season_counts)`` or False when all are empty
        """
        orphans, per_episode, per_season = [], [], []
        for counter in matches.named('count'):
            anchor = matches.previous(counter, lambda match: match.name in ['episode', 'season'], 0)
            if not anchor:
                # Nothing to attach the count to.
                orphans.append(counter)
                continue
            if anchor.name == 'episode':
                per_episode.append(counter)
            elif anchor.name == 'season':
                per_season.append(counter)
        if not (orphans or per_episode or per_season):
            return False
        return orphans, per_episode, per_season
 class SeePatternRange(Rule):
    """
    Expand episode ranges written with the SEE pattern, e.g. ``Cap.102_104``.
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]
    def __init__(self, range_separators):
        """
        :param range_separators: separator values that denote a range
        """
        super(SeePatternRange, self).__init__()
        self.range_separators = range_separators
    def when(self, matches, context):
        """
        Replace each see-pattern separator by the episodes it implies.

        :return: ``(to_remove, to_append)`` or False when both are empty
        """
        def see_pattern(name):
            # Predicate factory: see-pattern tagged match with the given name.
            return lambda m: m.name == name and 'see-pattern' in m.tags

        discarded, created = [], []
        for sep in matches.tagged('see-pattern', lambda m: m.name == 'episodeSeparator'):
            first_episode = matches.previous(sep, see_pattern('episode'), 0)
            following_season = matches.next(sep, see_pattern('season'), 0)
            if following_season:
                last_episode = matches.next(following_season, see_pattern('episode'), 0)
                if first_episode and last_episode and sep.value in self.range_separators:
                    discarded.append(last_episode)
                    # Upper bound is inclusive: the last episode is re-created as a copy.
                    for number in range(first_episode.value + 1, last_episode.value + 1):
                        clone = copy.copy(last_episode)
                        clone.value = number
                        created.append(clone)
                # The separator is only dropped when a season follows it.
                discarded.append(sep)
        return (discarded, created) if discarded or created else False
 class AbstractSeparatorRange(Rule):
    """
    Remove separator matches and create matches for a range of the configured property.

    ``property_name`` selects both the value matches (``<property_name>``) and the
    separator matches (``<property_name>Separator``) this rule works on.
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]
    def __init__(self, range_separators, property_name):
        """
        :param range_separators: separator strings that denote a range
        :param property_name: name of the matches to expand
        """
        super(AbstractSeparatorRange, self).__init__()
        self.range_separators = range_separators
        self.property_name = property_name
    def when(self, matches, context):
        """
        Expand ranges and drop separator matches.

        :return: ``(to_remove, to_append)`` when either list is non-empty, otherwise False
        """
        to_remove = []
        to_append = []
        # Pass 1: explicit separator matches.
        for separator in matches.named(self.property_name + 'Separator'):
            previous_match = matches.previous(separator, lambda m: m.name == self.property_name, 0)
            next_match = matches.next(separator, lambda m: m.name == self.property_name, 0)
            initiator = separator.initiator
            if previous_match and next_match and separator.value in self.range_separators:
                to_remove.append(next_match)
                # Fill in the values strictly between both ends as copies of next_match.
                for episode_number in range(previous_match.value + 1, next_match.value):
                    match = copy.copy(next_match)
                    match.value = episode_number
                    initiator.children.append(match)
                    to_append.append(match)
                # next_match is re-appended after the generated copies.
                to_append.append(next_match)
            # The separator itself is always removed.
            to_remove.append(separator)
        # Pass 2: consecutive property matches separated by raw input text.
        previous_match = None
        for next_match in matches.named(self.property_name):
            if previous_match:
                # Raw text between the two initiators.
                separator = matches.input_string[previous_match.initiator.end:next_match.initiator.start]
                # Retry with the stripped text when the raw text is not a range separator.
                if separator not in self.range_separators:
                    separator = strip(separator)
                if separator in self.range_separators:
                    initiator = previous_match.initiator
                    for episode_number in range(previous_match.value + 1, next_match.value):
                        match = copy.copy(next_match)
                        match.value = episode_number
                        initiator.children.append(match)
                        to_append.append(match)
                    # Record a private separator match for the text in between.
                    to_append.append(Match(previous_match.end, next_match.start - 1,
                                           name=self.property_name + 'Separator',
                                           private=True,
                                           input_string=matches.input_string))
                to_remove.append(next_match)  # Remove and append match to support proper ordering
                to_append.append(next_match)
            previous_match = next_match
        if to_remove or to_append:
            return to_remove, to_append
        return False
 class RenameToAbsoluteEpisode(Rule):
    """
    Rename episode matches to absolute_episode.

    Absolute episodes are only used when two groups of episodes are detected:
        S02E04-06 25-27
        25-27 S02E04-06
        2x04-06  25-27
        28. Anime Name S02E05
    The group holding the higher episode values is the one renamed.
    """
    consequence = RenameMatch('absolute_episode')
    def when(self, matches, context):
        """
        Select the episode matches that should become absolute episodes.

        :return: list of matches to rename, or None when the two groups cannot be compared
        """
        multi_episode_initiators = {candidate.initiator for candidate in matches.named('episode')
                                    if len(candidate.initiator.children.named('episode')) > 1}
        if len(multi_episode_initiators) != 2:
            # Fallback: weak_episode matches starting a path part that holds a later episode.
            leading = []
            for part in matches.markers.named('path'):
                later_episodes = matches.range(part.start + 1, part.end, predicate=lambda m: m.name == 'episode')
                if later_episodes:
                    leading.extend(
                        matches.starting(part.start, predicate=lambda m: m.initiator.name == 'weak_episode'))
            return leading
        earlier, later = sorted(multi_episode_initiators, key=lambda item: item.end)
        # Both groups must be adjacent: no hole with non-separator content in between.
        if matches.holes(earlier.end, later.start, predicate=lambda m: m.raw.strip(seps)):
            return None
        first_range = matches.named('episode', predicate=lambda m: m.initiator == earlier)
        second_range = matches.named('episode', predicate=lambda m: m.initiator == later)
        if len(first_range) != len(second_range):
            return None
        if second_range[0].value > first_range[0].value:
            return second_range
        if first_range[0].value > second_range[0].value:
            return first_range
        return None
 class EpisodeNumberSeparatorRange(AbstractSeparatorRange):
    """
    Remove separator matches and create matches for episode number ranges.
    """
    def __init__(self, range_separators):
        """
        :param range_separators: separator strings that denote a range
        """
        super(EpisodeNumberSeparatorRange, self).__init__(range_separators=range_separators,
                                                          property_name="episode")
 class SeasonSeparatorRange(AbstractSeparatorRange):
    """
    Remove separator matches and create matches for season ranges.
    """
    def __init__(self, range_separators):
        """
        :param range_separators: separator strings that denote a range
        """
        super(SeasonSeparatorRange, self).__init__(range_separators=range_separators,
                                                   property_name="season")
 class RemoveWeakIfMovie(Rule):
    """
    Remove weak-episode tagged matches if it seems to be a movie.

    The presence of a year match in any path part is what triggers the removal.
    """
    priority = 64
    consequence = RemoveMatch
    def enabled(self, context):
        """Enable the rule unless the context type is 'episode'."""
        return context.get('type') != 'episode'
    def when(self, matches, context):
        """
        Collect weak-episode matches (and matches conflicting with a year) to remove.

        :return: list of matches to remove (possibly empty)
        """
        to_remove = []
        # Initiators/matches that must survive the weak-episode cleanup.
        to_ignore = set()
        remove = False
        for filepart in matches.markers.named('path'):
            # First year match of this path part, if any.
            year = matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'year', index=0)
            if year:
                remove = True
                # First private match located after the year.
                next_match = matches.range(year.end, filepart.end, predicate=lambda m: m.private, index=0)
                # Keep it when no hole with content left after strip(seps) lies between
                # the year and it, and no year match sits at its position.
                if (next_match and not matches.holes(year.end, next_match.start, predicate=lambda m: m.raw.strip(seps))
                        and not matches.at_match(next_match, predicate=lambda m: m.name == 'year')):
                    to_ignore.add(next_match.initiator)
                # Also keep matches owning more than one episode child.
                to_ignore.update(matches.range(filepart.start, filepart.end,
                                               predicate=lambda m: len(m.children.named('episode')) > 1))
                to_remove.extend(matches.conflicting(year))
        if remove:
            # Applies to every weak-episode match, not only those of the path part holding the year.
            to_remove.extend(matches.tagged('weak-episode', predicate=(
                lambda m: m.initiator not in to_ignore and 'anime' not in m.tags)))
        return to_remove
 class RemoveWeak(Rule):
    """
    Remove weak-episode matches that directly follow a video, source or audio match.

    When the preceding match is actually an episode word, the first weak match is
    re-created as a regular episode starting at that word.
    """
    priority = 16
    consequence = RemoveMatch, AppendMatch
    def __init__(self, episode_words):
        """
        :param episode_words: words announcing an episode number
        """
        super(RemoveWeak, self).__init__()
        self.episode_words = episode_words
    def when(self, matches, context):
        """
        Inspect the first weak-episode match of every path part.

        :return: ``(to_remove, to_append)`` or False when both are empty
        """
        technical_names = (
            'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile',
            'audio_channels', 'audio_profile')
        discarded, created = [], []
        for part in matches.markers.named('path'):
            weak_group = matches.range(part.start, part.end, predicate=lambda m: 'weak-episode' in m.tags)
            if not weak_group:
                continue
            head = weak_group[0]
            neighbour = matches.previous(head, predicate=lambda m: m.name in technical_names, index=0)
            if not neighbour:
                continue
            # Only act when nothing but separators stands between both matches.
            if matches.holes(neighbour.end, head.start, predicate=lambda m: m.raw.strip(seps)):
                continue
            if neighbour.raw.lower() in self.episode_words:
                try:
                    promoted = copy.copy(head)
                    promoted.name = 'episode'
                    promoted.value = int(head.value)
                    promoted.start = neighbour.start
                    promoted.private = False
                    promoted.tags = []
                except ValueError:
                    pass
                else:
                    created.append(promoted)
            discarded.extend(weak_group)
        if discarded or created:
            return discarded, created
        return False
 class RemoveWeakIfSxxExx(Rule):
    """
    Remove weak-episode tagged matches when an SxxExx pattern is present in the same path part.

    A weak_episode match located at the very beginning of the path part is kept.
    """
    priority = 64
    consequence = RemoveMatch
    def when(self, matches, context):
        """
        :return: list of weak-episode matches to remove (possibly empty)
        """
        rejected = []
        for part in matches.markers.named('path'):
            has_strong = matches.range(part.start, part.end,
                                       predicate=lambda m: not m.private and 'SxxExx' in m.tags)
            if not has_strong:
                continue
            weak_candidates = matches.range(part.start, part.end, predicate=lambda m: 'weak-episode' in m.tags)
            rejected.extend(
                weak for weak in weak_candidates
                if not (weak.start == part.start and weak.initiator.name == 'weak_episode'))
        return rejected
 class RemoveInvalidSeason(Rule):
    """
    Remove season matches that follow a strong SxxExx season and are weaker than it.
    """
    priority = 64
    consequence = RemoveMatch
    def when(self, matches, context):
        """
        :return: list of season matches (with their initiator and siblings) to remove
        """
        rejected = []
        for part in matches.markers.named('path'):
            anchor = matches.range(part.start, part.end, index=0,
                                   predicate=lambda m: m.name == 'season'
                                   and not m.private and 'SxxExx' in m.tags)
            # Only a strong season that comes with episodes is used as reference.
            if not anchor or not anchor.initiator.children.named('episode'):
                continue
            later_seasons = matches.range(anchor.end, part.end,
                                          predicate=lambda m: m.name == 'season' and not m.private)
            for candidate in later_seasons:
                # Weak seasons and seasons without episode matches are invalid.
                is_invalid = ('SxxExx' not in candidate.tags
                              or not candidate.initiator.children.named('episode'))
                if not is_invalid:
                    continue
                if candidate.initiator:
                    rejected.append(candidate.initiator)
                    rejected.extend(candidate.initiator.children)
                else:
                    rejected.append(candidate)
        return rejected
 class RemoveInvalidEpisode(Rule):
    """
    Remove episode matches whose prefix differs from the one of the first strong SxxExx episode.
    """
    priority = 64
    consequence = RemoveMatch
    def when(self, matches, context):
        """
        :return: list of episode matches (with their initiator and siblings) to remove
        """
        rejected = []
        prefix_of = RemoveInvalidEpisode.get_episode_prefix
        for part in matches.markers.named('path'):
            anchor = matches.range(part.start, part.end, index=0,
                                   predicate=lambda m: m.name == 'episode'
                                   and not m.private and 'SxxExx' in m.tags)
            if not anchor:
                continue
            anchor_prefix = prefix_of(matches, anchor)
            later_episodes = matches.range(anchor.end, part.end,
                                           predicate=lambda m: m.name == 'episode' and not m.private)
            for candidate in later_episodes:
                candidate_prefix = prefix_of(matches, candidate)
                if not (anchor_prefix and candidate_prefix):
                    continue
                # Prefixes are compared case-insensitively.
                if anchor_prefix.value.lower() == candidate_prefix.value.lower():
                    continue
                if candidate.initiator:
                    rejected.append(candidate.initiator)
                    rejected.extend(candidate.initiator.children)
                else:
                    rejected.append(candidate_prefix)
                    rejected.append(candidate)
        return rejected
    @staticmethod
    def get_episode_prefix(matches, episode):
        """
        Return the closest episodeMarker or episodeSeparator match preceding ``episode``.
        """
        def is_prefix(m):
            return m.name in ('episodeMarker', 'episodeSeparator')

        return matches.previous(episode, index=0, predicate=is_prefix)
 class RemoveWeakDuplicate(Rule):
    """
    Remove weak-duplicate tagged matches produced more than once by the same pattern,
    for example The 100.109
    """
    priority = 64
    consequence = RemoveMatch
    def when(self, matches, context):
        """
        :return: list of duplicated weak-duplicate matches to remove (possibly empty)
        """
        rejected = []
        for part in matches.markers.named('path'):
            # Patterns already seen, grouped by match name.
            seen_by_name = {}
            weak_duplicates = matches.range(part.start, part.end,
                                            predicate=lambda m: 'weak-duplicate' in m.tags)
            # Walk backwards so the last occurrence is the one that is kept.
            for candidate in reversed(weak_duplicates):
                seen = seen_by_name.setdefault(candidate.name, [])
                if candidate.pattern in seen:
                    rejected.append(candidate)
                else:
                    seen.append(candidate.pattern)
        return rejected
 class EpisodeDetailValidator(Rule):
    """
    Keep episode_details only when detached (surrounded by separators) or next to a
    season or episode match.
    """
    priority = 64
    consequence = RemoveMatch
    def when(self, matches, context):
        """
        :return: list of episode_details matches to remove (possibly empty)
        """
        def is_season_or_episode(match):
            return match.name in ['season', 'episode']

        return [detail for detail in matches.named('episode_details')
                if not (seps_surround(detail)
                        or matches.previous(detail, is_season_or_episode)
                        or matches.next(detail, is_season_or_episode))]
 class RemoveDetachedEpisodeNumber(Rule):
    """
    When several weak episodes are found, remove the lowest one if it is below 10 and
    not contiguous with the next value.
    Fairy Tail 2 - 16-20, 2 should be removed.
    """
    priority = 64
    consequence = RemoveMatch
    dependency = [RemoveWeakIfSxxExx, RemoveWeakDuplicate]
    def when(self, matches, context):
        """
        :return: the detached match followed by its parent chain, or an empty list
        """
        # First match seen for each distinct episode value.
        first_by_value = {}
        for candidate in matches.named('episode', lambda m: not m.private and 'weak-episode' in m.tags):
            first_by_value.setdefault(candidate.value, candidate)
        ordered = sorted(first_by_value.values(), key=lambda m: m.value)
        rejected = []
        if len(ordered) > 1:
            lowest, runner_up = ordered[0], ordered[1]
            if lowest.value < 10 and runner_up.value - lowest.value != 1:
                node = lowest
                while node:  # TODO: Add a feature in rebulk to avoid this ...
                    rejected.append(node)
                    node = node.parent
        return rejected
 class VersionValidator(Rule):
    """
    Keep a version only when an episode precedes it or its initiator is surrounded by separators.
    """
    priority = 64
    dependency = [RemoveWeakIfMovie, RemoveWeakIfSxxExx]
    consequence = RemoveMatch
    def when(self, matches, context):
        """
        :return: list of version matches to remove (possibly empty)
        """
        return [version for version in matches.named('version')
                if not matches.previous(version, lambda match: match.name == 'episode', 0)
                and not seps_surround(version.initiator)]
 class EpisodeSingleDigitValidator(Rule):
    """
    Remove an episode whose initiator is one character long when it sits inside a group
    that holds no title.
    """
    dependency = [TitleFromPosition]
    consequence = RemoveMatch
    def when(self, matches, context):
        """
        :return: list of episode matches to remove (possibly empty)
        """
        rejected = []
        for candidate in matches.named('episode', lambda match: len(match.initiator) == 1):
            enclosing = matches.markers.at_match(candidate, lambda marker: marker.name == 'group', index=0)
            if not enclosing:
                continue
            titles_in_group = matches.range(*enclosing.span, predicate=lambda match: match.name == 'title')
            if not titles_in_group:
                rejected.append(candidate)
        return rejected
 class RenameToDiscMatch(Rule):
    """
    Rename episodes introduced by a `d` episodeMarker to `disc`, or drop them when
    the disc property is disabled.
    """
    consequence = [RenameMatch('disc'), RenameMatch('discMarker'), RemoveMatch]
    def when(self, matches, context):
        """
        :return: ``(discs, markers, to_remove)`` or False when all three are empty
        """
        renamed_discs, renamed_markers, dropped = [], [], []
        disc_enabled = not is_disabled(context, 'disc')
        for disc_marker in matches.named('episodeMarker', predicate=lambda m: m.value.lower() == 'd'):
            siblings = disc_marker.initiator.children
            if disc_enabled:
                renamed_markers.append(disc_marker)
                renamed_discs.extend(sorted(siblings.named('episode'), key=lambda m: m.value))
            else:
                # Property disabled: discard the marker together with all its siblings.
                dropped.append(disc_marker)
                dropped.extend(siblings)
        if not (renamed_discs or renamed_markers or dropped):
            return False
        return renamed_discs, renamed_markers, dropped
@@ -1,48 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 film property
 """
 from rebulk import Rebulk, AppendMatch, Rule
 from rebulk.remodule import re
 from ..common.formatters import cleanup
 from ..common.pattern import is_disabled
 from ..common.validators import seps_surround
 def film(config):  # pylint:disable=unused-argument
    """
    Build the rebulk object detecting the ``film`` property (``f`` followed by 1-2 digits).

    :param config: rule configuration (unused)
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def film_disabled(context):
        return is_disabled(context, 'film')

    regex_settings = {
        'flags': re.IGNORECASE,
        'validate_all': True,
        'validator': {'__parent__': seps_surround},
    }
    builder = Rebulk().regex_defaults(**regex_settings)
    builder.regex(r'f(\d{1,2})',
                  name='film',
                  private_parent=True,
                  children=True,
                  formatter=int,
                  disabled=film_disabled)
    builder.rules(FilmTitleRule)
    return builder
 class FilmTitleRule(Rule):
    """
    Rule to find out film_title: the first hole between the start of the path part
    and the film match.
    """
    consequence = AppendMatch
    properties = {'film_title': [None]}
    def enabled(self, context):
        """Enable the rule unless film_title is disabled in the context."""
        film_title_off = is_disabled(context, 'film_title')
        return not film_title_off
    def when(self, matches, context):
        """
        :return: the hole renamed to ``film_title``, or None when no usable hole exists
        """
        film_number = matches.named('film', lambda match: not match.private, index=0)
        if not film_number:
            return None
        path_marker = matches.markers.at_match(film_number, lambda marker: marker.name == 'path', 0)
        title_hole = matches.holes(path_marker.start, film_number.start + 1, formatter=cleanup, index=0)
        if title_hole and title_hole.value:
            title_hole.name = 'film_title'
            return title_hole
        return None
@@ -1,510 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 language and subtitle_language properties
 """
 # pylint: disable=no-member
 import copy
 from collections import defaultdict, namedtuple
 import babelfish
 from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch
 from rebulk.remodule import re
 from ..common import seps
 from ..common.pattern import is_disabled
 from ..common.words import iter_words
 from ..common.validators import seps_surround
 def language(config, common_words):
    """
    Builder for rebulk object.

    Reads the affix lists from ``config``, registers the private prefix/suffix
    string patterns, the functional language finder and the language rules, and
    stores a GuessitConverter under ``babelfish.language_converters['guessit']``.

    :param config: rule configuration; the keys read here are 'subtitle_affixes',
        'subtitle_prefixes', 'subtitle_suffixes', 'language_affixes',
        'language_prefixes', 'language_suffixes', 'weak_affixes' and 'synonyms'
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # Affixes listed under "*_affixes" are usable both as prefix and as suffix.
    # Every resulting list is sorted by string length (shortest first).
    subtitle_both = config['subtitle_affixes']
    subtitle_prefixes = sorted(subtitle_both + config['subtitle_prefixes'], key=length_comparator)
    subtitle_suffixes = sorted(subtitle_both + config['subtitle_suffixes'], key=length_comparator)
    lang_both = config['language_affixes']
    lang_prefixes = sorted(lang_both + config['language_prefixes'], key=length_comparator)
    lang_suffixes = sorted(lang_both + config['language_suffixes'], key=length_comparator)
    weak_affixes = frozenset(config['weak_affixes'])
    # The whole builder is disabled only when both properties are disabled.
    rebulk = Rebulk(disabled=lambda context: (is_disabled(context, 'language') and
                                              is_disabled(context, 'subtitle_language')))
    rebulk.string(*subtitle_prefixes, name="subtitle_language.prefix", ignore_case=True, private=True,
                  validator=seps_surround, tags=['release-group-prefix'],
                  disabled=lambda context: is_disabled(context, 'subtitle_language'))
    rebulk.string(*subtitle_suffixes, name="subtitle_language.suffix", ignore_case=True, private=True,
                  validator=seps_surround,
                  disabled=lambda context: is_disabled(context, 'subtitle_language'))
    # NOTE(review): only language suffixes get a string pattern here; lang_prefixes
    # is only handed to LanguageFinder below.
    rebulk.string(*lang_suffixes, name="language.suffix", ignore_case=True, private=True,
                  validator=seps_surround, tags=['source-suffix'],
                  disabled=lambda context: is_disabled(context, 'language'))
    def find_languages(string, context=None):
        """Find languages in the string

        :return: iterable of tuples (start, end, dict of match keyword arguments),
            as produced by LanguageFinder.find
        """
        return LanguageFinder(context, subtitle_prefixes, subtitle_suffixes,
                              lang_prefixes, lang_suffixes, weak_affixes).find(string)
    # The finder is skipped when the context provides no 'allowed_languages'.
    rebulk.functional(find_languages,
                      properties={'language': [None]},
                      disabled=lambda context: not context.get('allowed_languages'))
    rebulk.rules(SubtitleExtensionRule,
                 SubtitlePrefixLanguageRule,
                 SubtitleSuffixLanguageRule,
                 RemoveLanguage,
                 RemoveInvalidLanguages(common_words))
    # Global side effect: (re)registers the 'guessit' converter on every call.
    babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms'])
    return rebulk
 # Sentinel languages built from the 'und' and 'mul' codes.
 UNDETERMINED = babelfish.Language('und')
 MULTIPLE = babelfish.Language('mul')
 # Immutable set holding both sentinels.
 NON_SPECIFIC_LANGUAGES = frozenset([UNDETERMINED, MULTIPLE])
 class GuessitConverter(babelfish.LanguageReverseConverter):  # pylint: disable=missing-docstring
    _with_country_regexp = re.compile(r'(.*)\((.*)\)')
    _with_country_regexp2 = re.compile(r'(.*)-(.*)')
    def __init__(self, synonyms):
        # Every synonym is stored lower-cased and mapped to an
        # (alpha3, country, script) triple; the script is always None.
        self.guessit_exceptions = {}
        for code, synlist in synonyms.items():
            # A code written "xxx_YY" carries a country part after the underscore.
            alpha3, country = code.split('_') if '_' in code else (code, None)
            for syn in synlist:
                self.guessit_exceptions[syn.lower()] = (alpha3, country, None)
    @property
    def codes(self):  # pylint: disable=missing-docstring
        # Union of the codes of several babelfish converters and of the synonyms.
        known = babelfish.language_converters['alpha3b'].codes
        known = known | babelfish.language_converters['alpha2'].codes
        known = known | babelfish.language_converters['name'].codes
        known = known | babelfish.language_converters['opensubtitles'].codes
        known = known | babelfish.country_converters['name'].codes
        return known | frozenset(self.guessit_exceptions.keys())
    def convert(self, alpha3, country=None, script=None):
        language = babelfish.Language(alpha3, country, script)
        return str(language)
    def reverse(self, name):  # pylint:disable=arguments-differ
        name = name.lower()
        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        if name in self.guessit_exceptions:
            return self.guessit_exceptions[name]
        guessers = (babelfish.Language,
                    babelfish.Language.fromalpha3b,
                    babelfish.Language.fromalpha2,
                    babelfish.Language.fromname,
                    babelfish.Language.fromopensubtitles,
                    babelfish.Language.fromietf)
        # The first guesser that accepts the name wins.
        for guesser in guessers:
            try:
                found = guesser(name)
                return found.alpha3, found.country, found.script
            except (ValueError, babelfish.LanguageReverseError):
                continue
        raise babelfish.LanguageReverseError(name)
 def length_comparator(value):
    """
    Sort key giving the length of ``value``.

    :param value: any sized object
    :return: ``len(value)``
    :rtype: int
    """
    size = len(value)
    return size
 # Internal record for one language hit: the property name, the word it was found in
 # and the parsed language.
 _LanguageMatch = namedtuple('_LanguageMatch', ['property_name', 'word', 'lang'])
 class LanguageWord(object):
    """
    Word of the input string, optionally linked to the word that follows it,
    in order to create compound words.
    E.g.: pt-BR, soft subtitles, custom subs
    """
    def __init__(self, start, end, value, input_string, next_word=None):
        """
        :param start: index of the first character of the word in ``input_string``
        :param end: index just after the last character of the word
        :param value: text of the word
        :param input_string: the whole string the word belongs to
        :param next_word: the LanguageWord that follows this one, if any
        """
        self.start = start
        self.end = end
        self.value = value
        self.input_string = input_string
        self.next_word = next_word
    @property
    def extended_word(self):
        """
        Return the extended word for this instance, if any.

        The extended word spans this word and the next one. It exists only when
        the text between both words is a space, a dot, or a dash that is not
        itself followed by another dash right after the next word. Dots inside
        the extended value are replaced by spaces.

        :return: the compound word, or None when it cannot be built
        :rtype: LanguageWord or None
        """
        following = self.next_word
        if not following:
            return None
        separator = self.input_string[self.end:following.start]
        # Single character located right after the next word ('' at end of input).
        next_separator = self.input_string[following.end:following.end + 1]
        if (separator == '-' and separator != next_separator) or separator in (' ', '.'):
            value = self.input_string[self.start:following.end].replace('.', ' ')
            return LanguageWord(self.start, following.end, value, self.input_string, following.next_word)
        return None
    def __repr__(self):
        # The closing '>' was missing, which made the representation unbalanced.
        return '<({start},{end}): {value}>'.format(start=self.start, end=self.end, value=self.value)
 def to_rebulk_match(language_match):
    """
    Convert language match to rebulk Match: start, end, dict

    An undetermined language keeps the lower-cased word as value, to be formatted
    with babelfish.Language, and is tagged 'weak-language'. Any other language is
    used directly as the value.
    """
    word = language_match.word
    properties = {'name': language_match.property_name}
    if language_match.lang == UNDETERMINED:
        properties['value'] = word.value.lower()
        properties['formatter'] = babelfish.Language
        properties['tags'] = ['weak-language']
    else:
        properties['value'] = language_match.lang
    return word.start, word.end, properties
 class LanguageFinder(object):
    """
    Helper class to search and return language matches: 'language' and 'subtitle_language' properties
    """
    def __init__(self, context,
                 subtitle_prefixes, subtitle_suffixes,
                 lang_prefixes, lang_suffixes, weak_affixes):
        """
        :param context: context dict (may be falsy); 'allowed_languages' is read from it
        :param subtitle_prefixes: prefixes announcing a subtitle language
        :param subtitle_suffixes: suffixes announcing a subtitle language
        :param lang_prefixes: prefixes announcing a language
        :param lang_suffixes: suffixes announcing a language
        :param weak_affixes: affixes that never produce an undetermined match on their own
        """
        allowed_languages = context.get('allowed_languages') if context else None
        # Allowed languages are compared lower-cased (see parse_language).
        self.allowed_languages = {l.lower() for l in allowed_languages or []}
        self.weak_affixes = weak_affixes
        # Both maps go from property name to the list of affixes for that property.
        self.prefixes_map = {}
        self.suffixes_map = {}
        if not is_disabled(context, 'subtitle_language'):
            self.prefixes_map['subtitle_language'] = subtitle_prefixes
            self.suffixes_map['subtitle_language'] = subtitle_suffixes
        # 'language' affixes are always registered, whatever the context says.
        self.prefixes_map['language'] = lang_prefixes
        self.suffixes_map['language'] = lang_suffixes
    def find(self, string):
        """
        Return all matches for language and subtitle_language.
        Undetermined language matches are removed if a regular language is found.
        Multi language matches are removed if there are only undetermined language matches

        This is a generator: it yields the (start, end, dict) tuples built by
        to_rebulk_match, multi matches first, then undetermined, then regular ones.
        """
        # Matches grouped per property name, one map per kind of language.
        regular_lang_map = defaultdict(set)
        undetermined_map = defaultdict(set)
        multi_map = defaultdict(set)
        for match in self.iter_language_matches(string):
            key = match.property_name
            if match.lang == UNDETERMINED:
                undetermined_map[key].add(match)
            elif match.lang == 'mul':
                multi_map[key].add(match)
            else:
                regular_lang_map[key].add(match)
        # Multi matches are kept when the property has a regular match or has no
        # undetermined match.
        for key, values in multi_map.items():
            if key in regular_lang_map or key not in undetermined_map:
                for value in values:
                    yield to_rebulk_match(value)
        # Undetermined matches are kept only when the property has no regular match.
        for key, values in undetermined_map.items():
            if key not in regular_lang_map:
                for value in values:
                    yield to_rebulk_match(value)
        for values in regular_lang_map.values():
            for value in values:
                yield to_rebulk_match(value)
    def iter_language_matches(self, string):
        """
        Return language matches for the given string.

        Every word returned by iter_words becomes a LanguageWord linked to the word
        following it; matches are then yielded candidate by candidate.
        """
        candidates = []
        previous = None
        for word in iter_words(string):
            language_word = LanguageWord(start=word.span[0], end=word.span[1], value=word.value, input_string=string)
            if previous:
                # Link the previous word to the current one before storing it.
                previous.next_word = language_word
                candidates.append(previous)
            previous = language_word
        if previous:
            # The last word has no successor.
            candidates.append(previous)
        for candidate in candidates:
            for match in self.iter_matches_for_candidate(candidate):
                yield match
    def iter_matches_for_candidate(self, language_word):
        """
        Return language matches for the given candidate word.

        Up to three matches are yielded: one where the word is checked against the
        prefixes, one where the next word is checked against the suffixes, and one
        where the word itself is parsed as a language.
        """
        # Each tuple: (word holding the affix, fallback word, affixes map,
        #              affix test, affix removal).
        tuples = [
            (language_word, language_word.next_word,
             self.prefixes_map,
             lambda string, prefix: string.startswith(prefix),
             lambda string, prefix: string[len(prefix):]),
            (language_word.next_word, language_word,
             self.suffixes_map,
             lambda string, suffix: string.endswith(suffix),
             lambda string, suffix: string[:len(string) - len(suffix)])
        ]
        for word, fallback_word, affixes, is_affix, strip_affix in tuples:
            if not word:
                # No next word: the suffix case cannot apply.
                continue
            match = self.find_match_for_word(word, fallback_word, affixes, is_affix, strip_affix)
            if match:
                yield match
        match = self.find_language_match_for_word(language_word)
        if match:
            yield match
    def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix):  # pylint:disable=inconsistent-return-statements
        """
        Return the language match for the given word and affixes.

        The extended word is tried before the word itself. Returns None (implicitly)
        when no affix leads to a match.
        """
        for current_word in (word.extended_word, word):
            if not current_word:
                continue
            word_lang = current_word.value.lower()
            for key, parts in affixes.items():
                for part in parts:
                    if not is_affix(word_lang, part):
                        continue
                    match = None
                    value = strip_affix(word_lang, part)
                    if not value:
                        # The word is exactly the affix: look for the language in the
                        # fallback word when both words are at most one character apart.
                        if fallback_word and (
                                abs(fallback_word.start - word.end) <= 1 or abs(word.start - fallback_word.end) <= 1):
                            match = self.find_language_match_for_word(fallback_word, key=key)
                        # A non-weak affix alone is parsed as the 'und' language.
                        if not match and part not in self.weak_affixes:
                            match = self.create_language_match(key, LanguageWord(current_word.start, current_word.end,
                                                                                 'und', current_word.input_string))
                    else:
                        # What remains once the affix is stripped is parsed as the language.
                        match = self.create_language_match(key, LanguageWord(current_word.start, current_word.end,
                                                                             value, current_word.input_string))
                    if match:
                        return match
    def find_language_match_for_word(self, word, key='language'):  # pylint:disable=inconsistent-return-statements
        """
        Return the language match for the given word.

        The extended word is tried first, then the word itself; None (implicitly)
        when neither parses as a language.
        """
        for current_word in (word.extended_word, word):
            if current_word:
                match = self.create_language_match(key, current_word)
                if match:
                    return match
    def create_language_match(self, key, word):  # pylint:disable=inconsistent-return-statements
        """
        Create a LanguageMatch for a given word

        Returns None (implicitly) when the lower-cased word value is not parsed
        as a language.
        """
        lang = self.parse_language(word.value.lower())
        if lang is not None:
            return _LanguageMatch(property_name=key, word=word, lang=lang)
    def parse_language(self, lang_word):  # pylint:disable=inconsistent-return-statements
        """
        Parse the lang_word into a valid Language.
        Multi and Undetermined languages are also valid languages.

        The language is returned only when its name, alpha2 or alpha3 code
        (lower-cased) belongs to the allowed languages. Otherwise, or when babelfish
        raises babelfish.Error, None is returned implicitly.
        """
        try:
            # NOTE(review): fromguessit presumably goes through the 'guessit' converter
            # registered in babelfish.language_converters by language() - confirm.
            lang = babelfish.Language.fromguessit(lang_word)
            if ((hasattr(lang, 'name') and lang.name.lower() in self.allowed_languages) or
                    (hasattr(lang, 'alpha2') and lang.alpha2.lower() in self.allowed_languages) or
                    lang.alpha3.lower() in self.allowed_languages):
                return lang
        except babelfish.Error:
            pass
 class SubtitlePrefixLanguageRule(Rule):
    """
    Rename a language match to subtitle_language when a subtitle language prefix
    precedes it, and remove the subtitle language prefix matches that were not used.
    """
    consequence = RemoveMatch
    properties = {'subtitle_language': [None]}
    def enabled(self, context):
        """Run only when the ``subtitle_language`` property is not disabled."""
        subtitle_disabled = is_disabled(context, 'subtitle_language')
        return not subtitle_disabled
    def when(self, matches, context):
        """
        Return ``(renames, removals)`` or False when both are empty.

        ``renames`` holds (prefix, language) pairs; ``removals`` holds the prefix
        matches not paired with a language plus the matches conflicting with a
        paired language.
        """
        def is_prefix(match):
            return match.name == 'subtitle_language.prefix'
        def is_group(marker):
            return marker.name == 'group'
        renames = []
        removals = matches.named('subtitle_language.prefix')
        for lang in matches.named('language'):
            prefix = matches.previous(lang, is_prefix, 0)
            if not prefix:
                group_marker = matches.markers.at_match(lang, is_group, 0)
                if group_marker:
                    # Find prefix if placed just before the group, otherwise
                    # if placed before the language inside the group.
                    prefix = (matches.previous(group_marker, is_prefix, 0) or
                              matches.range(group_marker.start, lang.start, is_prefix, 0))
            if not prefix:
                continue
            renames.append((prefix, lang))
            removals.extend(matches.conflicting(lang))
            # A prefix in use must survive the removal step.
            if prefix in removals:
                removals.remove(prefix)
        if renames or removals:
            return renames, removals
        return False
    def then(self, matches, when_response, context):
        """Apply the removals, then re-insert each paired language as subtitle_language."""
        renames, removals = when_response
        super(SubtitlePrefixLanguageRule, self).then(matches, removals, context)
        for prefix, lang in renames:
            # Remove suffix equivalent of  prefix.
            suffix = copy.copy(prefix)
            suffix.name = 'subtitle_language.suffix'
            if suffix in matches:
                matches.remove(suffix)
            # The match is removed and appended again under its new name.
            matches.remove(lang)
            lang.name = 'subtitle_language'
            matches.append(lang)
 class SubtitleSuffixLanguageRule(Rule):
    """
    Rename a language match to subtitle_language when a subtitle language suffix
    follows it, and remove the subtitle language suffix matches that were not used.
    """
    dependency = SubtitlePrefixLanguageRule
    consequence = RemoveMatch
    properties = {'subtitle_language': [None]}
    def enabled(self, context):
        """Run only when the ``subtitle_language`` property is not disabled."""
        subtitle_disabled = is_disabled(context, 'subtitle_language')
        return not subtitle_disabled
    def when(self, matches, context):
        """
        Return ``(renames, removals)`` or False when both are empty.

        ``renames`` holds the language matches followed by a suffix; ``removals``
        holds the suffix matches that follow no language match.
        """
        def is_suffix(match):
            return match.name == 'subtitle_language.suffix'
        removals = matches.named('subtitle_language.suffix')
        renames = []
        for lang in matches.named('language'):
            suffix = matches.next(lang, is_suffix, 0)
            if not suffix:
                continue
            renames.append(lang)
            # A suffix in use must survive the removal step.
            if suffix in removals:
                removals.remove(suffix)
        if renames or removals:
            return renames, removals
        return False
    def then(self, matches, when_response, context):
        """Apply the removals, then re-insert each language as subtitle_language."""
        renames, removals = when_response
        super(SubtitleSuffixLanguageRule, self).then(matches, removals, context)
        for lang in renames:
            # The match is removed and appended again under its new name.
            matches.remove(lang)
            lang.name = 'subtitle_language'
            matches.append(lang)
 class SubtitleExtensionRule(Rule):
    """
    Convert language guess as subtitle_language if next match is a subtitle extension.
    Since it's a strong match, it also removes any conflicting source with it.
    """
    consequence = [RemoveMatch, RenameMatch('subtitle_language')]
    properties = {'subtitle_language': [None]}
    def enabled(self, context):
        """Run only when the ``subtitle_language`` property is not disabled."""
        subtitle_disabled = is_disabled(context, 'subtitle_language')
        return not subtitle_disabled
    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        """
        Return ``(conflicting source matches, language match)``, one entry per
        consequence, or None when there is no subtitle extension or no language
        match before it.
        """
        def is_subtitle_extension(match):
            return all(tag in match.tags for tag in ('extension', 'subtitle'))
        subtitle_extension = matches.named('container', is_subtitle_extension, 0)
        if not subtitle_extension:
            return None
        subtitle_lang = matches.previous(subtitle_extension, lambda match: match.name == 'language', 0)
        if not subtitle_lang:
            return None
        # Side effect: weak subtitle languages are turned private.
        for weak in matches.named('subtitle_language', predicate=lambda m: 'weak-language' in m.tags):
            weak.private = True
        conflicting_sources = matches.conflicting(subtitle_lang, lambda m: m.name == 'source')
        return conflicting_sources, subtitle_lang
 class RemoveLanguage(Rule):
    """Drop the remaining ``language`` matches (those not converted to subtitle_language)
    when the language property is disabled."""
    consequence = RemoveMatch
    def enabled(self, context):
        """Run only when the ``language`` property is disabled."""
        language_disabled = is_disabled(context, 'language')
        return language_disabled
    def when(self, matches, context):
        """Return every match named ``language``."""
        leftovers = matches.named('language')
        return leftovers
 class RemoveInvalidLanguages(Rule):
    """Remove language and subtitle_language matches whose raw text is one of the common words."""
    consequence = RemoveMatch
    priority = 32
    def __init__(self, common_words):
        """
        :param common_words: words (compared lower-cased) that are not accepted as languages
        """
        super(RemoveInvalidLanguages, self).__init__()
        self.common_words = common_words
    def when(self, matches, context):
        """
        Return the language matches to remove.

        A common word is kept when it sits in a group marker that holds neither a
        non-language match nor a hole with text other than separators.
        """
        language_names = ('language', 'subtitle_language')
        def is_language(match):
            return match.name in language_names
        def is_not_language(match):
            return match.name not in language_names
        def has_text(hole):
            return hole.value.strip(seps)
        to_remove = []
        for match in matches.range(0, len(matches.input_string), predicate=is_language):
            if match.raw.lower() not in self.common_words:
                continue
            group = matches.markers.at_match(match, index=0, predicate=lambda m: m.name == 'group')
            if group:
                only_languages = not matches.range(group.start, group.end, predicate=is_not_language)
                if only_languages and not matches.holes(group.start, group.end, predicate=has_text):
                    continue
            to_remove.append(match)
        return to_remove
@@ -1,55 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 mimetype property
 """
 import mimetypes
 from rebulk import Rebulk, CustomRule, POST_PROCESS
 from rebulk.match import Match
 from ..common.pattern import is_disabled
 from ...rules.processors import Processors
 def mimetype(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object in charge of the ``mimetype`` property.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def mimetype_disabled(context):
        """Tell whether the ``mimetype`` property is disabled for this context."""
        return is_disabled(context, 'mimetype')
    builder = Rebulk(disabled=mimetype_disabled)
    builder.rules(Mimetype)
    return builder
 class Mimetype(CustomRule):
    """
    Mimetype post processor: guesses the mime type of the whole input string and
    appends it as a zero-length ``mimetype`` match located at the end of the input.
    """
    priority = POST_PROCESS
    dependency = Processors
    def when(self, matches, context):
        """Return the mime type guessed (non-strict mode) from the input string, or None."""
        guessed = mimetypes.guess_type(matches.input_string, strict=False)
        return guessed[0]
    def then(self, matches, when_response, context):
        """Append the ``mimetype`` match holding the value returned by ``when``."""
        end = len(matches.input_string)
        matches.append(Match(end, end, name='mimetype', value=when_response))
    @property
    def properties(self):
        """
        Properties for this rule.
        """
        return {'mimetype': [None]}
@@ -1,383 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 other property
 """
 import copy
 from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch, POST_PROCESS, AppendMatch
 from rebulk.remodule import re
 from ..common import dash
 from ..common import seps
 from ..common.pattern import is_disabled
 from ..common.validators import seps_after, seps_before, seps_surround, and_
 from ...reutils import build_or_pattern
 from ...rules.common.formatters import raw_cleanup
 def other(config):  # pylint:disable=unused-argument,too-many-statements
    """
    Builder for rebulk object.

    Registers the string and regex patterns producing the ``other`` property and
    the validation rules that filter them. The patterns are declared in a fixed
    order; keep it when editing.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'other'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    # Unless overridden, every pattern below is named "other" and validated with seps_surround.
    rebulk.defaults(name="other", validator=seps_surround)
    rebulk.regex('Audio-?Fix', 'Audio-?Fixed', value='Audio Fixed')
    rebulk.regex('Sync-?Fix', 'Sync-?Fixed', value='Sync Fixed')
    rebulk.regex('Dual', 'Dual-?Audio', value='Dual Audio')
    rebulk.regex('ws', 'wide-?screen', value='Widescreen')
    rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')
    # Every "proper"-like word yields the value 'Proper'; the 'real' tag is read by
    # ProperCountRule, which counts such a match twice.
    rebulk.string('Repack', 'Rerip', value='Proper',
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.string('Proper', value='Proper',
                  tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
                 tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
    rebulk.regex('Real', value='Proper',
                 tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real'])
    rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after',
                                                     'streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix',
                 tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.string('Fansub', value='Fan Subtitled', tags='has-neighbor')
    rebulk.string('Fastsub', value='Fast Subtitled', tags='has-neighbor')
    season_words = build_or_pattern(["seasons?", "series?"])
    complete_articles = build_or_pattern(["The"])
    def validate_complete(match):
        """
        Make sure a season word is defined before or after "Complete".
        :param match: parent match of the "Complete" pattern
        :type match:
        :return: False when neither completeWordsBefore nor completeWordsAfter matched
        :rtype: bool
        """
        children = match.children
        if not children.named('completeWordsBefore') and not children.named('completeWordsAfter'):
            return False
        return True
    # Optional article, optional season word before, "Complete", optional season word after.
    rebulk.regex('(?P<completeArticle>' + complete_articles + '-)?' +
                 '(?P<completeWordsBefore>' + season_words + '-)?' +
                 'Complete' + '(?P<completeWordsAfter>-' + season_words + ')?',
                 private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
                 value={'other': 'Complete'},
                 tags=['release-group-prefix'],
                 validator={'__parent__': and_(seps_surround, validate_complete)})
    rebulk.string('R5', value='Region 5')
    rebulk.string('RC', value='Region C')
    rebulk.regex('Pre-?Air', value='Preair')
    rebulk.regex('(?:PS-?)Vita', value='PS Vita')
    rebulk.regex('Vita', value='PS Vita', tags='has-neighbor')
    # The "Rip" part is captured as `another`, later renamed to `other` by RenameAnotherToOther.
    rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'},
                 private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True)
    for value in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
        rebulk.string(value, value=value)
    rebulk.string('3D', value='3D', tags='has-neighbor')
    rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
    rebulk.string('HR', value='High Resolution')
    rebulk.string('LD', value='Line Dubbed')
    rebulk.string('MD', value='Mic Dubbed')
    rebulk.string('mHD', 'HDLight', value='Micro HD')
    rebulk.string('LDTV', value='Low Definition')
    rebulk.string('HFR', value='High Frame Rate')
    rebulk.string('VFR', value='Variable Frame Rate')
    # The HD family overrides the default validator with None.
    rebulk.string('HD', value='HD', validator=None,
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
                 tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='Ultra HD', validator=None,
                 tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Upscaled?', value='Upscaled')
    for value in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail',
                  'Colorized', 'Internal'):
        rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
    rebulk.regex('LiNE', value='Line Audio', tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix'])
    rebulk.regex('Read-?NFO', value='Read NFO')
    rebulk.string('CONVERT', value='Converted', tags='has-neighbor')
    rebulk.string('DOCU', 'DOKU', value='Documentary', tags='has-neighbor')
    rebulk.string('OM', value='Open Matte', tags='has-neighbor')
    rebulk.string('STV', value='Straight to Video', tags='has-neighbor')
    rebulk.string('OAR', value='Original Aspect Ratio', tags='has-neighbor')
    rebulk.string('Complet', value='Complete', tags=['has-neighbor', 'release-group-prefix'])
    for coast in ('East', 'West'):
        rebulk.regex(r'(?:Live-)?(?:Episode-)?' + coast + '-?(?:Coast-)?Feed', value=coast + ' Coast Feed')
    rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor')
    rebulk.string('Ova', 'Oav', value='Original Animated Video')
    # Screener and Mux carry dedicated validation tags, checked by ValidateScreenerRule
    # and ValidateMuxRule.
    rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
                 tags=['other.validate.screener', 'source-prefix', 'source-suffix'])
    rebulk.string('Mux', value='Mux', validator=seps_after,
                  tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix'])
    rebulk.string('HC', 'vost', value='Hardcoded Subtitles')
    rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor')
    rebulk.regex('HDR(?:10)?', value='HDR10', tags='uhdbluray-neighbor')
    rebulk.regex('Dolby-?Vision', value='Dolby Vision', tags='uhdbluray-neighbor')
    rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')
    rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Extras', value='Extras', tags='has-neighbor')
    rebulk.regex('Digital-?Extras?', value='Extras')
    rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
    rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')
    rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
                 ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
                 ValidateAtEnd, ValidateReal, ProperCountRule)
    return rebulk
 class ProperCountRule(Rule):
    """
    Add proper_count property, computed from the ``other`` matches valued 'Proper'.
    """
    priority = POST_PROCESS
    consequence = AppendMatch
    properties = {'proper_count': [None]}
    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        """
        Return a copy of the last 'Proper' match renamed to ``proper_count``,
        or None when there is no 'Proper' match.

        Each distinct cleaned raw value counts once: 2 when its match is tagged
        'real', 1 otherwise.
        """
        propers = matches.named('other', lambda match: match.value == 'Proper')
        if not propers:
            return None
        # Count distinct raw values: the last match seen for a raw value wins.
        distinct = {raw_cleanup(proper.raw): proper for proper in propers}
        proper_count_match = copy.copy(propers[-1])
        proper_count_match.name = 'proper_count'
        proper_count_match.value = sum(2 if 'real' in proper.tags else 1 for proper in distinct.values())
        return proper_count_match
 class RenameAnotherToOther(Rule):
    """
    Give every match named `another` the name `other`.
    """
    priority = 32
    consequence = RenameMatch('other')
    def when(self, matches, context):
        """Return every match named ``another``."""
        another_matches = matches.named('another')
        return another_matches
 class ValidateHasNeighbor(Rule):
    """
    Validate tag has-neighbor: a tagged match is returned for removal when neither
    the match/group marker before it nor the one after it is adjacent (separated
    by nothing but separators).
    """
    consequence = RemoveMatch
    priority = 64
    def when(self, matches, context):
        """Return the has-neighbor matches to remove."""
        def is_group(marker):
            return marker.name == 'group'
        text = matches.input_string
        ret = []
        for to_check in matches.range(predicate=lambda match: 'has-neighbor' in match.tags):
            # Closest element before: the previous match, or the previous group
            # marker when it ends later.
            before = matches.previous(to_check, index=0)
            group_before = matches.markers.previous(to_check, is_group, 0)
            if group_before and (not before or group_before.end > before.end):
                before = group_before
            # NOTE(review): `break` also stops checking the remaining tagged matches;
            # `continue` may have been intended - confirm before changing.
            if before and not text[before.end:to_check.start].strip(seps):
                break
            # Closest element after: the next match, or the next group marker
            # when it starts earlier.
            after = matches.next(to_check, index=0)
            group_after = matches.markers.next(to_check, is_group, 0)
            if group_after and (not after or group_after.start < after.start):
                after = group_after
            if after and not text[to_check.end:after.start].strip(seps):
                break
            ret.append(to_check)
        return ret
 class ValidateHasNeighborBefore(Rule):
    """
    Validate tag has-neighbor-before: a tagged match is returned for removal when
    the match/group marker following it is not adjacent (separated by nothing but
    separators).
    """
    consequence = RemoveMatch
    priority = 64
    def when(self, matches, context):
        """Return the has-neighbor-before matches to remove."""
        def is_group(marker):
            return marker.name == 'group'
        text = matches.input_string
        ret = []
        for to_check in matches.range(predicate=lambda match: 'has-neighbor-before' in match.tags):
            # Closest element after: the next match, or the next group marker
            # when it starts earlier.
            after = matches.next(to_check, index=0)
            group_after = matches.markers.next(to_check, is_group, 0)
            if group_after and (not after or group_after.start < after.start):
                after = group_after
            # NOTE(review): `break` also stops checking the remaining tagged matches;
            # `continue` may have been intended - confirm before changing.
            if after and not text[to_check.end:after.start].strip(seps):
                break
            ret.append(to_check)
        return ret
 class ValidateHasNeighborAfter(Rule):
    """
    Validate tag has-neighbor-after: a tagged match is returned for removal when
    the match/group marker preceding it is not adjacent (separated by nothing but
    separators).
    """
    consequence = RemoveMatch
    priority = 64
    def when(self, matches, context):
        """Return the has-neighbor-after matches to remove."""
        def is_group(marker):
            return marker.name == 'group'
        text = matches.input_string
        ret = []
        for to_check in matches.range(predicate=lambda match: 'has-neighbor-after' in match.tags):
            # Closest element before: the previous match, or the previous group
            # marker when it ends later.
            before = matches.previous(to_check, index=0)
            group_before = matches.markers.previous(to_check, is_group, 0)
            if group_before and (not before or group_before.end > before.end):
                before = group_before
            # NOTE(review): `break` also stops checking the remaining tagged matches;
            # `continue` may have been intended - confirm before changing.
            if before and not text[before.end:to_check.start].strip(seps):
                break
            ret.append(to_check)
        return ret
 class ValidateScreenerRule(Rule):
    """
    Remove ``other`` matches tagged ``other.validate.screener`` that do not directly
    follow a match initiated by ``source`` (only separators may lie in between).
    """
    consequence = RemoveMatch
    priority = 64
    def when(self, matches, context):
        def lacks_adjacent_source(screener):
            """Truthy when no source precedes the screener or text separates them."""
            source_match = matches.previous(screener, lambda match: match.initiator.name == 'source', 0)
            if not source_match:
                return True
            return matches.input_string[source_match.end:screener.start].strip(seps)
        candidates = matches.named('other', lambda match: 'other.validate.screener' in match.tags)
        return [screener for screener in candidates if lacks_adjacent_source(screener)]
 class ValidateMuxRule(Rule):
    """
    Remove ``other`` matches tagged ``other.validate.mux`` that have no preceding
    match initiated by ``source``.
    """
    consequence = RemoveMatch
    priority = 64
    def when(self, matches, context):
        def from_source(match):
            return match.initiator.name == 'source'
        tagged = matches.named('other', lambda match: 'other.validate.mux' in match.tags)
        return [mux for mux in tagged if not matches.previous(mux, from_source, 0)]
 class ValidateHardcodedSubs(Rule):
    """
    Keep a 'Hardcoded Subtitles' match only when a ``subtitle_language`` match sits
    right after or right before it (nothing but separators in between); remove it otherwise.
    """
    priority = 32
    consequence = RemoveMatch
    def when(self, matches, context):
        def is_subtitle_language(match):
            return match.name == 'subtitle_language'
        def has_content(hole):
            return hole.value.strip(seps)
        orphans = []
        for hardcoded in matches.named('other', predicate=lambda match: match.value == 'Hardcoded Subtitles'):
            following = matches.next(hardcoded, predicate=is_subtitle_language, index=0)
            if following and not matches.holes(hardcoded.end, following.start, predicate=has_content):
                continue
            # Only look backwards when the following side did not validate the match.
            preceding = matches.previous(hardcoded, predicate=is_subtitle_language, index=0)
            if preceding and not matches.holes(preceding.end, hardcoded.start, predicate=has_content):
                continue
            orphans.append(hardcoded)
        return orphans
 class ValidateStreamingServiceNeighbor(Rule):
    """
    Remove streaming-service prefix/suffix ``other`` matches that touch unrelated text.

    Only ``other`` matches not initiated by ``source`` and tagged
    ``streaming_service.prefix`` or ``streaming_service.suffix`` are examined, and the
    checks run on the match's initiator. An initiator lacking a separator on one side
    is removed together with its children, unless the missing separator is explained
    by a directly adjacent ``streaming_service`` match on that side.
    """
    priority = 32
    consequence = RemoveMatch
    def when(self, matches, context):
        def is_service(candidate):
            return candidate.name == 'streaming_service'
        def has_content(hole):
            return hole.value.strip(seps)
        def is_tagged(candidate):
            if candidate.initiator.name == 'source':
                return False
            return ('streaming_service.prefix' in candidate.tags
                    or 'streaming_service.suffix' in candidate.tags)
        to_remove = []
        for tagged in matches.named('other', predicate=is_tagged):
            parent = tagged.initiator
            if not seps_after(parent):
                if 'streaming_service.prefix' in parent.tags:
                    service = matches.next(parent, is_service, 0)
                    if service and not matches.holes(parent.end, service.start, predicate=has_content):
                        continue
            elif not seps_before(parent):
                if 'streaming_service.suffix' in parent.tags:
                    service = matches.previous(parent, is_service, 0)
                    if service and not matches.holes(service.end, parent.start, predicate=has_content):
                        continue
            else:
                # Separators on both sides: nothing to validate.
                continue
            if parent.children:
                to_remove.extend(parent.children)
            to_remove.append(parent)
        return to_remove
 class ValidateAtEnd(Rule):
    """
    Remove ``other`` matches tagged ``at-end`` that are not at the end of their filepart.

    A match is removed when, between its end and the end of the filepart, there is
    either unmatched non-separator text or a match whose name is neither ``other``
    nor ``container``.
    """
    priority = 32
    consequence = RemoveMatch
    def when(self, matches, context):
        def is_at_end_other(candidate):
            return candidate.name == 'other' and 'at-end' in candidate.tags
        def has_content(hole):
            return hole.value.strip(seps)
        def is_foreign(candidate):
            return candidate.name not in ('other', 'container')
        misplaced = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end, predicate=is_at_end_other):
                trailing_text = matches.holes(match.end, filepart.end, predicate=has_content)
                if trailing_text or matches.range(match.end, filepart.end, predicate=is_foreign):
                    misplaced.append(match)
        return misplaced
 class ValidateReal(Rule):
    """
    Remove ``other`` matches tagged ``real`` that have no match before them
    inside the same filepart.
    """
    consequence = RemoveMatch
    priority = 64
    def when(self, matches, context):
        def is_real(candidate):
            return candidate.name == 'other' and 'real' in candidate.tags
        return [match
                for filepart in matches.markers.named('path')
                for match in matches.range(filepart.start, filepart.end, is_real)
                if not matches.range(filepart.start, match.start)]
@@ -1,46 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 part property
 """
 from rebulk.remodule import re
 from rebulk import Rebulk
 from ..common import dash
 from ..common.pattern import is_disabled
 from ..common.validators import seps_surround, int_coercable, and_
 from ..common.numeral import numeral, parse_numeral
 from ...reutils import build_or_pattern
 def part(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object that detects the ``part`` property.

    :param config: rule configuration; its ``prefixes`` entry is read
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def validate_roman(match):
        """
        Accept a match whose raw text is int-coercable; any other match
        (e.g. a roman numeral) must be surrounded by separators.

        :param match:
        :type match:
        :return:
        :rtype:
        """
        return True if int_coercable(match.raw) else seps_surround(match)
    def within_bounds(match):
        """Accept part numbers strictly between 0 and 100."""
        return 0 < match.value < 100
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'part'))
    rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], validator={'__parent__': seps_surround})
    prefixes = config['prefixes']
    part_pattern = build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')'
    rebulk.regex(part_pattern,
                 prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral,
                 validator={'part': and_(validate_roman, within_bounds)})
    return rebulk
@@ -1,347 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 release_group property
 """
 import copy
 from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
 from rebulk.match import Match
 from ..common import seps
 from ..common.comparators import marker_sorted
 from ..common.expected import build_expected_function
 from ..common.formatters import cleanup
 from ..common.pattern import is_disabled
 from ..common.validators import int_coercable, seps_surround
 from ..properties.title import TitleFromPosition
 def _clean_groupname(string, forbidden_groupnames, groupname_ignore_seps, groupname_seps, all_seps):
    """
    Strip separators and forbidden leading/trailing words from a release group name.

    :param string: raw release group text
    :type string: str
    :param forbidden_groupnames: lowercase words to drop when they start or end the name
    :type forbidden_groupnames: iterable of str
    :param groupname_ignore_seps: separator characters that may legitimately wrap a group name
    :type groupname_ignore_seps: str
    :param groupname_seps: separator characters always stripped from both ends
    :type groupname_seps: str
    :param all_seps: every separator character; used to check that a forbidden word is
        delimited from the rest of the name
    :type all_seps: str
    :return: cleaned group name
    :rtype: str
    """
    string = string.strip(groupname_seps)
    # Ignored separators are stripped only when they do not wrap the name on both
    # sides and none of them also occurs inside the name.
    wrapped = string.endswith(tuple(groupname_ignore_seps)) and string.startswith(tuple(groupname_ignore_seps))
    if not wrapped and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
        string = string.strip(groupname_ignore_seps)
    for forbidden in forbidden_groupnames:
        if not forbidden:
            # An empty word would match everywhere and wipe the whole name.
            continue
        if string.lower().startswith(forbidden) and string[len(forbidden):len(forbidden) + 1] in all_seps:
            string = string[len(forbidden):]
            string = string.strip(groupname_seps)
        if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in all_seps:
            # Drop the trailing word itself. The previous slice, [:len(forbidden)],
            # kept only the first characters of the name ('GROUP-rip' -> 'GRO').
            string = string[:-len(forbidden)]
            string = string.strip(groupname_seps)
    return string.strip()
 def release_group(config):
    """
    Builder for rebulk object.

    :param config: rule configuration; ``forbidden_names`` and ``ignored_seps`` are read
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    forbidden_groupnames = config['forbidden_names']
    groupname_ignore_seps = config['ignored_seps']
    groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])
    def clean_groupname(string):
        """
        Removes and strip separators from input_string

        :param string:
        :type string:
        :return:
        :rtype:
        """
        return _clean_groupname(string, forbidden_groupnames, groupname_ignore_seps, groupname_seps, seps)
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))
    expected_group = build_expected_function('expected_group')
    # User-supplied expected groups win any conflict (the solver returns the other
    # match) and are only enabled when the context provides 'expected_group'.
    rebulk.functional(expected_group, name='release_group', tags=['expected'],
                      validator=seps_surround,
                      conflict_solver=lambda match, other: other,
                      disabled=lambda context: not context.get('expected_group'))
    return rebulk.rules(
        DashSeparatedReleaseGroup(clean_groupname),
        SceneReleaseGroup(clean_groupname),
        AnimeReleaseGroup
    )
 # Names of matches that SceneReleaseGroup.is_previous_match accepts as directly
 # preceding a scene release group, unless the match carries one of
 # _scene_no_previous_tags.
 _scene_previous_names = ('video_codec', 'source', 'video_api', 'audio_codec', 'audio_profile', 'video_profile',
                         'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
                         'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix')
 # Tags that make a match with any other name acceptable as a preceding match.
 _scene_previous_tags = ('release-group-prefix',)
 # Tags that veto a match whose name is listed in _scene_previous_names.
 _scene_no_previous_tags = ('no-release-group-prefix',)
 class DashSeparatedReleaseGroup(Rule):
    """
    Detect dash separated release groups that might appear at the end or at the beginning of a release name.

    Series.S01E02.Pilot.DVDRip.x264-CS.mkv
        release_group: CS
    abc-the.title.name.1983.1080p.bluray.x264.mkv
        release_group: abc

    At the end: Release groups should be dash-separated and shouldn't contain spaces nor
    appear in a group with other matches. The preceding matches should be separated by dot.
    If a release group is found, the conflicting matches are removed.

    At the beginning: Release groups should be dash-separated and shouldn't contain spaces nor appear in a group.
    It should be followed by a hole with dot-separated words.
    Detection only happens if no matches exist at the beginning.
    """
    consequence = [RemoveMatch, AppendMatch]
    def __init__(self, value_formatter):
        """
        Store the formatter applied to the value of created release_group matches.

        :param value_formatter: callable used as ``formatter`` of the new Match
        """
        super(DashSeparatedReleaseGroup, self).__init__()
        self.value_formatter = value_formatter
    @classmethod
    def is_valid(cls, matches, candidate, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Whether a candidate is a valid release group.

        :param matches: current matches
        :param candidate: match or hole considered as release group
        :param start: start index of the filepart
        :param end: end index of the search area
        :param at_end: True when validating an end-of-name candidate, False for a
            beginning-of-name candidate
        :return: True or False; None (falsy) when the backward walk of the
            ``at_end`` case finishes without a verdict
        """
        if not at_end:
            # Beginning of the name: need more than one character, outside any group marker.
            if len(candidate.value) <= 1:
                return False
            if matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group'):
                return False
            # The hole starting right after the candidate must look like "-dot.separated.words".
            first_hole = matches.holes(candidate.end, end, predicate=lambda m: m.start == candidate.end, index=0)
            if not first_hole:
                return False
            raw_value = first_hole.raw
            return raw_value[0] == '-' and '-' not in raw_value[1:] and '.' in raw_value and ' ' not in raw_value
        # End of the name: reject a candidate sharing a group marker with another
        # non-private match that has a different span.
        group = matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group', index=0)
        if group and matches.at_match(group, predicate=lambda m: not m.private and m.span != candidate.span):
            return False
        # Walk backwards from the candidate, one non-private and non-'expected' match
        # at a time, looking at the text between consecutive matches.
        count = 0
        match = candidate
        while match:
            current = matches.range(start,
                                    match.start,
                                    index=-1,
                                    predicate=lambda m: not m.private and not 'expected' in m.tags)
            if not current:
                break
            separator = match.input_string[current.end:match.start]
            # The dash may be part of the match itself rather than of the gap.
            if not separator and match.raw[0] == '-':
                separator = '-'
            match = current
            if count == 0:
                # First step: the candidate must be dash-separated from what precedes it.
                if separator != '-':
                    break
                count += 1
                continue
            # Later steps: valid as soon as two preceding matches are separated by a single dot.
            if separator == '.':
                return True
    def detect(self, matches, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Detect release group at the end or at the beginning of a filepart.

        :param matches: current matches
        :param start: start index of the filepart
        :param end: end index of the filepart
        :param at_end: True to search at the end, False to search at the beginning
        :return: the validated candidate (match or hole), or None when there is none
        """
        candidate = None
        if at_end:
            # A trailing container match (file extension) is skipped over.
            container = matches.ending(end, lambda m: m.name == 'container', index=0)
            if container:
                end = container.start
            # An existing match ending there may be the group, provided it is public,
            # not tagged 'not-a-release-group', has no dash and no surrounding whitespace.
            candidate = matches.ending(end, index=0, predicate=(
                lambda m: not m.private and not (
                    m.name == 'other' and 'not-a-release-group' in m.tags
                ) and '-' not in m.raw and m.raw.strip() == m.raw))
        if not candidate:
            if at_end:
                # Otherwise use the last hole, if it reaches `end` and starts with a dash.
                candidate = matches.holes(start, end, seps=seps, index=-1,
                                          predicate=lambda m: m.end == end and m.raw.strip(seps) and m.raw[0] == '-')
            else:
                # At the beginning: the first hole, if it starts exactly at `start`.
                candidate = matches.holes(start, end, seps=seps, index=0,
                                          predicate=lambda m: m.start == start and m.raw.strip(seps))
        if candidate and self.is_valid(matches, candidate, start, end, at_end):
            return candidate
    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        """
        Find a dash-separated release group when no release_group match exists yet.

        :return: ``(to_remove, to_append)`` for the first filepart yielding a result,
            otherwise None
        """
        if matches.named('release_group'):
            return
        to_remove = []
        to_append = []
        for filepart in matches.markers.named('path'):
            # Try the end of the filepart first; matches sitting at an end candidate are dropped.
            candidate = self.detect(matches, filepart.start, filepart.end, True)
            if candidate:
                to_remove.extend(matches.at_match(candidate))
            else:
                candidate = self.detect(matches, filepart.start, filepart.end, False)
            if candidate:
                releasegroup = Match(candidate.start, candidate.end, name='release_group',
                                     formatter=self.value_formatter, input_string=candidate.input_string)
                # The formatter may reduce the value to an empty string; skip the append then.
                if releasegroup.value:
                    to_append.append(releasegroup)
                # Returns from inside the loop: later fileparts are not examined.
                if to_remove or to_append:
                    return to_remove, to_append
 class SceneReleaseGroup(Rule):
    """
    Add release_group match in existing matches (scene format).

    Something.XViD-ReleaseGroup.mkv

    The last hole of each filepart becomes the release group when it directly
    follows a match accepted by :meth:`is_previous_match`.
    """
    dependency = [TitleFromPosition]
    consequence = AppendMatch
    properties = {'release_group': [None]}
    def __init__(self, value_formatter):
        """
        Store the formatter applied to release group holes and group markers.

        :param value_formatter: callable used as ``formatter``
        """
        super(SceneReleaseGroup, self).__init__()
        self.value_formatter = value_formatter
    @staticmethod
    def is_previous_match(match):
        """
        Check if match can precede release_group

        A match named in ``_scene_previous_names`` qualifies unless it carries a tag from
        ``_scene_no_previous_tags``; any other match qualifies only when it carries a tag
        from ``_scene_previous_tags``.

        :param match:
        :return:
        """
        return not match.tagged(*_scene_no_previous_tags) if match.name in _scene_previous_names else \
            match.tagged(*_scene_previous_tags)
    def when(self, matches, context):  # pylint:disable=too-many-locals
        """
        Collect one release_group hole per filepart that has none yet.

        Side effect: title matches other than the first one that lie inside the chosen
        hole are removed from ``matches`` directly.

        :return: list of holes renamed to ``release_group``
        """
        ret = []
        for filepart in marker_sorted(matches.markers.named('path'), matches):
            # pylint:disable=cell-var-from-loop
            start, end = filepart.span
            # If a release_group is found before, ignore this kind of release_group rule.
            if matches.named('release_group', predicate=lambda m: m.start >= start and m.end <= end):
                continue
            titles = matches.named('title', predicate=lambda m: m.start >= start and m.end <= end)
            def keep_only_first_title(match):
                """
                Keep only first title from this filepart, as other ones are most likely release group.

                Used as an ``ignore`` filter: returns True for every title but the first,
                so those extra titles are treated as unmatched text when computing holes.

                :param match:
                :type match:
                :return:
                :rtype:
                """
                return match in titles[1:]
            # Last hole of the filepart with a non-empty cleaned value.
            last_hole = matches.holes(start, end + 1, formatter=self.value_formatter,
                                      ignore=keep_only_first_title,
                                      predicate=lambda hole: cleanup(hole.value), index=-1)
            if last_hole:
                def previous_match_filter(match):
                    """
                    Filter to apply to find previous match

                    Rejects matches starting before the filepart; private matches are
                    accepted only when they qualify via ``is_previous_match``.

                    :param match:
                    :type match:
                    :return:
                    :rtype:
                    """
                    if match.start < filepart.start:
                        return False
                    return not match.private or self.is_previous_match(match)
                previous_match = matches.previous(last_hole,
                                                  previous_match_filter,
                                                  index=0)
                # The hole must directly follow a qualifying match (separators only in
                # between) and must not be a plain number.
                if previous_match and (self.is_previous_match(previous_match)) and \
                        not matches.input_string[previous_match.end:last_hole.start].strip(seps) \
                        and not int_coercable(last_hole.value.strip(seps)):
                    last_hole.name = 'release_group'
                    last_hole.tags = ['scene']
                    # if hole is inside a group marker with same value, remove [](){} ...
                    group = matches.markers.at_match(last_hole, lambda marker: marker.name == 'group', 0)
                    if group:
                        group.formatter = self.value_formatter
                        if group.value == last_hole.value:
                            # Shrink the hole by one character on each side to drop the enclosing brackets.
                            last_hole.start = group.start + 1
                            last_hole.end = group.end - 1
                            last_hole.tags = ['anime']
                    # Extra titles swallowed by the hole are removed from matches right away.
                    ignored_matches = matches.range(last_hole.start, last_hole.end, keep_only_first_title)
                    for ignored_match in ignored_matches:
                        matches.remove(ignored_match)
                    ret.append(last_hole)
        return ret
 class AnimeReleaseGroup(Rule):
    """
    Add release_group match in existing matches (anime format)

    ...[ReleaseGroup] Something.mkv

    For each filepart, the first ``group`` marker that contains no match other than
    ``weak-language`` ones, has non-separator content and is not a plain number is
    turned into a release group.
    """
    dependency = [SceneReleaseGroup, TitleFromPosition]
    consequence = [RemoveMatch, AppendMatch]
    properties = {'release_group': [None]}
    def when(self, matches, context):
        """
        Build anime-style release groups when no release_group match exists yet.

        :return: ``(to_remove, to_append)`` when something was found, otherwise False
        """
        to_remove = []
        to_append = []
        # If a release_group is found before, ignore this kind of release_group rule.
        # (A former second guard also required an existing release_group, so it could
        # never fire after this one and has been dropped.)
        if matches.named('release_group'):
            return False
        for filepart in marker_sorted(matches.markers.named('path'), matches):
            # pylint:disable=bad-continuation
            empty_group = matches.markers.range(filepart.start,
                                                filepart.end,
                                                lambda marker: (marker.name == 'group'
                                                                and not matches.range(marker.start, marker.end,
                                                                                      lambda m:
                                                                                      'weak-language' not in m.tags)
                                                                and marker.value.strip(seps)
                                                                and not int_coercable(marker.value.strip(seps))), 0)
            if empty_group:
                # Work on a copy so the original marker stays untouched, then shrink it
                # by one character on each side to leave out the enclosing brackets.
                group = copy.copy(empty_group)
                group.marker = False
                group.raw_start += 1
                group.raw_end -= 1
                group.tags = ['anime']
                group.name = 'release_group'
                to_append.append(group)
                # Weak language guesses inside the group are superseded by the release group.
                to_remove.extend(matches.range(empty_group.start, empty_group.end,
                                               lambda m: 'weak-language' in m.tags))
        if to_remove or to_append:
            return to_remove, to_append
        return False
@@ -1,163 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 screen_size property
 """
 from rebulk.match import Match
 from rebulk.remodule import re
 from rebulk import Rebulk, Rule, RemoveMatch, AppendMatch
 from ..common.pattern import is_disabled
 from ..common.quantity import FrameRate
 from ..common.validators import seps_surround
 from ..common import dash, seps
 from ...reutils import build_or_pattern
 def screen_size(config):
    """
    Build the Rebulk object that detects ``screen_size`` (and ``frame_rate``).

    :param config: rule configuration; reads ``interlaced``, ``progressive``,
        ``frame_rates``, ``min_ar`` and ``max_ar``
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def screen_size_disabled(context):
        return is_disabled(context, 'screen_size')
    def frame_rate_disabled(context):
        return is_disabled(context, 'frame_rate')
    def yield_to_other_properties(match, other):  # pylint:disable=unused-argument
        """Default resolution against another screen_size, otherwise the other match is returned."""
        return '__default__' if other.name == 'screen_size' else other
    interlaced = frozenset(set(config['interlaced']))
    progressive = frozenset(set(config['progressive']))
    frame_rates = list(map(re.escape, config['frame_rates']))
    min_ar = config['min_ar']
    max_ar = config['max_ar']
    rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name='screen_size', validator=seps_surround, abbreviations=[dash],
                    disabled=screen_size_disabled)
    frame_rate_pattern = build_or_pattern(frame_rates, name='frame_rate')
    interlaced_pattern = build_or_pattern(interlaced, name='height')
    progressive_pattern = build_or_pattern(progressive, name='height')
    res_pattern = r'(?:(?P<width>\d{3,4})(?:x|\*))?'
    optional_frame_rate = frame_rate_pattern + '?'
    progressive_height = res_pattern + progressive_pattern
    # Optional width, a known height, then scan type / frame rate variants.
    rebulk.regex(res_pattern + interlaced_pattern + r'(?P<scan_type>i)' + optional_frame_rate)
    rebulk.regex(progressive_height + r'(?P<scan_type>p)' + optional_frame_rate)
    rebulk.regex(progressive_height + r'(?P<scan_type>p)?(?:hd)')
    rebulk.regex(progressive_height + r'(?P<scan_type>p)?x?')
    rebulk.string('4k', value='2160p')
    # Free-form WIDTHxHEIGHT.
    rebulk.regex(r'(?P<width>\d{3,4})-?(?:x|\*)-?(?P<height>\d{3,4})',
                 conflict_solver=yield_to_other_properties)
    rebulk.regex(frame_rate_pattern + '(p|fps)', name='frame_rate',
                 formatter=FrameRate.fromstring, disabled=frame_rate_disabled)
    rebulk.rules(PostProcessScreenSize(progressive, min_ar, max_ar), ScreenSizeOnlyOne, ResolveScreenSizeConflicts)
    return rebulk
 class PostProcessScreenSize(Rule):
    """
    Process the screen size calculating the aspect ratio if available.

    Convert to a standard notation (720p, 1080p, etc) when it's a standard resolution and
    aspect ratio is valid or not available.

    It also creates an aspect_ratio match when available.
    """
    consequence = AppendMatch
    def __init__(self, standard_heights, min_ar, max_ar):
        """
        :param standard_heights: heights eligible for the ``<height><scan_type>`` notation
        :param min_ar: exclusive lower bound of an accepted aspect ratio
        :param max_ar: exclusive upper bound of an accepted aspect ratio
        """
        super(PostProcessScreenSize, self).__init__()
        self.standard_heights = standard_heights
        self.min_ar = min_ar
        self.max_ar = max_ar
    def when(self, matches, context):
        """
        Normalize each screen_size value in place and collect derived matches.

        :return: frame_rate and aspect_ratio matches to append
        """
        to_append = []
        for match in matches.named('screen_size'):
            if not is_disabled(context, 'frame_rate'):
                for frame_rate in match.children.named('frame_rate'):
                    frame_rate.formatter = FrameRate.fromstring
                    to_append.append(frame_rate)
            values = match.children.to_dict()
            if 'height' not in values:
                continue
            scan_type = (values.get('scan_type') or 'p').lower()
            height = values['height']
            if 'width' not in values:
                match.value = '{0}{1}'.format(height, scan_type)
                continue
            width = values['width']
            try:
                calculated_ar = float(width) / float(height)
            except ZeroDivisionError:
                # A zero height such as "640x000" has no aspect ratio; keep the
                # raw WIDTHxHEIGHT notation instead of crashing the whole parse.
                match.value = '{0}x{1}'.format(width, height)
                continue
            aspect_ratio = Match(match.start, match.end, input_string=match.input_string,
                                 name='aspect_ratio', value=round(calculated_ar, 3))
            if not is_disabled(context, 'aspect_ratio'):
                to_append.append(aspect_ratio)
            if height in self.standard_heights and self.min_ar < calculated_ar < self.max_ar:
                match.value = '{0}{1}'.format(height, scan_type)
            else:
                match.value = '{0}x{1}'.format(width, height)
        return to_append
 class ScreenSizeOnlyOne(Rule):
    """
    Keep a single screen_size per filepath part.

    When a filepart holds several screen_size matches with differing values, all
    but the last one are removed.
    """
    consequence = RemoveMatch
    def when(self, matches, context):
        surplus = []
        for filepart in matches.markers.named('path'):
            found = list(matches.range(filepart.start, filepart.end,
                                       lambda match: match.name == 'screen_size'))
            found.reverse()
            if len(found) <= 1:
                continue
            distinct_values = {match.value for match in found}
            if len(distinct_values) > 1:
                # Reversed order: index 0 is the last screen_size, which is kept.
                surplus.extend(found[1:])
        return surplus
 class ResolveScreenSizeConflicts(Rule):
    """
    Resolve screen_size conflicts with season and episode matches.

    For the first screen_size of each filepart that conflicts with season/episode
    matches: when a video_profile directly follows it, or a date/source/other/
    streaming_service match directly precedes it, the conflicting matches are removed;
    otherwise the screen_size itself is removed.
    """
    consequence = RemoveMatch
    def when(self, matches, context):
        def has_content(hole):
            return hole.value and hole.value.strip(seps)
        doomed = []
        for filepart in matches.markers.named('path'):
            screensize = matches.range(filepart.start, filepart.end, lambda match: match.name == 'screen_size', 0)
            if not screensize:
                continue
            conflicts = matches.conflicting(screensize, lambda match: match.name in ('season', 'episode'))
            if not conflicts:
                continue
            profile = matches.range(screensize.end, filepart.end, lambda match: match.name == 'video_profile', 0)
            profile_adjacent = bool(profile) and not matches.holes(screensize.end, profile.start,
                                                                   predicate=has_content)
            if profile_adjacent:
                doomed.extend(conflicts)
            before = matches.previous(screensize, index=0, predicate=(
                lambda m: m.name in ('date', 'source', 'other', 'streaming_service')))
            before_adjacent = bool(before) and not matches.holes(before.end, screensize.start,
                                                                 predicate=has_content)
            if before_adjacent:
                doomed.extend(conflicts)
            if not (profile_adjacent or before_adjacent):
                doomed.append(screensize)
        return doomed
@@ -1,30 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 size property
 """
 import re
 from rebulk import Rebulk
 from ..common import dash
 from ..common.quantity import Size
 from ..common.pattern import is_disabled
 from ..common.validators import seps_surround
 def size(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object that detects the ``size`` property (e.g. ``700mb``, ``1.4-GB``).

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # Integer and decimal quantities followed by an mb/gb/tb unit.
    size_patterns = (r'\d+-?[mgt]b', r'\d+\.\d+-?[mgt]b')
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'size'))
    rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='size', validator=seps_surround)
    rebulk.regex(*size_patterns, formatter=Size.fromstring, tags=['release-group-prefix'])
    return rebulk
@@ -1,235 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 source property
 """
 import copy
 from rebulk.remodule import re
 from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule
 from .audio_codec import HqConflictRule
 from ..common import dash, seps
 from ..common.pattern import is_disabled
 from ..common.validators import seps_before, seps_after, or_
 def source(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object that detects the ``source`` property.

    Every pattern below is registered with the default name ``source``.
    Patterns that contain a named ``other`` / ``another`` group also supply a
    value for that group through the ``value`` mapping. Three validation rules
    are attached at the end.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True)
    rebulk = rebulk.defaults(name='source',
                             tags=['video-codec-prefix', 'streaming_service.suffix'],
                             validate_all=True,
                             validator={'__parent__': or_(seps_before, seps_after)})
    # Regex fragments for the word "Rip" (captured as group ``other``) placed
    # before or after the source token, with an optional dash in between.
    rip_prefix = '(?P<other>Rip)-?'
    rip_suffix = '-?(?P<other>Rip)'
    # Same as rip_suffix, but the whole fragment may be absent.
    rip_optional_suffix = '(?:' + rip_suffix + ')?'
    def build_source_pattern(*patterns, **kwargs):
        """Helper pattern to build source pattern.

        Wraps each given pattern in a capturing group, surrounded by the
        optional ``prefix`` and ``suffix`` keyword fragments, and returns the
        resulting list of regex strings.
        """
        prefix_format = kwargs.get('prefix') or ''
        suffix_format = kwargs.get('suffix') or ''
        string_format = prefix_format + '({0})' + suffix_format
        return [string_format.format(pattern) for pattern in patterns]
    def demote_other(match, other):  # pylint: disable=unused-argument
        """Default conflict solver with 'other' property.

        Returns the conflicting match when it is named ``other`` or
        ``release_group``; otherwise returns the ``'__default__'`` marker.
        """
        return other if other.name == 'other' or other.name == 'release_group' else '__default__'
    # --- Analog / cinema captures -------------------------------------------
    rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
                 value={'source': 'VHS', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('CAM', suffix=rip_optional_suffix),
                 value={'source': 'Camera', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?CAM', suffix=rip_optional_suffix),
                 value={'source': 'HD Camera', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=rip_optional_suffix),
                 value={'source': 'Telesync', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=rip_optional_suffix),
                 value={'source': 'HD Telesync', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint')
    rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=rip_optional_suffix),
                 value={'source': 'Telecine', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=rip_optional_suffix),
                 value={'source': 'HD Telecine', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('PPV', suffix=rip_optional_suffix),
                 value={'source': 'Pay-per-view', 'other': 'Rip'})
    # --- Television ---------------------------------------------------------
    rebulk.regex(*build_source_pattern('SD-?TV', suffix=rip_optional_suffix),
                 value={'source': 'TV', 'other': 'Rip'})
    # Bare "TV" is only accepted together with a mandatory Rip suffix/prefix.
    rebulk.regex(*build_source_pattern('TV', suffix=rip_suffix),  # TV is too common to allow matching
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix),
                 value={'source': 'TV', 'other': 'Rip'})
    # "TV" directly followed by "Dub" (lookahead, "Dub" itself is not consumed).
    rebulk.regex(*build_source_pattern('TV-?(?=Dub)'), value='TV')
    rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=rip_optional_suffix),
                 value={'source': 'Digital TV', 'other': 'Rip'})
    # --- DVD / digital master -----------------------------------------------
    rebulk.regex(*build_source_pattern('DVD', suffix=rip_optional_suffix),
                 value={'source': 'DVD', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DM', suffix=rip_optional_suffix),
                 value={'source': 'Digital Master', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('VIDEO-?TS', 'DVD-?R(?:$|(?!E))',  # 'DVD-?R(?:$|^E)' => DVD-Real ...
                                       'DVD-?9', 'DVD-?5'), value='DVD')
    # --- HDTV variants (conflicts resolved with demote_other) ---------------
    rebulk.regex(*build_source_pattern('HD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV-?HD', suffix=rip_suffix), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', suffix='-?(?P<other>Rip-?HD)'), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    # --- Video on demand / web ----------------------------------------------
    rebulk.regex(*build_source_pattern('VOD', suffix=rip_optional_suffix),
                 value={'source': 'Video on Demand', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix),
                 value={'source': 'Web', 'other': 'Rip'})
    # WEBCap is a synonym to WEBRip, mostly used by non english
    rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
                 value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
                 value={'source': 'Web'})
    # Bare "WEB" is tagged 'weak.source'; ValidateWeakSource inspects that tag.
    rebulk.regex('(WEB)', value='Web', tags='weak.source')
    # --- HD-DVD / Blu-ray ---------------------------------------------------
    rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
                 value={'source': 'HD-DVD', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50', suffix=rip_optional_suffix),
                 value={'source': 'Blu-ray', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('(?P<another>BR)-?(?=Scr(?:eener)?)', '(?P<another>BR)-?(?=Mux)'),  # BRRip
                 value={'source': 'Blu-ray', 'another': 'Reencoded'})
    rebulk.regex(*build_source_pattern('(?P<another>BR)', suffix=rip_suffix),  # BRRip
                 value={'source': 'Blu-ray', 'other': 'Rip', 'another': 'Reencoded'})
    rebulk.regex(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'), value='Ultra HD Blu-ray')
    # --- Analog HDTV / Ultra HDTV / satellite -------------------------------
    rebulk.regex(*build_source_pattern('AHDTV'), value='Analog HDTV')
    rebulk.regex(*build_source_pattern('UHD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
                 value={'source': 'Ultra HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('UHD', suffix=rip_suffix), conflict_solver=demote_other,
                 value={'source': 'Ultra HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=rip_optional_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})
    # Post-processing rules defined later in this module.
    rebulk.rules(ValidateSourcePrefixSuffix, ValidateWeakSource, UltraHdBlurayRule)
    return rebulk
 class UltraHdBlurayRule(Rule):
    """
    Turn a ``source: Blu-ray`` match into ``source: Ultra HD Blu-ray``.

    The replacement happens when an ``other: Ultra HD`` match sits next to the
    Blu-ray source (before or after it, with only tolerated matches and
    separators in between), or when the filepart holds a ``2160p`` screen size.
    """
    dependency = HqConflictRule
    consequence = [RemoveMatch, AppendMatch]

    @classmethod
    def find_ultrahd(cls, matches, start, end, index):
        """Return the public ``other: Ultra HD`` match at ``index`` within ``[start, end)``."""
        def is_ultrahd(candidate):
            if candidate.private:
                return False
            return candidate.name == 'other' and candidate.value == 'Ultra HD'

        return matches.range(start, end, index=index, predicate=is_ultrahd)

    @classmethod
    def validate_range(cls, matches, start, end):
        """Check that ``[start, end)`` has no textual hole and no disallowed match."""
        def has_text(hole):
            return hole.value.strip(seps)

        def is_disallowed(candidate):
            # Private matches, screen_size, color_depth and 'other' matches
            # tagged 'uhdbluray-neighbor' are tolerated between the two matches.
            if candidate.private:
                return False
            if candidate.name in ('screen_size', 'color_depth'):
                return False
            return candidate.name != 'other' or 'uhdbluray-neighbor' not in candidate.tags

        if matches.holes(start, end, predicate=has_text):
            return False
        return not matches.range(start, end, predicate=is_disallowed)

    def when(self, matches, context):
        def is_bluray(candidate):
            return not candidate.private and candidate.name == 'source' and candidate.value == 'Blu-ray'

        def is_2160p(candidate):
            return candidate.name == 'screen_size' and candidate.value == '2160p'

        replaced = []
        replacements = []
        for filepart in matches.markers.named('path'):
            for bluray in matches.range(filepart.start, filepart.end, predicate=is_bluray):
                # Look for an Ultra HD marker before the source first ...
                ultrahd = self.find_ultrahd(matches, filepart.start, bluray.start, -1)
                adjacent = ultrahd and self.validate_range(matches, ultrahd.end, bluray.start)
                if not adjacent:
                    # ... then after it.
                    ultrahd = self.find_ultrahd(matches, bluray.end, filepart.end, 0)
                    adjacent = ultrahd and self.validate_range(matches, bluray.end, ultrahd.start)
                if not adjacent and not matches.range(filepart.start, filepart.end, predicate=is_2160p):
                    continue

                if ultrahd:
                    ultrahd.private = True

                upgraded = copy.copy(bluray)
                upgraded.value = 'Ultra HD Blu-ray'
                replaced.append(bluray)
                replacements.append(upgraded)

        if not (replaced or replacements):
            return False
        return replaced, replacements
 class ValidateSourcePrefixSuffix(Rule):
    """
    Remove source matches that are glued to surrounding text.

    A source is kept only when each side is either a separator or a match
    tagged ``source-prefix`` (before) / ``source-suffix`` (after).
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        def is_source(candidate):
            return candidate.name == 'source'

        def is_prefix(candidate):
            return 'source-prefix' in candidate.tags

        def is_suffix(candidate):
            return 'source-suffix' in candidate.tags

        rejected = []
        for filepart in matches.markers.named('path'):
            for found in matches.range(filepart.start, filepart.end, predicate=is_source):
                parent = found.initiator
                # NOTE(review): the prefix lookup passes a start greater than its
                # end (start - 1, start - 2); kept exactly as-is - confirm intent.
                bad_left = not seps_before(parent) and \
                    not matches.range(parent.start - 1, parent.start - 2, is_prefix)
                if bad_left or (not seps_after(parent) and
                                not matches.range(parent.end, parent.end + 1, is_suffix)):
                    if parent.children:
                        rejected.extend(parent.children)
                    rejected.append(parent)
        return rejected
 class ValidateWeakSource(Rule):
    """
    Discard ``weak.source`` matches that most likely belong to the title.
    """
    dependency = [ValidateSourcePrefixSuffix]
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        def is_source(candidate):
            return candidate.name == 'source'

        def has_text(hole):
            return hole.value.strip(seps)

        rejected = []
        for filepart in matches.markers.named('path'):
            for found in matches.range(filepart.start, filepart.end, predicate=is_source):
                if 'weak.source' not in found.tags:
                    continue
                # A weak source followed by another source in the same filepart,
                # with unmatched text before it, is treated as part of the title.
                if not matches.range(found.end, filepart.end, predicate=is_source):
                    continue
                if not matches.holes(filepart.start, found.start, predicate=has_text, index=-1):
                    continue
                if found.children:
                    rejected.extend(found.children)
                rejected.append(found)
        return rejected
@@ -1,78 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 streaming_service property
 """
 import re
 from rebulk import Rebulk
 from rebulk.rules import Rule, RemoveMatch
 from ..common.pattern import is_disabled
 from ...rules.common import seps, dash
 from ...rules.common.validators import seps_before, seps_after
 def streaming_service(config):  # pylint: disable=too-many-statements,unused-argument
    """Streaming service property.

    Each configuration entry maps a service name to one pattern or a list of
    patterns. A pattern starting with ``re:`` is registered as a regular
    expression (without that marker); any other pattern is registered as a
    case-insensitive plain string.

    :param config: rule configuration
    :type config: dict
    :return:
    :rtype: Rebulk
    """
    regex_marker = 're:'
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'streaming_service'))
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['source-prefix'])
    for value, items in config.items():
        patterns = items if isinstance(items, list) else [items]
        for pattern in patterns:
            if pattern.startswith(regex_marker):
                # Strip the marker: leaving it in would make the regex require
                # the literal text "re:" in the input.
                rebulk.regex(pattern[len(regex_marker):], value=value)
            else:
                rebulk.string(pattern, value=value)
    rebulk.rules(ValidateStreamingService)
    return rebulk
 class ValidateStreamingService(Rule):
    """Remove streaming service matches that are not attached to a neighbour."""
    priority = 128
    consequence = RemoveMatch

    def when(self, matches, context):
        """Streaming service is always before source.

        A service without an ``other`` child on its initiator is removed when
        it is attached neither to a following ``streaming_service.suffix``
        match nor to a preceding ``streaming_service.prefix`` match.

        :param matches:
        :type matches: rebulk.match.Matches
        :param context:
        :type context: dict
        :return: matches to remove
        """
        def has_text(hole):
            return hole.value.strip(seps)

        def is_suffix(candidate):
            return 'streaming_service.suffix' in candidate.tags

        def is_prefix(candidate):
            return 'streaming_service.prefix' in candidate.tags

        def is_criterion(candidate):
            return candidate.value == 'Criterion'

        rejected = []
        for service in matches.named('streaming_service'):
            following = matches.next(service, is_suffix, 0)
            preceding = matches.previous(service, is_prefix, 0)
            has_other = service.initiator and service.initiator.children.named('other')

            if not has_other and (
                    not following or
                    matches.holes(service.end, following.start, predicate=has_text) or
                    not seps_before(service)
            ) and (
                    not preceding or
                    matches.holes(preceding.end, service.start, predicate=has_text) or
                    not seps_after(service)
            ):
                rejected.append(service)
                continue

            if service.value == 'Comedy Central':
                # A valid Comedy Central match invalidates Criterion edition matches.
                rejected.extend(matches.named('edition', predicate=is_criterion))
        return rejected
@@ -1,349 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 title property
 """
 from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
 from rebulk.formatters import formatters
 from .film import FilmTitleRule
 from .language import (
    SubtitlePrefixLanguageRule,
    SubtitleSuffixLanguageRule,
    SubtitleExtensionRule,
    NON_SPECIFIC_LANGUAGES
 )
 from ..common import seps, title_seps
 from ..common.comparators import marker_sorted
 from ..common.expected import build_expected_function
 from ..common.formatters import cleanup, reorder_title
 from ..common.pattern import is_disabled
 from ..common.validators import seps_surround
 def title(config):  # pylint:disable=unused-argument
    """
    Create the Rebulk object detecting the ``title`` property.

    Registers the position-based title rules, plus a functional pattern that
    is only active when the context provides ``expected_title``.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def title_disabled(context):
        return is_disabled(context, 'title')

    def no_expected_title(context):
        return not context.get('expected_title')

    def prefer_other(match, other):  # pylint:disable=unused-argument
        return other

    builder = Rebulk(disabled=title_disabled)
    builder.rules(TitleFromPosition, PreferTitleWithYear)

    find_expected = build_expected_function('expected_title')
    builder.functional(
        find_expected,
        name='title',
        tags=['expected', 'title'],
        validator=seps_surround,
        formatter=formatters(cleanup, reorder_title),
        conflict_solver=prefer_other,
        disabled=no_expected_title)
    return builder
 class TitleBaseRule(Rule):
    """
    Add title match in existing matches.

    The rule scans each ``path`` filepart for holes (text not covered by any
    match), optionally absorbing or trimming ignored matches such as
    languages, and turns the first usable hole into a match named
    ``match_name``. When ``alternative_match_name`` is set, the hole is split
    on title separators and the extra parts are named accordingly.
    """
    # pylint:disable=no-self-use,unused-argument
    consequence = [AppendMatch, RemoveMatch]

    def __init__(self, match_name, match_tags=None, alternative_match_name=None):
        """
        :param match_name: name given to the created title match
        :param match_tags: tags assigned to the created title match
        :param alternative_match_name: name for additional title parts, if any
        """
        super(TitleBaseRule, self).__init__()
        self.match_name = match_name
        self.match_tags = match_tags
        self.alternative_match_name = alternative_match_name

    def hole_filter(self, hole, matches):
        """
        Filter holes for titles. The base implementation accepts every hole.

        :param hole: candidate hole
        :param matches: all matches
        :return: True when the hole may become a title
        :rtype: bool
        """
        return True

    def filepart_filter(self, filepart, matches):
        """
        Filter filepart for titles. The base implementation accepts every filepart.

        :param filepart: candidate ``path`` marker
        :param matches: all matches
        :return: True when the filepart may contain a title
        :rtype: bool
        """
        return True

    def holes_process(self, holes, matches):
        """
        Crop holes around ``group`` markers.

        Group markers whose span equals the span of their ``path`` marker are
        not used for cropping.

        :param holes: holes found in the filepart
        :param matches: all matches
        :return: the cropped holes
        :rtype: list
        """
        cropped_holes = []
        group_markers = matches.markers.named('group')
        # Iterate over a snapshot: removing items from the list being iterated
        # would skip the marker that follows each removed one.
        for group_marker in list(group_markers):
            path_marker = matches.markers.at_match(group_marker, predicate=lambda m: m.name == 'path', index=0)
            if path_marker and path_marker.span == group_marker.span:
                group_markers.remove(group_marker)
        for hole in holes:
            cropped_holes.extend(hole.crop(group_markers))
        return cropped_holes

    @staticmethod
    def is_ignored(match):
        """
        Ignore matches when scanning for title (hole).

        Full word language and countries won't be ignored if they are uppercase.
        """
        return not (len(match) > 3 and match.raw.isupper()) and match.name in ('language', 'country', 'episode_details')

    def should_keep(self, match, to_keep, matches, filepart, hole, starting):
        """
        Check if this match should be accepted when ending or starting a hole.

        :param match: the ignored match being considered
        :type match: Match
        :param to_keep: matches already selected to be kept
        :type to_keep: list[Match]
        :param matches: all matches
        :type matches: Matches
        :param filepart: the filepart match
        :type filepart: Match
        :param hole: the hole match
        :type hole: Match
        :param starting: true if match is starting the hole
        :type starting: bool
        :return: True when the match must be kept out of the title
        :rtype: bool
        """
        if match.name in ('language', 'country'):
            # Keep language if exactly matching the hole.
            if len(hole.value) == len(match.raw):
                return True
            # Keep language if other languages exists in the filepart.
            outside_matches = filepart.crop(hole)
            other_languages = []
            for outside in outside_matches:
                other_languages.extend(matches.range(outside.start, outside.end,
                                                     lambda c_match: c_match.name == match.name and
                                                     c_match not in to_keep and
                                                     c_match.value not in NON_SPECIFIC_LANGUAGES))
            if not other_languages and (not starting or len(match.raw) <= 3):
                return True
        return False

    def should_remove(self, match, matches, filepart, hole, context):
        """
        Check if this match should be removed after being ignored.

        For ``episode`` type contexts, an ``episode_details`` match is removed
        only when it lies inside the hole; every other match is removed.

        :param match: the ignored match
        :param matches: all matches
        :param filepart: the filepart match
        :param hole: the hole match
        :param context: parsing context
        :return: True when the match must be removed
        :rtype: bool
        """
        if context.get('type') == 'episode' and match.name == 'episode_details':
            return match.start >= hole.start and match.end <= hole.end
        return True

    def check_titles_in_filepart(self, filepart, matches, context):  # pylint:disable=inconsistent-return-statements
        """
        Find title in filepart (ignoring language).

        :param filepart: the ``path`` marker to scan
        :param matches: all matches
        :param context: parsing context
        :return: ``(titles, to_remove)`` for the first usable hole, or None
            when no hole yields a title
        """
        # pylint:disable=too-many-locals,too-many-branches,too-many-statements
        start, end = filepart.span
        holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
                              ignore=self.is_ignored,
                              predicate=lambda m: m.value)
        holes = self.holes_process(holes, matches)
        for hole in holes:
            if not hole or (self.hole_filter and not self.hole_filter(hole, matches)):
                continue
            to_remove = []
            to_keep = []
            ignored_matches = matches.range(hole.start, hole.end, self.is_ignored)
            if ignored_matches:
                # First pass: trim kept matches from the end of the hole.
                for ignored_match in reversed(ignored_matches):
                    # pylint:disable=undefined-loop-variable, cell-var-from-loop
                    trailing = matches.chain_before(hole.end, seps, predicate=lambda m: m == ignored_match)
                    if trailing:
                        should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, False)
                        if should_keep:
                            # should_keep may return a single flag or an (append, crop) pair.
                            # pylint:disable=unpacking-non-sequence
                            try:
                                append, crop = should_keep
                            except TypeError:
                                append, crop = should_keep, should_keep
                            if append:
                                to_keep.append(ignored_match)
                            if crop:
                                hole.end = ignored_match.start
                # Second pass: trim kept matches from the start of the hole.
                for ignored_match in ignored_matches:
                    if ignored_match not in to_keep:
                        starting = matches.chain_after(hole.start, seps,
                                                       predicate=lambda m: m == ignored_match)
                        if starting:
                            should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, True)
                            if should_keep:
                                # pylint:disable=unpacking-non-sequence
                                try:
                                    append, crop = should_keep
                                except TypeError:
                                    append, crop = should_keep, should_keep
                                if append:
                                    to_keep.append(ignored_match)
                                if crop:
                                    hole.start = ignored_match.end
            for match in ignored_matches:
                if self.should_remove(match, matches, filepart, hole, context):
                    to_remove.append(match)
            for keep_match in to_keep:
                if keep_match in to_remove:
                    to_remove.remove(keep_match)
            if hole and hole.value:
                hole.name = self.match_name
                hole.tags = self.match_tags
                if self.alternative_match_name:
                    # Split and keep values that can be a title
                    titles = hole.split(title_seps, lambda m: m.value)
                    for title_match in list(titles[1:]):
                        previous_title = titles[titles.index(title_match) - 1]
                        separator = matches.input_string[previous_title.end:title_match.start]
                        # A lone '-' between two non-separator characters joins
                        # both parts into a single title.
                        if len(separator) == 1 and separator == '-' \
                                and previous_title.raw[-1] not in seps \
                                and title_match.raw[0] not in seps:
                            titles[titles.index(title_match) - 1].end = title_match.end
                            titles.remove(title_match)
                        else:
                            title_match.name = self.alternative_match_name
                else:
                    titles = [hole]
                return titles, to_remove

    def when(self, matches, context):
        """
        Collect title matches to append and ignored matches to remove.

        :return: ``(titles, to_remove)`` or False when nothing was found or an
            ``expected`` title match already exists
        """
        ret = []
        to_remove = []
        if matches.named(self.match_name, lambda match: 'expected' in match.tags):
            return False
        fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
                     if not self.filepart_filter or self.filepart_filter(filepart, matches)]
        # Prioritize fileparts containing the year
        years_fileparts = []
        for filepart in fileparts:
            year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
            if year_match:
                years_fileparts.append(filepart)
        for filepart in fileparts:
            # Fileparts visited here are not revisited in the year pass below.
            try:
                years_fileparts.remove(filepart)
            except ValueError:
                pass
            titles = self.check_titles_in_filepart(filepart, matches, context)
            if titles:
                titles, to_remove_c = titles
                ret.extend(titles)
                to_remove.extend(to_remove_c)
                break
        # Add title match in all fileparts containing the year.
        for filepart in years_fileparts:
            titles = self.check_titles_in_filepart(filepart, matches, context)
            if titles:
                # pylint:disable=unbalanced-tuple-unpacking
                titles, to_remove_c = titles
                ret.extend(titles)
                to_remove.extend(to_remove_c)
        if ret or to_remove:
            return ret, to_remove
        return False
 class TitleFromPosition(TitleBaseRule):
    """
    Title rule producing ``title`` matches and ``alternative_title`` matches
    for additional title parts.
    """
    dependency = [FilmTitleRule, SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule]
    properties = {'title': [None], 'alternative_title': [None]}

    def __init__(self):
        super(TitleFromPosition, self).__init__(match_name='title',
                                                match_tags=['title'],
                                                alternative_match_name='alternative_title')

    def enabled(self, context):
        """Enable the rule unless ``alternative_title`` is disabled in the context."""
        alternative_disabled = is_disabled(context, 'alternative_title')
        return not alternative_disabled
 class PreferTitleWithYear(Rule):
    """
    Prefer title where filepart contains year.

    Titles whose filepart holds a year inside a ``group`` marker win over
    titles whose filepart holds a year outside of one; both win over titles
    without a year. Titles with a different value than the preferred ones are
    removed, and the preferred ones are tagged ``equivalent-ignore``.
    """
    dependency = TitleFromPosition
    consequence = [RemoveMatch, AppendTags(['equivalent-ignore'])]
    properties = {'title': [None]}

    def when(self, matches, context):
        all_titles = matches.named('title')
        grouped_year_titles = []
        plain_year_titles = []

        for candidate in all_titles:
            filepart = matches.markers.at_match(candidate, lambda marker: marker.name == 'path', 0)
            if not filepart:
                continue
            year = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
            if not year:
                continue
            if matches.markers.at_match(year, lambda m: m.name == 'group'):
                grouped_year_titles.append(candidate)
            else:
                plain_year_titles.append(candidate)

        # The first non-empty bucket is the preferred one (possibly none).
        preferred = grouped_year_titles or plain_year_titles
        to_tag = list(preferred)
        kept_values = {candidate.value for candidate in (preferred or all_titles)}
        to_remove = [candidate for candidate in all_titles if candidate.value not in kept_values]

        if not (to_remove or to_tag):
            return False
        return to_remove, to_tag
@@ -1,83 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 type property
 """
 from rebulk import CustomRule, Rebulk, POST_PROCESS
 from rebulk.match import Match
 from ..common.pattern import is_disabled
 from ...rules.processors import Processors
 def _type(matches, value):
    """
    Append a zero-length ``type`` match, positioned at the end of the input
    string, carrying the given value.

    :param matches: matches collection to append to
    :param value: value of the ``type`` match
    :return: None
    """
    input_end = len(matches.input_string)
    type_match = Match(input_end, input_end, name='type', value=value)
    matches.append(type_match)
 def type_(config):  # pylint:disable=unused-argument
    """
    Create the Rebulk object computing the ``type`` property.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def type_disabled(context):
        return is_disabled(context, 'type')

    return Rebulk(disabled=type_disabled).rules(TypeProcessor)
 class TypeProcessor(CustomRule):
    """
    Post processor deciding between ``episode`` and ``movie`` from the other
    matches, unless the context already forces a type.
    """
    priority = POST_PROCESS
    dependency = Processors
    properties = {'type': ['episode', 'movie']}

    def when(self, matches, context):  # pylint:disable=too-many-return-statements
        forced_type = context.get('type', None)
        if forced_type:
            return forced_type

        # Any episode-related match settles the question.
        episode_hints = ('episode', 'season', 'episode_details', 'absolute_episode')
        if any(matches.named(hint) for hint in episode_hints):
            return 'episode'

        if matches.named('film'):
            return 'movie'

        # Without a year, a date or a bonus points to an episode.
        if not matches.named('year'):
            if matches.named('date') or matches.named('bonus'):
                return 'episode'

        has_crc32 = matches.named('crc32')
        has_anime_group = matches.named('release_group', lambda match: 'anime' in match.tags)
        return 'episode' if has_crc32 and has_anime_group else 'movie'

    def then(self, matches, when_response, context):
        """Record the computed type as a ``type`` match."""
        _type(matches, when_response)
@@ -1,126 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 video_codec and video_profile property
 """
 from rebulk import Rebulk, Rule, RemoveMatch
 from rebulk.remodule import re
 from ..common import dash
 from ..common.pattern import is_disabled
 from ..common.validators import seps_after, seps_before, seps_surround
 def video_codec(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object detecting video codec related properties.

    Registers patterns named ``video_codec``, ``video_profile``, ``video_api`` and
    ``color_depth``, then the ``ValidateVideoCodec`` and ``VideoProfileRule`` rules.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    # Regex and string patterns are both case-insensitive.
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    # Defaults applied to the video_codec patterns declared until the next defaults(clear=True).
    rebulk.defaults(name="video_codec",
                    tags=['source-suffix', 'streaming_service.suffix'],
                    disabled=lambda context: is_disabled(context, 'video_codec'))
    rebulk.regex(r'Rv\d{2}', value='RealVideo')
    rebulk.regex('Mpe?g-?2', '[hx]-?262', value='MPEG-2')
    rebulk.string("DVDivX", "DivX", value="DivX")
    rebulk.string('XviD', value='Xvid')
    rebulk.regex('VC-?1', value='VC-1')
    rebulk.string('VP7', value='VP7')
    rebulk.string('VP8', 'VP80', value='VP8')
    rebulk.string('VP9', value='VP9')
    rebulk.regex('[hx]-?263', value='H.263')
    rebulk.regex('[hx]-?264', '(MPEG-?4)?AVC(?:HD)?', value='H.264')
    rebulk.regex('[hx]-?265', 'HEVC', value='H.265')
    # "hevc10" written as one token: the two named groups yield a video_codec child and a
    # color_depth child, each with its own value.
    rebulk.regex('(?P<video_codec>hevc)(?P<color_depth>10)', value={'video_codec': 'H.265', 'color_depth': '10-bit'},
                 tags=['video-codec-suffix'], children=True)
    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
    # From here on patterns are video_profile ones; those tagged 'video_profile.rule'
    # are the ones VideoProfileRule inspects.
    rebulk.defaults(clear=True,
                    name="video_profile",
                    validator=seps_surround,
                    disabled=lambda context: is_disabled(context, 'video_profile'))
    rebulk.string('BP', value='Baseline', tags='video_profile.rule')
    rebulk.string('XP', 'EP', value='Extended', tags='video_profile.rule')
    rebulk.string('MP', value='Main', tags='video_profile.rule')
    rebulk.string('HP', 'HiP', value='High', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/Scalable_Video_Coding
    rebulk.string('SC', 'SVC', value='Scalable Video Coding', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/AVCHD
    rebulk.regex('AVC(?:HD)?', value='Advanced Video Codec High Definition', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/H.265/HEVC
    rebulk.string('HEVC', value='High Efficiency Video Coding', tags='video_profile.rule')
    # The three patterns below carry no 'video_profile.rule' tag.
    rebulk.regex('Hi422P', value='High 4:2:2')
    rebulk.regex('Hi444PP', value='High 4:4:4 Predictive')
    rebulk.regex('Hi10P?', value='High 10')  # no profile validation is required
    # video_api overrides the default name and disabled callback for this single pattern.
    rebulk.string('DXVA', value='DXVA', name='video_api',
                  disabled=lambda context: is_disabled(context, 'video_api'))
    # color_depth patterns; note that 'Hi10P?' is also registered above as a video_profile.
    rebulk.defaults(clear=True,
                    name='color_depth',
                    validator=seps_surround,
                    disabled=lambda context: is_disabled(context, 'color_depth'))
    rebulk.regex('12.?bits?', value='12-bit')
    rebulk.regex('10.?bits?', 'YUV420P10', 'Hi10P?', value='10-bit')
    rebulk.regex('8.?bits?', value='8-bit')
    rebulk.rules(ValidateVideoCodec, VideoProfileRule)
    return rebulk
 class ValidateVideoCodec(Rule):
    """
    Remove video_codec matches that are not properly delimited.

    A codec is kept when each side is either accepted by the separator validator
    (``seps_before`` / ``seps_after``) or has, at the probed index, a match tagged
    'video-codec-prefix' (before) / 'video-codec-suffix' (after).
    """
    priority = 64
    consequence = RemoveMatch

    def enabled(self, context):
        """
        The rule only runs while the video_codec property is enabled.
        """
        return not is_disabled(context, 'video_codec')

    def when(self, matches, context):
        """
        Collect the video_codec matches to remove.

        :return: list of rejected matches
        """
        def tagged_at(index, tag):
            return matches.at_index(index, lambda match: tag in match.tags)

        rejected = []
        for codec in matches.named('video_codec'):
            # The trailing side is only probed once the leading side is accepted.
            leading_ok = seps_before(codec) or tagged_at(codec.start - 1, 'video-codec-prefix')
            if leading_ok and (seps_after(codec) or tagged_at(codec.end + 1, 'video-codec-suffix')):
                continue
            rejected.append(codec)
        return rejected
 class VideoProfileRule(Rule):
    """
    Remove 'video_profile.rule' tagged profiles that have no video_codec match to relate to.
    """
    consequence = RemoveMatch

    def enabled(self, context):
        """
        The rule only runs while the video_profile property is enabled.
        """
        return not is_disabled(context, 'video_profile')

    def when(self, matches, context):
        """
        Collect the profiles for which no codec is found at the same span, before, or after.

        :return: list of profile matches to remove
        """
        def is_codec(match):
            return match.name == 'video_codec'

        orphans = []
        for profile in matches.named('video_profile', lambda match: 'video_profile.rule' in match.tags):
            # Lookups are tried in this order and stop at the first truthy result.
            codec = (matches.at_span(profile.span, is_codec, 0)
                     or matches.previous(profile, is_codec)
                     or matches.next(profile, is_codec))
            if not codec:
                orphans.append(profile)
        return orphans
@@ -1,108 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Website property.
 """
 from pkg_resources import resource_stream  # @UnresolvedImport
 from rebulk.remodule import re
 from rebulk import Rebulk, Rule, RemoveMatch
 from ..common import seps
 from ..common.formatters import cleanup
 from ..common.pattern import is_disabled
 from ..common.validators import seps_surround
 from ...reutils import build_or_pattern
 def website(config):
    """
    Builder for the rebulk object detecting the ``website`` property.

    Reads ``safe_tlds``, ``safe_subdomains``, ``safe_prefixes`` and ``prefixes`` from
    ``config`` and the list of domain extensions from the packaged
    ``tlds-alpha-by-domain.txt`` resource.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'website'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name="website")
    with resource_stream('guessit', 'tlds-alpha-by-domain.txt') as tld_file:
        # Lines containing b'--' are skipped, then the first remaining entry is dropped
        # (presumably the "# Version ..." header line of the file -- confirm against the resource).
        tlds = [
            tld.strip().decode('utf-8')
            for tld in tld_file.readlines()
            if b'--' not in tld
        ][1:]  # All registered domain extension
    safe_tlds = config['safe_tlds']  # For sure a website extension
    safe_subdomains = config['safe_subdomains']  # For sure a website subdomain
    safe_prefix = config['safe_prefixes']  # Those words before a tlds are sure
    website_prefixes = config['prefixes']
    # Pattern 1: at least one safe subdomain, one or more labels, then any known tld.
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 children=True)
    # Pattern 2: optional safe subdomains, a single label, then a safe tld.
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
    # Pattern 3: optional safe subdomains, a label, one or more safe prefixes, then any known tld.
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_prefix) +
                 r'\.)+(?:'+build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)
    # Private prefix matches, tagged so that ValidateWebsitePrefix can check them.
    rebulk.string(*website_prefixes,
                  validator=seps_surround, private=True, tags=['website.prefix'])
    class PreferTitleOverWebsite(Rule):
        """
        If found match is more likely a title, remove website.

        Defined inside the builder because it reads ``safe_subdomains`` and ``safe_prefix``
        from the enclosing scope.
        """
        consequence = RemoveMatch
        @staticmethod
        def valid_followers(match):
            """
            Validator for next website matches

            :return: result of ``match.named('season', 'episode', 'year')``
            """
            return match.named('season', 'episode', 'year')
        def when(self, matches, context):
            to_remove = []
            for website_match in matches.named('website'):
                # A website whose lowercased value starts with a safe subdomain/prefix is always kept.
                safe = False
                for safe_start in safe_subdomains + safe_prefix:
                    if website_match.value.lower().startswith(safe_start):
                        safe = True
                        break
                if not safe:
                    # Removed only when a valid follower comes next and the website
                    # is not inside a 'group' marker.
                    suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
                    if suffix:
                        group = matches.markers.at_match(website_match, lambda marker: marker.name == 'group', 0)
                        if not group:
                            to_remove.append(website_match)
            return to_remove
    rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)
    return rebulk
 class ValidateWebsitePrefix(Rule):
    """
    Remove 'website.prefix' tagged matches that are not directly followed by a website match.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        Collect the prefixes to remove.

        A prefix is removed when there is no following website match, or when
        ``matches.holes`` reports something with a value between the prefix and that website.

        :return: list of prefix matches to remove
        """
        def is_website(match):
            return match.name == 'website'

        def has_value(match):
            return match.value

        rejected = []
        for prefix in matches.tagged('website.prefix'):
            website_match = matches.next(prefix, predicate=is_website, index=0)
            if website_match:
                gap = matches.holes(prefix.end, website_match.start,
                                    formatter=cleanup, seps=seps, predicate=has_value)
                if not gap:
                    continue
            rejected.append(prefix)
        return rejected
@@ -1,341 +0,0 @@
 # Version 2013112900, Last Updated Fri Nov 29 07:07:01 2013 UTC
 AC
 AD
 AE
 AERO
 AF
 AG
 AI
 AL
 AM
 AN
 AO
 AQ
 AR
 ARPA
 AS
 ASIA
 AT
 AU
 AW
 AX
 AZ
 BA
 BB
 BD
 BE
 BF
 BG
 BH
 BI
 BIKE
 BIZ
 BJ
 BM
 BN
 BO
 BR
 BS
 BT
 BV
 BW
 BY
 BZ
 CA
 CAMERA
 CAT
 CC
 CD
 CF
 CG
 CH
 CI
 CK
 CL
 CLOTHING
 CM
 CN
 CO
 COM
 CONSTRUCTION
 CONTRACTORS
 COOP
 CR
 CU
 CV
 CW
 CX
 CY
 CZ
 DE
 DIAMONDS
 DIRECTORY
 DJ
 DK
 DM
 DO
 DZ
 EC
 EDU
 EE
 EG
 ENTERPRISES
 EQUIPMENT
 ER
 ES
 ESTATE
 ET
 EU
 FI
 FJ
 FK
 FM
 FO
 FR
 GA
 GALLERY
 GB
 GD
 GE
 GF
 GG
 GH
 GI
 GL
 GM
 GN
 GOV
 GP
 GQ
 GR
 GRAPHICS
 GS
 GT
 GU
 GURU
 GW
 GY
 HK
 HM
 HN
 HOLDINGS
 HR
 HT
 HU
 ID
 IE
 IL
 IM
 IN
 INFO
 INT
 IO
 IQ
 IR
 IS
 IT
 JE
 JM
 JO
 JOBS
 JP
 KE
 KG
 KH
 KI
 KITCHEN
 KM
 KN
 KP
 KR
 KW
 KY
 KZ
 LA
 LAND
 LB
 LC
 LI
 LIGHTING
 LK
 LR
 LS
 LT
 LU
 LV
 LY
 MA
 MC
 MD
 ME
 MG
 MH
 MIL
 MK
 ML
 MM
 MN
 MO
 MOBI
 MP
 MQ
 MR
 MS
 MT
 MU
 MUSEUM
 MV
 MW
 MX
 MY
 MZ
 NA
 NAME
 NC
 NE
 NET
 NF
 NG
 NI
 NL
 NO
 NP
 NR
 NU
 NZ
 OM
 ORG
 PA
 PE
 PF
 PG
 PH
 PHOTOGRAPHY
 PK
 PL
 PLUMBING
 PM
 PN
 POST
 PR
 PRO
 PS
 PT
 PW
 PY
 QA
 RE
 RO
 RS
 RU
 RW
 SA
 SB
 SC
 SD
 SE
 SEXY
 SG
 SH
 SI
 SINGLES
 SJ
 SK
 SL
 SM
 SN
 SO
 SR
 ST
 SU
 SV
 SX
 SY
 SZ
 TATTOO
 TC
 TD
 TECHNOLOGY
 TEL
 TF
 TG
 TH
 TIPS
 TJ
 TK
 TL
 TM
 TN
 TO
 TODAY
 TP
 TR
 TRAVEL
 TT
 TV
 TW
 TZ
 UA
 UG
 UK
 US
 UY
 UZ
 VA
 VC
 VE
 VENTURES
 VG
 VI
 VN
 VOYAGE
 VU
 WF
 WS
 XN--3E0B707E
 XN--45BRJ9C
 XN--80AO21A
 XN--80ASEHDB
 XN--80ASWG
 XN--90A3AC
 XN--CLCHC0EA0B2G2A9GCD
 XN--FIQS8S
 XN--FIQZ9S
 XN--FPCRJ9C3D
 XN--FZC2C9E2C
 XN--GECRJ9C
 XN--H2BRJ9C
 XN--J1AMH
 XN--J6W193G
 XN--KPRW13D
 XN--KPRY57D
 XN--L1ACC
 XN--LGBBAT1AD8J
 XN--MGB9AWBF
 XN--MGBA3A4F16A
 XN--MGBAAM7A8H
 XN--MGBAYH7GPA
 XN--MGBBH1A71E
 XN--MGBC0A9AZCG
 XN--MGBERP4A5D4AR
 XN--MGBX4CD0AB
 XN--NGBC5AZD
 XN--O3CW4H
 XN--OGBPF8FL
 XN--P1AI
 XN--PGBS0DH
 XN--Q9JYB4C
 XN--S9BRJ9C
 XN--UNUP4Y
 XN--WGBH1C
 XN--WGBL6A
 XN--XKC2AL3HYE2A
 XN--XKC2DL3A5EE0H
 XN--YFRO4I67O
 XN--YGBI2AMMX
 XXX
 YE
 YT
 ZA
 ZM
 ZW
@@ -1,81 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Options
 """
 try:
    from collections import OrderedDict
 except ImportError:  # pragma: no-cover
    from ordereddict import OrderedDict  # pylint:disable=import-error
 import babelfish
 import yaml  # pylint:disable=wrong-import-order
 from .rules.common.quantity import BitRate, FrameRate, Size
 class OrderedDictYAMLLoader(yaml.Loader):
    """
    YAML loader building OrderedDict instances for mappings.
    From https://gist.github.com/enaeseth/844388
    """
    # NOTE(review): this extends yaml.Loader (not SafeLoader); make sure it is only fed trusted input.

    def __init__(self, *args, **kwargs):
        yaml.Loader.__init__(self, *args, **kwargs)

        # Both the plain map tag and the omap tag go through the ordered constructor.
        map_constructor = type(self).construct_yaml_map
        for yaml_tag in (u'tag:yaml.org,2002:map', u'tag:yaml.org,2002:omap'):
            self.add_constructor(yaml_tag, map_constructor)

    def construct_yaml_map(self, node):
        """
        Generator constructor: yield the (still empty) OrderedDict first, then fill it.
        """
        ordered = OrderedDict()
        yield ordered
        ordered.update(self.construct_mapping(node))

    def construct_mapping(self, node, deep=False):
        """
        Build an OrderedDict from a mapping node, keeping the order of ``node.value``.

        :raise yaml.constructor.ConstructorError: if node is not a mapping node or a key is unhashable
        """
        if not isinstance(node, yaml.MappingNode):  # pragma: no cover
            raise yaml.constructor.ConstructorError(None, None,
                                                    'expected a mapping node, but found %s' % node.id, node.start_mark)
        self.flatten_mapping(node)

        ordered = OrderedDict()
        for key_node, value_node in node.value:
            key = self.construct_object(key_node, deep=deep)
            try:
                hash(key)
            except TypeError as exc:  # pragma: no cover
                raise yaml.constructor.ConstructorError('while constructing a mapping',
                                                        node.start_mark, 'found unacceptable key (%s)'
                                                        % exc, key_node.start_mark)
            ordered[key] = self.construct_object(value_node, deep=deep)
        return ordered
 class CustomDumper(yaml.SafeDumper):
    """
    Custom YAML Dumper.

    Plain ``yaml.SafeDumper`` subclass; the module registers its own representers on it.
    """
 def default_representer(dumper, data):
    """Represent ``data`` as a string node built from ``str(data)``."""
    text = str(data)
    return dumper.represent_str(text)
 # Dump babelfish and quantity objects through default_representer, i.e. as str(value).
 CustomDumper.add_representer(babelfish.Language, default_representer)
 CustomDumper.add_representer(babelfish.Country, default_representer)
 CustomDumper.add_representer(BitRate, default_representer)
 CustomDumper.add_representer(FrameRate, default_representer)
 CustomDumper.add_representer(Size, default_representer)
 def ordered_dict_representer(dumper, data):
    """Represent ``data`` as a regular YAML map, passing its items in iteration order."""
    entries = data.items()
    return dumper.represent_mapping('tag:yaml.org,2002:map', entries)
 # Dump OrderedDict instances through ordered_dict_representer.
 CustomDumper.add_representer(OrderedDict, ordered_dict_representer)
@@ -1,10 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Define simple search patterns in bulk to perform advanced matching on any string.
 """
 #  pylint:disable=import-self
 from .rebulk import Rebulk
 from .rules import Rule, CustomRule, AppendMatch, RemoveMatch, RenameMatch, AppendTags, RemoveTags
 from .processors import ConflictSolver, PrivateRemover, POST_PROCESS, PRE_PROCESS
 from .pattern import REGEX_AVAILABLE
@@ -1,7 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Version module
 """
 # pragma: no cover
 __version__ = '2.0.1.dev0'
@@ -1,217 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Base builder class for Rebulk
 """
 from abc import ABCMeta, abstractmethod
 from copy import deepcopy
 from logging import getLogger
 from six import add_metaclass
 from .loose import set_defaults
 from .pattern import RePattern, StringPattern, FunctionalPattern
 # Module-level shortcut bound to the ``log`` method of this module's logger.
 log = getLogger(__name__).log
@add_metaclass(ABCMeta)
 class Builder(object):
    """
    Base builder class for patterns.

    Stores default keyword arguments (global ones plus one set per pattern kind), offers
    ``build_*`` factories that apply them, and fluent ``regex`` / ``string`` / ``functional`` /
    ``chain`` helpers that register what was built through :meth:`pattern`.
    """

    def __init__(self):
        self._defaults = {}
        self._regex_defaults = {}
        self._string_defaults = {}
        self._functional_defaults = {}
        self._chain_defaults = {}

    def reset(self):
        """
        Reset all defaults by running ``__init__`` again.

        :return:
        """
        self.__init__()

    def _store_defaults(self, stored, overrides):
        """
        Merge ``overrides`` into the ``stored`` defaults dict (overriding existing keys).

        :return: self, so public setters stay chainable
        """
        set_defaults(overrides, stored, override=True)
        return self

    def _apply_defaults(self, specific, kwargs):
        """
        Complete ``kwargs`` with the kind-specific defaults first, then with the global ones.

        :return: the same ``kwargs`` dict
        """
        set_defaults(specific, kwargs)
        set_defaults(self._defaults, kwargs)
        return kwargs

    def defaults(self, **kwargs):
        """
        Define default keyword arguments for all patterns

        :param kwargs: default values
        :return: self
        """
        return self._store_defaults(self._defaults, kwargs)

    def regex_defaults(self, **kwargs):
        """
        Define default keyword arguments for regex patterns.

        :param kwargs: default values
        :return: self
        """
        return self._store_defaults(self._regex_defaults, kwargs)

    def string_defaults(self, **kwargs):
        """
        Define default keyword arguments for string patterns.

        :param kwargs: default values
        :return: self
        """
        return self._store_defaults(self._string_defaults, kwargs)

    def functional_defaults(self, **kwargs):
        """
        Define default keyword arguments for functional patterns.

        :param kwargs: default values
        :return: self
        """
        return self._store_defaults(self._functional_defaults, kwargs)

    def chain_defaults(self, **kwargs):
        """
        Define default keyword arguments for patterns chain.

        :param kwargs: default values
        :return: self
        """
        return self._store_defaults(self._chain_defaults, kwargs)

    def build_re(self, *pattern, **kwargs):
        """
        Builds a new regular expression pattern

        :param pattern: regular expressions
        :param kwargs: pattern options, completed with the regex and global defaults
        :return: the new pattern
        :rtype: RePattern
        """
        return RePattern(*pattern, **self._apply_defaults(self._regex_defaults, kwargs))

    def build_string(self, *pattern, **kwargs):
        """
        Builds a new string pattern

        :param pattern: strings to search
        :param kwargs: pattern options, completed with the string and global defaults
        :return: the new pattern
        :rtype: StringPattern
        """
        return StringPattern(*pattern, **self._apply_defaults(self._string_defaults, kwargs))

    def build_functional(self, *pattern, **kwargs):
        """
        Builds a new functional pattern

        :param pattern: functions
        :param kwargs: pattern options, completed with the functional and global defaults
        :return: the new pattern
        :rtype: FunctionalPattern
        """
        return FunctionalPattern(*pattern, **self._apply_defaults(self._functional_defaults, kwargs))

    def build_chain(self, **kwargs):
        """
        Builds a new patterns chain

        The chain receives deep copies of every defaults dict of this builder.

        :param kwargs: chain options, completed with the chain and global defaults
        :return: the new chain
        :rtype: Chain
        """
        from .chain import Chain
        new_chain = Chain(self, **self._apply_defaults(self._chain_defaults, kwargs))
        for attribute in ('_defaults', '_regex_defaults', '_functional_defaults',
                          '_string_defaults', '_chain_defaults'):
            setattr(new_chain, attribute, deepcopy(getattr(self, attribute)))
        return new_chain

    @abstractmethod
    def pattern(self, *pattern):
        """
        Register a list of Pattern instance

        :param pattern:
        :return:
        """

    def regex(self, *pattern, **kwargs):
        """
        Add re pattern

        :param pattern:
        :type pattern:
        :return: self
        :rtype: Rebulk
        """
        built = self.build_re(*pattern, **kwargs)
        return self.pattern(built)

    def string(self, *pattern, **kwargs):
        """
        Add string pattern

        :param pattern:
        :type pattern:
        :return: self
        :rtype: Rebulk
        """
        built = self.build_string(*pattern, **kwargs)
        return self.pattern(built)

    def functional(self, *pattern, **kwargs):
        """
        Add functional pattern

        :param pattern:
        :type pattern:
        :return: self
        :rtype: Rebulk
        """
        return self.pattern(self.build_functional(*pattern, **kwargs))

    def chain(self, **kwargs):
        """
        Add patterns chain, using configuration of this rebulk

        :param kwargs: chain options
        :return: the new chain (not self)
        :rtype: Chain
        """
        new_chain = self.build_chain(**kwargs)
        self.pattern(new_chain)
        return new_chain
@@ -1,380 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Chain patterns and handle repeating capture groups
 """
 # pylint: disable=super-init-not-called
 import itertools
 from .builder import Builder
 from .loose import call
 from .match import Match, Matches
 from .pattern import Pattern, filter_match_kwargs, BasePattern
 from .remodule import re
 class _InvalidChainException(Exception):
    """
    Internal exception signalling that the chain being matched is not valid.
    """
 class Chain(Pattern, Builder):
    """
    Definition of a pattern chain to search for.

    A chain is both a Pattern (it can be matched) and a Builder (parts are added to it with
    the regex/string/functional helpers). Each registered pattern is wrapped in a ChainPart.
    """
    def __init__(self, parent, chain_breaker=None, **kwargs):
        """
        :param parent: builder that created this chain (returned, possibly indirectly, by close())
        :param chain_breaker: optional callable evaluated on the matches collected so far;
            anything that is not callable is ignored
        :param kwargs: pattern options
        """
        Builder.__init__(self)
        call(Pattern.__init__, self, **kwargs)
        self._kwargs = kwargs
        self._match_kwargs = filter_match_kwargs(kwargs)
        if callable(chain_breaker):
            self.chain_breaker = chain_breaker
        else:
            self.chain_breaker = None
        self.parent = parent
        self.parts = []
    def pattern(self, *pattern):
        """
        Register exactly one pattern as a new part of the chain.

        :param pattern: a single pattern
        :return: the ChainPart wrapping the pattern
        :raise ValueError: if zero or more than one pattern is given
        """
        if not pattern:
            raise ValueError("One pattern should be given to the chain")
        if len(pattern) > 1:
            raise ValueError("Only one pattern can be given to the chain")
        part = ChainPart(self, pattern[0])
        self.parts.append(part)
        return part
    def close(self):
        """
        Deeply close the chain
        :return: Rebulk instance
        """
        # Walk up through nested chains until the first parent that is not a Chain.
        parent = self.parent
        while isinstance(parent, Chain):
            parent = parent.parent
        return parent
    def _match(self, pattern, input_string, context=None):
        # pylint: disable=too-many-locals,too-many-nested-blocks
        chain_matches = []
        # chain_input_string is the remaining text handed to the parts; offset is its
        # position inside input_string.
        chain_input_string = input_string
        offset = 0
        while offset < len(input_string):
            chain_found = False
            current_chain_matches = []
            valid_chain = True
            for chain_part in self.parts:
                try:
                    chain_part_matches, raw_chain_part_matches = chain_part.matches(chain_input_string,
                                                                                    context,
                                                                                    with_raw_matches=True)
                    chain_found, chain_input_string, offset = \
                        self._to_next_chain_part(chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
                                                 input_string, chain_input_string, offset, current_chain_matches)
                except _InvalidChainException:
                    # A part did not reach its minimum repetition: give up on this attempt and,
                    # if something was collected, move offset to the raw end of its first match.
                    valid_chain = False
                    if current_chain_matches:
                        offset = current_chain_matches[0].raw_end
                    break
            if not chain_found:
                # No part produced any raw match in this pass: stop searching.
                break
            if current_chain_matches and valid_chain:
                match = self._build_chain_match(current_chain_matches, input_string)
                chain_matches.append(match)
        return chain_matches
    def _to_next_chain_part(self, chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
                            input_string, chain_input_string, offset, current_chain_matches):
        # Part matches were computed on a substring: make them relative to input_string.
        Chain._fix_matches_offset(chain_part_matches, input_string, offset)
        Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)
        if raw_chain_part_matches:
            grouped_matches_dict = self._group_by_match_index(chain_part_matches)
            grouped_raw_matches_dict = self._group_by_match_index(raw_chain_part_matches)
            for match_index, grouped_raw_matches in grouped_raw_matches_dict.items():
                chain_found = True
                # Continue after the raw end of the last match of this repetition.
                offset = grouped_raw_matches[-1].raw_end
                chain_input_string = input_string[offset:]
                if not chain_part.is_hidden:
                    # Hidden parts move the cursor but contribute no match to the chain.
                    grouped_matches = grouped_matches_dict.get(match_index, [])
                    if self._chain_breaker_eval(current_chain_matches + grouped_matches):
                        current_chain_matches.extend(grouped_matches)
        return chain_found, chain_input_string, offset
    def _process_match(self, match, match_index, child=False):
        """
        Handle a match
        :param match:
        :type match:
        :param match_index:
        :type match_index:
        :param child:
        :type child:
        :return: True as soon as the parent implementation accepts the match, False otherwise
        :rtype:
        """
        # pylint: disable=too-many-locals
        ret = super(Chain, self)._process_match(match, match_index, child=child)
        if ret:
            return True
        if match.children:
            # Retry with a shortened match: repetitions of the last pattern are removed one
            # match_index group at a time, starting from the last group.
            last_pattern = match.children[-1].pattern
            last_pattern_groups = self._group_by_match_index(
                [child_ for child_ in match.children if child_.pattern == last_pattern]
            )
            if last_pattern_groups:
                original_children = Matches(match.children)
                original_end = match.end
                for index in reversed(list(last_pattern_groups)):
                    last_matches = last_pattern_groups[index]
                    for last_match in last_matches:
                        match.children.remove(last_match)
                    match.end = match.children[-1].end if match.children else match.start
                    ret = super(Chain, self)._process_match(match, match_index, child=child)
                    if ret:
                        return True
                # Nothing was accepted: restore the match as it was.
                match.children = original_children
                match.end = original_end
        return False
    def _build_chain_match(self, current_chain_matches, input_string):
        # The chain match spans from the smallest start to the largest end of its parts.
        start = None
        end = None
        for match in current_chain_matches:
            if start is None or start > match.start:
                start = match.start
            if end is None or end < match.end:
                end = match.end
        match = call(Match, start, end, pattern=self, input_string=input_string, **self._match_kwargs)
        for chain_match in current_chain_matches:
            # Children of a part match are added to the chain match, then the part match itself.
            if chain_match.children:
                for child in chain_match.children:
                    match.children.append(child)
            if chain_match not in match.children:
                match.children.append(chain_match)
                chain_match.parent = match
        return match
    def _chain_breaker_eval(self, matches):
        # True when there is no chain breaker, or when the breaker returns a falsy value.
        return not self.chain_breaker or not self.chain_breaker(Matches(matches))
    @staticmethod
    def _fix_matches_offset(chain_part_matches, input_string, offset):
        for chain_part_match in chain_part_matches:
            # Only matches still bound to another input string are rebound and shifted.
            if chain_part_match.input_string != input_string:
                chain_part_match.input_string = input_string
                chain_part_match.end += offset
                chain_part_match.start += offset
            if chain_part_match.children:
                Chain._fix_matches_offset(chain_part_match.children, input_string, offset)
    @staticmethod
    def _group_by_match_index(matches):
        # itertools.groupby only groups consecutive items: if the same match_index shows up in
        # two separate runs, the later run replaces the earlier one in the dict.
        grouped_matches_dict = dict()
        for match_index, match in itertools.groupby(matches, lambda m: m.match_index):
            grouped_matches_dict[match_index] = list(match)
        return grouped_matches_dict
    @property
    def match_options(self):
        return {}
    @property
    def patterns(self):
        return [self]
    def __repr__(self):
        defined = ""
        if self.defined_at:
            defined = "@%s" % (self.defined_at,)
        return "<%s%s:%s>" % (self.__class__.__name__, defined, self.parts)
 class ChainPart(BasePattern):
    """
    Part of a pattern chain.

    Wraps one pattern together with its repetition bounds (``repeater_start`` /
    ``repeater_end``, both 1 by default) and a hidden flag. The builder-like methods
    delegate to the owning chain so calls can be chained fluently.
    """

    def __init__(self, chain, pattern):
        self._chain = chain
        self.pattern = pattern
        self.repeater_start = 1
        self.repeater_end = 1
        self._hidden = False

    @property
    def _is_chain_start(self):
        first_part = self._chain.parts[0]
        return first_part == self

    def matches(self, input_string, context=None, with_raw_matches=False):
        """
        Match the wrapped pattern and keep only the repetitions allowed for this part.

        :raise _InvalidChainException: if fewer repetitions than ``repeater_start`` are found
        :return: matches, or a ``(matches, raw_matches)`` tuple when ``with_raw_matches`` is set
        """
        found, raw_found = self.pattern.matches(input_string, context=context, with_raw_matches=True)

        found = self._truncate_repeater(found, input_string)
        raw_found = self._truncate_repeater(raw_found, input_string)

        self._validate_repeater(raw_found)

        return (found, raw_found) if with_raw_matches else found

    def _truncate_repeater(self, matches, input_string):
        """
        Keep the leading run of matches that touch each other, capped by ``repeater_end``.
        """
        if not matches:
            return matches

        # Any part but the first one has to start right at the beginning of the input.
        if not self._is_chain_start and input_string[0:matches[0].initiator.raw_start]:
            return []

        contiguous = 1
        for current, following in zip(matches, matches[1:]):
            gap = input_string[current.initiator.raw_end:following.initiator.raw_start]
            if gap:
                break
            contiguous += 1

        kept = matches[:contiguous]
        if self.repeater_end is None:
            return kept
        return [m for m in kept if m.match_index < self.repeater_end]

    def _validate_repeater(self, matches):
        """
        Raise when the number of repetitions is below ``repeater_start``.
        """
        highest_index = max([m.match_index for m in matches]) if matches else -1
        if highest_index + 1 < self.repeater_start:
            raise _InvalidChainException

    def chain(self):
        """
        Add patterns chain, using configuration from this chain

        :return:
        :rtype:
        """
        return self._chain.chain()

    def hidden(self, hidden=True):
        """
        Hide chain part results from global chain result

        :param hidden:
        :type hidden:
        :return: self
        :rtype:
        """
        self._hidden = hidden
        return self

    @property
    def is_hidden(self):
        """
        Check if the chain part is hidden

        :return:
        :rtype:
        """
        return self._hidden

    def regex(self, *pattern, **kwargs):
        """
        Add re pattern to the owning chain

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        return self._chain.regex(*pattern, **kwargs)

    def functional(self, *pattern, **kwargs):
        """
        Add functional pattern to the owning chain

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        return self._chain.functional(*pattern, **kwargs)

    def string(self, *pattern, **kwargs):
        """
        Add string pattern to the owning chain

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return:
        :rtype:
        """
        return self._chain.string(*pattern, **kwargs)

    def close(self):
        """
        Close the chain builder to continue registering other patterns

        :return:
        :rtype:
        """
        return self._chain.close()

    def repeater(self, value):
        """
        Define the repeater of the current chain part.

        Accepts anything ``int()`` converts (exact count), ``'+'``, ``'*'``, ``'?'``, or a
        ``{start,end}`` style string; an unrecognised string leaves the bounds untouched.

        :param value:
        :type value:
        :return: self
        :rtype:
        """
        try:
            exact = int(value)
        except ValueError:
            pass
        else:
            self.repeater_start = exact
            self.repeater_end = exact
            return self

        if value == '+':
            self.repeater_start, self.repeater_end = 1, None
        elif value == '*':
            self.repeater_start, self.repeater_end = 0, None
        elif value == '?':
            self.repeater_start, self.repeater_end = 0, 1
        else:
            bounds = re.match(r'\{\s*(\d*)\s*,?\s*(\d*)\s*\}', value)
            if bounds:
                lower, upper = bounds.group(1), bounds.group(2)
                if lower or upper:
                    self.repeater_start = int(lower) if lower else 0
                    self.repeater_end = int(upper) if upper else None
        return self

    def __repr__(self):
        return "%s({%s,%s})" % (self.pattern, self.repeater_start, self.repeater_end)
@@ -1,56 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Debug tools.
 Can be configured by changing the values of these variables.
 DEBUG = False
 Enable this variable to activate debug features (like defined_at parameters). It can slow down Rebulk
 LOG_LEVEL = logging.DEBUG
 Default log level of generated rebulk logs.
 """
 import inspect
 import logging
 import os
 from collections import namedtuple
 DEBUG = False
 LOG_LEVEL = logging.DEBUG
 class Frame(namedtuple('Frame', ['lineno', 'package', 'name', 'filename'])):
    """
    Immutable description of a stack frame location.

    Its representation is ``<basename of filename>#L<lineno>``.
    """
    __slots__ = ()
    def __repr__(self):
        short_name = os.path.basename(self.filename)
        return "%s#L%s" % (short_name, self.lineno)
 def defined_at():
    """
    Get definition location of a pattern or a match (outside of rebulk package).

    Walks the call stack outwards until it reaches a frame whose ``__package__`` differs from this
    module's package, and describes that frame.

    :return: location of the first frame found outside this package, or None when ``DEBUG`` is disabled
             or when no such frame can be determined
    :rtype: Frame or None
    """
    if not DEBUG:
        return None
    frame = inspect.currentframe()
    try:
        while frame:
            try:
                if frame.f_globals['__package__'] != __package__:
                    break
            except KeyError:  # pragma:no cover
                # If package is missing, consider we are in. Workaround for python 3.3.
                break
            frame = frame.f_back
        if frame is None:
            # inspect.currentframe() may return None, and the walk may run off the top of the stack:
            # there is nothing to describe in either case.
            return None
        return Frame(frame.f_lineno,
                     frame.f_globals.get('__package__'),
                     frame.f_globals.get('__name__'),
                     frame.f_code.co_filename)
    finally:
        # Drop the frame reference explicitly to avoid keeping a reference cycle alive.
        del frame
@@ -1,33 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Formatter functions to use in patterns.
 All these functions take match.value (str) as their last argument.
 """
 def formatters(*chained_formatters):
    """
    Build a single formatter that applies the given formatters one after another.

    :param chained_formatters: callables taking one value and returning the transformed value
    :return: a callable feeding its argument through every formatter, in the order given
    :rtype: callable
    """
    def formatters_chain(input_string):  # pylint:disable=missing-docstring
        result = input_string
        for step in chained_formatters:
            result = step(result)
        return result
    return formatters_chain
 def default_formatter(input_string):
    """
    Identity formatter: hand the input back untouched.

    :param input_string: value to format
    :return: the very same object that was passed in
    """
    return input_string
@@ -1,127 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Introspect rebulk object to retrieve capabilities.
 """
 from abc import ABCMeta, abstractmethod
 from collections import defaultdict
 import six
 from .pattern import StringPattern, RePattern, FunctionalPattern
 from .utils import extend_safe
@six.add_metaclass(ABCMeta)
class Description(object):
    """
    Abstract base class of every description object.
    """
    @property
    @abstractmethod
    def properties(self):  # pragma: no cover
        """
        Properties of the described object.

        :return: all properties that the described object can generate, grouped by name.
        :rtype: dict
        """
 class PatternDescription(Description):
    """
    Description of a single pattern: the property names (and known values) it can produce.
    """
    def __init__(self, pattern):  # pylint:disable=too-many-branches
        self.pattern = pattern
        self._properties = defaultdict(list)
        collected = self._properties
        if pattern.properties:
            # Explicitly declared properties take precedence over anything inferred below.
            for key, values in pattern.properties.items():
                extend_safe(collected[key], values)
        elif 'value' in pattern.match_options:
            collected[pattern.name].append(pattern.match_options['value'])
        elif isinstance(pattern, StringPattern):
            extend_safe(collected[pattern.name], pattern.patterns)
        elif isinstance(pattern, RePattern):
            hidden = pattern.private_names
            if pattern.name and pattern.name not in hidden:
                extend_safe(collected[pattern.name], [None])
            if not pattern.private_children:
                # Every non-private named group of each regex is a property with unknown values.
                for regex_pattern in pattern.patterns:
                    for group_name in regex_pattern.groupindex:
                        if group_name not in hidden:
                            extend_safe(collected[group_name], [None])
        elif isinstance(pattern, FunctionalPattern):
            if pattern.name and pattern.name not in pattern.private_names:
                extend_safe(collected[pattern.name], [None])
    @property
    def properties(self):
        """
        Properties for this pattern.

        :return: property values grouped by property name
        :rtype: dict
        """
        return self._properties
 class RuleDescription(Description):
    """
    Description of a single rule: the properties it declares.
    """
    def __init__(self, rule):
        self.rule = rule
        self._properties = defaultdict(list)
        declared = rule.properties
        if declared:
            for key, values in declared.items():
                extend_safe(self._properties[key], values)
    @property
    def properties(self):
        """
        Properties for this rule.

        :return: property values grouped by property name
        :rtype: dict
        """
        return self._properties
 class Introspection(Description):
    """
    Introspection results: descriptions of the effective patterns and rules of a rebulk object.
    """
    def __init__(self, rebulk, context=None):
        # Private patterns and markers are left out of the description.
        visible = (candidate for candidate in rebulk.effective_patterns(context)
                   if not (candidate.private or candidate.marker))
        self.patterns = [PatternDescription(candidate) for candidate in visible]
        self.rules = [RuleDescription(rule) for rule in rebulk.effective_rules(context)]
    @property
    def properties(self):
        """
        Properties for Introspection results.

        :return: values of all described patterns, then of all described rules, grouped by property name
        :rtype: dict
        """
        merged = defaultdict(list)
        for described in self.patterns + self.rules:
            for key, values in described.properties.items():
                extend_safe(merged[key], values)
        return merged
 def introspect(rebulk, context=None):
    """
    Introspect a Rebulk instance to grab defined objects and properties that can be generated.

    :param rebulk: object to introspect
    :type rebulk: Rebulk
    :param context: context handed over to the Introspection constructor
    :return: Introspection instance
    :rtype: Introspection
    """
    result = Introspection(rebulk, context)
    return result
@@ -1,242 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Various utilities functions
 """
 import sys
 from inspect import isclass
 try:
    from inspect import getfullargspec as getargspec
    _fullargspec_supported = True
 except ImportError:
    _fullargspec_supported = False
    from inspect import getargspec
 from .utils import is_iterable
 if sys.version_info >= (3, 4, 0):  # pragma: no cover
    def _constructor(class_):
        """
        Retrieve the object whose signature describes how to construct the given class.

        :param class_: class to inspect
        :type class_: class
        :return: the class itself
        :rtype: callable
        """
        return class_
 else:  # pragma: no cover
    def _constructor(class_):
        """
        Retrieve the object whose signature describes how to construct the given class.

        :param class_: class to inspect
        :type class_: class
        :return: the ``__init__`` attribute of the class
        :rtype: callable
        """
        return class_.__init__
 def call(function, *args, **kwargs):
    """
    Call a function or constructor with the given args and kwargs, after dropping the ones that do not
    match the signature of the function or constructor.

    :param function: function or class to call
    :type function: callable
    :param args: candidate positional arguments
    :param kwargs: candidate keyword arguments
    :return: same value as a direct call of ``function`` with the retained arguments
    :rtype: object
    """
    if isclass(function):
        call_args, call_kwargs = constructor_args(function, *args, **kwargs)
    else:
        call_args, call_kwargs = function_args(function, *args, **kwargs)
    return function(*call_args, **call_kwargs)
 def function_args(callable_, *args, **kwargs):
    """
    Return the (args, kwargs) subset accepted by the signature of a function.

    :param callable_: callable to inspect
    :type callable_: callable
    :param args: candidate positional arguments
    :param kwargs: candidate keyword arguments
    :return: (args, kwargs) matching the function signature
    :rtype: tuple
    """
    return argspec_args(getargspec(callable_), False, *args, **kwargs)  # pylint:disable=deprecated-method
 def constructor_args(class_, *args, **kwargs):
    """
    Return the (args, kwargs) subset accepted by the constructor of a class.

    :param class_: class to inspect
    :type class_: class
    :param args: candidate positional arguments
    :param kwargs: candidate keyword arguments
    :return: (args, kwargs) matching the constructor signature
    :rtype: tuple
    """
    target = _constructor(class_)
    return argspec_args(getargspec(target), True, *args, **kwargs)  # pylint:disable=deprecated-method
 def argspec_args(argspec, constructor, *args, **kwargs):
    """
    Return (args, kwargs) matching the argspec object.

    Keyword arguments are all kept when the signature has a ``**kwargs`` catch-all; otherwise only the
    ones naming a positional-or-keyword or a keyword-only parameter are kept. Positional arguments are
    all kept when the signature has a ``*args`` catch-all; otherwise they are truncated to the number
    of declared positional parameters.

    :param argspec: argspec to use (as returned by ``inspect.getfullargspec``)
    :type argspec: argspec
    :param constructor: is it a constructor? If so, the first declared parameter is treated as the
                        implicit instance and not counted.
    :type constructor: bool
    :param args: candidate positional arguments
    :param kwargs: candidate keyword arguments
    :return: (args, kwargs) matching the function signature
    :rtype: tuple
    """
    if argspec.varkw:
        call_kwarg = kwargs
    else:
        accepted = set(argspec.args)
        # Keyword-only parameters are reported apart from ``args``; without them, valid keyword
        # arguments would be silently dropped.
        accepted.update(getattr(argspec, 'kwonlyargs', None) or ())
        call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in accepted)  # Python 2.6 dict comprehension
    if argspec.varargs:
        call_args = args
    else:
        # Clamp to 0: a negative bound would slice from the end (args[:-1]) instead of keeping nothing.
        call_args = args[:max(0, len(argspec.args) - (1 if constructor else 0))]
    return call_args, call_kwarg
 if not _fullargspec_supported:
    def argspec_args_legacy(argspec, constructor, *args, **kwargs):
        """
        Return (args, kwargs) matching an argspec object exposing a ``keywords`` attribute.

        :param argspec: argspec to use
        :type argspec: argspec
        :param constructor: is it a constructor? If so, one declared parameter is not counted.
        :type constructor: bool
        :param args: candidate positional arguments
        :param kwargs: candidate keyword arguments
        :return: (args, kwargs) matching the function signature
        :rtype: tuple
        """
        if not argspec.keywords:
            call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args)  # Python 2.6 dict comprehension
        else:
            call_kwarg = kwargs
        if not argspec.varargs:
            limit = len(argspec.args) - (1 if constructor else 0)
            call_args = args[:limit]
        else:
            call_args = args
        return call_args, call_kwarg
    # This implementation replaces the default one when this branch is taken.
    argspec_args = argspec_args_legacy
 def ensure_list(param):
    """
    Retrieve a list from the given parameter.

    :param param: any value
    :return: a new empty list for a falsy value, the value itself when ``is_iterable`` accepts it,
             otherwise a one-element list wrapping the value
    """
    if not param:
        return []
    if is_iterable(param):
        return param
    return [param]
 def ensure_dict(param, default_value, default_key=None):
    """
    Retrieve a dict and a default value from the given parameter.

    A falsy parameter is first replaced by ``default_value``. If the result is not a dict, it is
    wrapped in a dict under ``default_key`` and, when truthy, also becomes the returned default value.

    :param param: dict, or a plain value to promote
    :param default_value: value used when ``param`` is falsy
    :param default_key: key under which a non-dict value is stored
    :return: (dict, default value)
    :rtype: tuple
    """
    candidate = param if param else default_value
    if isinstance(candidate, dict):
        return candidate, default_value
    promoted = candidate if candidate else default_value
    return {default_key: candidate}, promoted
 def filter_index(collection, predicate=None, index=None):
    """
    Filter a collection with a predicate function and/or pick a single element by index.

    An int passed as ``predicate`` while ``index`` is None is taken as the index.

    :param collection: collection to filter
    :type collection: collection supporting iteration and slicing
    :param predicate: function to filter the collection with
    :type predicate: function
    :param index: position of a single element to retrieve
    :type index: int
    :return: filtered collection, or single element of it if index is defined (None if out of range)
    :rtype: list or object
    """
    if index is None and isinstance(predicate, int):
        predicate, index = None, predicate
    if predicate:
        # Rebuild with the collection's own class so that the container type is preserved.
        collection = collection.__class__(filter(predicate, collection))
    if index is None:
        return collection
    try:
        return collection[index]
    except IndexError:
        return None
 def set_defaults(defaults, kwargs, override=False):
    """
    Set defaults from the defaults dict into the kwargs dict, in place.

    A ``'clear'`` entry is popped from ``defaults``; when truthy, ``kwargs`` is emptied first.
    For keys present on both sides, list defaults are prepended to list values and dict defaults are
    merged recursively into dict values.

    :param defaults: default values
    :type defaults: dict
    :param kwargs: dict to complete
    :type kwargs: dict
    :param override: if True, every default replaces the value in kwargs
    :type override: bool
    :return: None
    """
    if 'clear' in defaults and defaults.pop('clear'):
        kwargs.clear()
    for name, default in defaults.items():
        if name in kwargs:
            current = kwargs[name]
            if isinstance(default, list) and isinstance(current, list):
                kwargs[name] = list(default) + current
            elif isinstance(default, dict) and isinstance(current, dict):
                set_defaults(default, current)
        if override or name not in kwargs:
            kwargs[name] = default
@@ -1,890 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Classes and functions related to matches
 """
 import copy
 import itertools
 from collections import defaultdict
 try:
    from collections.abc import MutableSequence
 except ImportError:
    from collections import MutableSequence
 try:
    from collections import OrderedDict  # pylint:disable=ungrouped-imports
 except ImportError:  # pragma: no cover
    from ordereddict import OrderedDict  # pylint:disable=import-error
 import six
 from .loose import ensure_list, filter_index
 from .utils import is_iterable
 from .debug import defined_at
 class MatchesDict(OrderedDict):
    """
    An ordered dict carrying two extra list-valued lookups: ``matches`` and ``values_list``.
    """
    def __init__(self):
        super(MatchesDict, self).__init__()
        # Both attributes are filled by whoever builds the dict; missing keys yield empty lists.
        self.values_list = defaultdict(list)
        self.matches = defaultdict(list)
 class _BaseMatches(MutableSequence):
    """
    A custom list[Match] that automatically maintains name, tag, start and end lookup structures.

    Matches are stored in ``self._delegate``. The lookup dictionaries (by name, tag, start, end and
    covered index) are built lazily on first access and then kept up to date by ``_add_match`` and
    ``_remove_match``.
    """
    _base = list
    _base_add = _base.append
    _base_remove = _base.remove
    _base_extend = _base.extend
    def __init__(self, matches=None, input_string=None):  # pylint: disable=super-init-not-called
        self.input_string = input_string
        self._max_end = 0
        self._delegate = []
        # Lookup structures stay None until first requested through the matching property.
        self.__name_dict = None
        self.__tag_dict = None
        self.__start_dict = None
        self.__end_dict = None
        self.__index_dict = None
        if matches:
            self.extend(matches)
    @property
    def _name_dict(self):
        """Lazily built lookup: name -> matches having that name (unnamed matches are skipped)."""
        if self.__name_dict is None:
            self.__name_dict = defaultdict(_BaseMatches._base)
            for match in self._delegate:
                if match.name:
                    _BaseMatches._base_add(self.__name_dict[match.name], match)
        return self.__name_dict
    @property
    def _start_dict(self):
        """Lazily built lookup: start index -> matches starting there."""
        if self.__start_dict is None:
            self.__start_dict = defaultdict(_BaseMatches._base)
            for match in self._delegate:
                _BaseMatches._base_add(self.__start_dict[match.start], match)
        return self.__start_dict
    @property
    def _end_dict(self):
        """Lazily built lookup: end index -> matches ending there."""
        if self.__end_dict is None:
            self.__end_dict = defaultdict(_BaseMatches._base)
            for match in self._delegate:
                _BaseMatches._base_add(self.__end_dict[match.end], match)
        return self.__end_dict
    @property
    def _tag_dict(self):
        """Lazily built lookup: tag -> matches carrying that tag."""
        if self.__tag_dict is None:
            self.__tag_dict = defaultdict(_BaseMatches._base)
            for match in self._delegate:
                for tag in match.tags:
                    _BaseMatches._base_add(self.__tag_dict[tag], match)
        return self.__tag_dict
    @property
    def _index_dict(self):
        """Lazily built lookup: position -> matches whose span covers that position."""
        if self.__index_dict is None:
            self.__index_dict = defaultdict(_BaseMatches._base)
            for match in self._delegate:
                for index in range(*match.span):
                    _BaseMatches._base_add(self.__index_dict[index], match)
        return self.__index_dict
    def _add_match(self, match):
        """
        Register a match in every lookup structure that has already been built.

        :param match:
        :type match: Match
        """
        if self.__name_dict is not None:
            if match.name:
                _BaseMatches._base_add(self._name_dict[match.name], (match))
        if self.__tag_dict is not None:
            for tag in match.tags:
                _BaseMatches._base_add(self._tag_dict[tag], match)
        if self.__start_dict is not None:
            _BaseMatches._base_add(self._start_dict[match.start], match)
        if self.__end_dict is not None:
            _BaseMatches._base_add(self._end_dict[match.end], match)
        if self.__index_dict is not None:
            for index in range(*match.span):
                _BaseMatches._base_add(self._index_dict[index], match)
        if match.end > self._max_end:
            self._max_end = match.end
    def _remove_match(self, match):
        """
        Unregister a match from every lookup structure that has already been built.

        Must be called once the match is no longer in ``self._delegate``.

        :param match:
        :type match: Match
        """
        if self.__name_dict is not None:
            if match.name:
                _BaseMatches._base_remove(self._name_dict[match.name], match)
        if self.__tag_dict is not None:
            for tag in match.tags:
                _BaseMatches._base_remove(self._tag_dict[tag], match)
        if self.__start_dict is not None:
            _BaseMatches._base_remove(self._start_dict[match.start], match)
        if self.__end_dict is not None:
            _BaseMatches._base_remove(self._end_dict[match.end], match)
        if self.__index_dict is not None:
            for index in range(*match.span):
                _BaseMatches._base_remove(self._index_dict[index], match)
        if match.end >= self._max_end:
            # Recompute from the remaining matches. The keys of the end lookup can not be used for
            # this: it is a defaultdict, so it keeps (empty) entries for every end index ever looked
            # up, including the one of the match being removed.
            self._max_end = max([remaining.end for remaining in self._delegate] or [0])
    def previous(self, match, predicate=None, index=None):
        """
        Retrieves the nearest previous matches.

        Positions are scanned backwards from ``match.start``; the matches ending at the first position
        where any match ends are returned.

        :param match: reference match
        :type match: Match
        :param predicate: function to filter the result with
        :param index: position of a single element to retrieve
        :type index: int
        :return: matches (or a single one if index is defined)
        """
        current = match.start
        while current > -1:
            previous_matches = self.ending(current)
            if previous_matches:
                return filter_index(previous_matches, predicate, index)
            current -= 1
        return filter_index(_BaseMatches._base(), predicate, index)
    def next(self, match, predicate=None, index=None):
        """
        Retrieves the nearest next matches.

        Positions are scanned forwards from ``match.start + 1``; the matches starting at the first
        position where any match starts are returned.

        :param match: reference match
        :type match: Match
        :param predicate: function to filter the result with
        :param index: position of a single element to retrieve
        :type index: int
        :return: matches (or a single one if index is defined)
        """
        current = match.start + 1
        while current <= self._max_end:
            next_matches = self.starting(current)
            if next_matches:
                return filter_index(next_matches, predicate, index)
            current += 1
        return filter_index(_BaseMatches._base(), predicate, index)
    def named(self, name, predicate=None, index=None):
        """
        Retrieves a list of Match objects that have the given name.

        :param name:
        :type name: str
        :param predicate: function to filter the result with
        :param index: position of a single element to retrieve
        :type index: int
        :return: list of matches (or a single one if index is defined)
        """
        return filter_index(_BaseMatches._base(self._name_dict[name]), predicate, index)
    def tagged(self, tag, predicate=None, index=None):
        """
        Retrieves a list of Match objects that have the given tag defined.

        :param tag:
        :type tag: str
        :param predicate: function to filter the result with
        :param index: position of a single element to retrieve
        :type index: int
        :return: list of matches (or a single one if index is defined)
        """
        return filter_index(_BaseMatches._base(self._tag_dict[tag]), predicate, index)
    def starting(self, start, predicate=None, index=None):
        """
        Retrieves a list of Match objects that start at the given index.

        :param start: the starting index
        :type start: int
        :param predicate: function to filter the result with
        :param index: position of a single element to retrieve
        :type index: int
        :return: list of matches (or a single one if index is defined)
        """
        return filter_index(_BaseMatches._base(self._start_dict[start]), predicate, index)
    def ending(self, end, predicate=None, index=None):
        """
        Retrieves a list of Match objects that end at the given index.

        :param end: the ending index
        :type end: int
        :param predicate: function to filter the result with
        :param index: position of a single element to retrieve
        :type index: int
        :return: list of matches (or a single one if index is defined)
        """
        return filter_index(_BaseMatches._base(self._end_dict[end]), predicate, index)
    def range(self, start=0, end=None, predicate=None, index=None):
        """
        Retrieves a list of Match objects that overlap the given range, in sorted order.

        :param start: the starting index
        :type start: int
        :param end: the ending index (capped to ``max_end``; defaults to ``max_end``)
        :type end: int
        :param predicate: function to filter the result with
        :param index: position of a single element to retrieve
        :type index: int
        :return: list of matches (or a single one if index is defined)
        """
        if end is None:
            end = self.max_end
        else:
            end = min(self.max_end, end)
        ret = _BaseMatches._base()
        for match in sorted(self):
            if match.start < end and match.end > start:
                ret.append(match)
        return filter_index(ret, predicate, index)
    def chain_before(self, position, seps, start=0, predicate=None, index=None):
        """
        Retrieves a list of chained matches, before position, matching predicate and separated by characters from seps
        only.

        :param position: index, or object with a ``start`` attribute, to walk backwards from
        :param seps: characters allowed between chained matches
        :param start: lowest index to consider
        :param predicate: function selecting the matches that are part of the chain
        :param index: position of a single element to retrieve
        :return: chained matches, nearest first (or a single one if index is defined)
        """
        if hasattr(position, 'start'):
            position = position.start
        chain = _BaseMatches._base()
        position = min(self.max_end, position)
        for i in reversed(range(start, position)):
            index_matches = self.at_index(i)
            filtered_matches = [index_match for index_match in index_matches if not predicate or predicate(index_match)]
            if filtered_matches:
                for chain_match in filtered_matches:
                    if chain_match not in chain:
                        chain.append(chain_match)
            elif self.input_string[i] not in seps:
                # Neither a selected match nor a separator: the chain stops here.
                break
        return filter_index(chain, predicate, index)
    def chain_after(self, position, seps, end=None, predicate=None, index=None):
        """
        Retrieves a list of chained matches, after position, matching predicate and separated by characters from seps
        only.

        :param position: index, or object with an ``end`` attribute, to walk forwards from
        :param seps: characters allowed between chained matches
        :param end: highest index to consider (capped to ``max_end``; defaults to ``max_end``)
        :param predicate: function selecting the matches that are part of the chain
        :param index: position of a single element to retrieve
        :return: chained matches, nearest first (or a single one if index is defined)
        """
        if hasattr(position, 'end'):
            position = position.end
        chain = _BaseMatches._base()
        if end is None:
            end = self.max_end
        else:
            end = min(self.max_end, end)
        for i in range(position, end):
            index_matches = self.at_index(i)
            filtered_matches = [index_match for index_match in index_matches if not predicate or predicate(index_match)]
            if filtered_matches:
                for chain_match in filtered_matches:
                    if chain_match not in chain:
                        chain.append(chain_match)
            elif self.input_string[i] not in seps:
                # Neither a selected match nor a separator: the chain stops here.
                break
        return filter_index(chain, predicate, index)
    @property
    def max_end(self):
        """
        Retrieves the maximum index: the greatest of the input string length and the highest match end.

        :return:
        """
        return max(len(self.input_string), self._max_end) if self.input_string else self._max_end
    def _hole_start(self, position, ignore=None):
        """
        Retrieves the start of hole index from position.

        :param position: index to search backwards from (excluded)
        :param ignore: function flagging matches that must not be taken into account
        :return: nearest lower index where a non-ignored match starts, or 0
        :rtype: int
        """
        for lindex in reversed(range(0, position)):
            for starting in self.starting(lindex):
                if not ignore or not ignore(starting):
                    return lindex
        return 0
    def _hole_end(self, position, ignore=None):
        """
        Retrieves the end of hole index from position.

        :param position: index to search forwards from (included)
        :param ignore: function flagging matches that must not be taken into account
        :return: nearest index where a non-ignored match starts, or ``max_end``
        :rtype: int
        """
        for rindex in range(position, self.max_end):
            for starting in self.starting(rindex):
                if not ignore or not ignore(starting):
                    return rindex
        return self.max_end
    def holes(self, start=0, end=None, formatter=None, ignore=None, seps=None, predicate=None,
              index=None):  # pylint: disable=too-many-branches,too-many-locals
        """
        Retrieves a list of Match objects covering the parts of the given range where no match is defined.

        :param start: the starting index
        :param end: the ending index (capped to ``max_end``; defaults to ``max_end``)
        :param formatter: formatter given to the created hole matches
        :param ignore: function flagging matches that must be considered as not defined
        :param seps: characters that split a hole (requires ``input_string``)
        :param predicate: function to filter the result with
        :param index: position of a single element to retrieve
        :return: list of hole matches (or a single one if index is defined)
        """
        assert self.input_string if seps else True, "input_string must be defined when using seps parameter"
        if end is None:
            end = self.max_end
        else:
            end = min(self.max_end, end)
        ret = _BaseMatches._base()
        hole = False
        rindex = start
        loop_start = self._hole_start(start, ignore)
        for rindex in range(loop_start, end):
            current = []
            for at_index in self.at_index(rindex):
                if not ignore or not ignore(at_index):
                    current.append(at_index)
            if seps and hole and self.input_string and self.input_string[rindex] in seps:
                # A separator closes the hole being built.
                hole = False
                ret[-1].end = rindex
            else:
                if not current and not hole:
                    # Open a new hole match
                    hole = True
                    ret.append(Match(max(rindex, start), None, input_string=self.input_string, formatter=formatter))
                elif current and hole:
                    # Close current hole match
                    hole = False
                    ret[-1].end = rindex
        if ret and hole:
            # The last hole is still open: extend it to the next starting element.
            ret[-1].end = min(self._hole_end(rindex, ignore), end)
        return filter_index(ret, predicate, index)
    def conflicting(self, match, predicate=None, index=None):
        """
        Retrieves a list of ``Match`` objects that conflicts with given match.

        :param match: reference match
        :param predicate: function to filter the result with
        :param index: position of a single element to retrieve
        :return: matches sharing at least one position with the given match, the match itself excluded
        """
        ret = _BaseMatches._base()
        for i in range(*match.span):
            for at_match in self.at_index(i):
                if at_match not in ret:
                    ret.append(at_match)
        # The match is absent when its span is empty or when it is not part of this collection.
        if match in ret:
            ret.remove(match)
        return filter_index(ret, predicate, index)
    def at_match(self, match, predicate=None, index=None):
        """
        Retrieves a list of matches from given match.
        """
        return self.at_span(match.span, predicate, index)
    def at_span(self, span, predicate=None, index=None):
        """
        Retrieves a list of matches from given (start, end) tuple.

        Only the matches covering the first or the last position of the span are returned.
        """
        starting = self._index_dict[span[0]]
        ending = self._index_dict[span[1] - 1]
        merged = list(starting)
        for marker in ending:
            if marker not in merged:
                merged.append(marker)
        return filter_index(merged, predicate, index)
    def at_index(self, pos, predicate=None, index=None):
        """
        Retrieves a list of matches from given position
        """
        return filter_index(self._index_dict[pos], predicate, index)
    @property
    def names(self):
        """
        Retrieve all names.

        :return:
        """
        return self._name_dict.keys()
    @property
    def tags(self):
        """
        Retrieve all tags.

        :return:
        """
        return self._tag_dict.keys()
    def to_dict(self, details=False, first_value=False, enforce_list=False):
        """
        Converts matches to a dict object.

        :param details: if True, values will be complete Match object, else it will be only string Match.value property
        :type details: bool
        :param first_value: if True, only the first value will be kept. Else, multiple values will be set as a list in
        the dict.
        :type first_value: bool
        :param enforce_list: if True, value is wrapped in a list even when a single value is found. Else, list values
        are available under `values_list` property of the returned dict object.
        :type enforce_list: bool
        :return:
        :rtype: dict
        """
        ret = MatchesDict()
        for match in sorted(self):
            value = match if details else match.value
            ret.matches[match.name].append(match)
            if not enforce_list and value not in ret.values_list[match.name]:
                ret.values_list[match.name].append(value)
            if match.name in ret.keys():
                if not first_value:
                    if not isinstance(ret[match.name], list):
                        if ret[match.name] == value:
                            continue
                        # Second distinct value for this name: promote the entry to a list.
                        ret[match.name] = [ret[match.name]]
                    else:
                        if value in ret[match.name]:
                            continue
                    ret[match.name].append(value)
            else:
                if enforce_list and not isinstance(value, list):
                    ret[match.name] = [value]
                else:
                    ret[match.name] = value
        return ret
    if six.PY2:  # pragma: no cover
        def clear(self):
            """
            Python 3 backport
            """
            del self[:]
    def __len__(self):
        return len(self._delegate)
    def __getitem__(self, index):
        ret = self._delegate[index]
        if isinstance(ret, list):
            # A slice was requested: wrap the resulting list.
            return Matches(ret)
        return ret
    def __setitem__(self, index, match):
        if isinstance(index, slice):
            # Materialize first: the new items are iterated twice (storage, then registration).
            added = list(match)
            replaced = self._delegate[index]
            self._delegate[index] = added
        else:
            added = [match]
            replaced = [self._delegate[index]]
            self._delegate[index] = match
        # Replaced matches must leave the lookup structures, otherwise stale entries would keep being
        # returned by named(), tagged(), starting(), ending() and at_index().
        for replaced_item in replaced:
            self._remove_match(replaced_item)
        for added_item in added:
            self._add_match(added_item)
    def __delitem__(self, index):
        match = self._delegate[index]
        del self._delegate[index]
        if isinstance(match, list):
            # if index is a slice, we have a match list
            for match_item in match:
                self._remove_match(match_item)
        else:
            self._remove_match(match)
    def __repr__(self):
        return self._delegate.__repr__()
    def insert(self, index, value):
        self._delegate.insert(index, value)
        self._add_match(value)
 class Matches(_BaseMatches):
    """
    A custom list[Match] holding regular (non-marker) matches, plus a ``markers`` collection.
    """
    def __init__(self, matches=None, input_string=None):
        # The markers collection shares the input string of this collection.
        self.markers = Markers(None, input_string)
        super(Matches, self).__init__(matches, input_string)
    def _add_match(self, match):
        assert not match.marker, "A marker match should not be added to <Matches> object"
        super(Matches, self)._add_match(match)
 class Markers(_BaseMatches):
    """
    A custom list[Match] containing markers list.
    """
    def __init__(self, matches=None, input_string=None):
        """
        :param matches: initial marker matches, forwarded to the base initializer
        :param input_string: the string the markers refer to
        """
        # Forward ``matches`` instead of a hard-coded None, so the argument is no longer dropped.
        super(Markers, self).__init__(matches=matches, input_string=input_string)
    def _add_match(self, match):
        """
        Forward ``match`` to the base implementation after asserting it is a marker.
        """
        assert match.marker, "A non-marker match should not be added to <Markers> object"
        super(Markers, self)._add_match(match)
 class Match(object):
    """
    Object storing values related to a single match.

    A match spans ``input_string[start:end]``. ``len()`` is ``end - start`` and, since no explicit
    truth method is defined, a zero-length match is falsy.
    """
    def __init__(self, start, end, value=None, name=None, tags=None, marker=None, parent=None, private=None,
                 pattern=None, input_string=None, formatter=None, conflict_solver=None, **kwargs):
        """
        :param start: start index of the match
        :param end: end index of the match
        :param value: hard-coded value; when truthy it takes precedence over formatter and raw value
        :param name: name of the match
        :param tags: tags of the match, normalized with ``ensure_list``
        :param marker: marker flag
        :param parent: parent match, if any
        :param private: private flag
        :param pattern: pattern that produced this match
        :param input_string: string the indices refer to
        :param formatter: callable applied to the raw value when no hard-coded value is set
        :param conflict_solver: conflict solver attached to this match
        :param kwargs: accepted and ignored
        """
        # pylint: disable=unused-argument
        self.start = start
        self.end = end
        self.name = name
        self._value = value
        self.tags = ensure_list(tags)
        self.marker = marker
        self.parent = parent
        self.input_string = input_string
        self.formatter = formatter
        self.pattern = pattern
        self.private = private
        self.conflict_solver = conflict_solver
        self._children = None
        self._raw_start = None
        self._raw_end = None
        # Reuse the definition site recorded on the pattern when a pattern is given.
        self.defined_at = pattern.defined_at if pattern else defined_at()
    @property
    def span(self):
        """
        2-tuple with start and end indices of the match
        """
        return self.start, self.end
    @property
    def children(self):
        """
        Children matches, created lazily as an empty ``Matches`` on first access.
        """
        if self._children is None:
            self._children = Matches(None, self.input_string)
        return self._children
    @children.setter
    def children(self, value):
        """
        Replace the children container.
        """
        self._children = value
    @property
    def value(self):
        """
        Get the value of the match: the hard-coded value when truthy, else the formatted raw value
        when a formatter is defined, else the raw value.
        """
        # NOTE: truthiness test, so a falsy hard-coded value (0, '') falls through to formatter/raw.
        if self._value:
            return self._value
        if self.formatter:
            return self.formatter(self.raw)
        return self.raw
    @value.setter
    def value(self, value):
        """
        Set the value (hardcode)
        :param value: the value to return from now on
        """
        self._value = value  # pylint: disable=attribute-defined-outside-init
    @property
    def names(self):
        """
        Get all names of children, or a set holding this match's own name when it has no children.
        :return: set of names
        :rtype: set
        """
        if not self.children:
            return set([self.name])
        ret = set()
        for child in self.children:
            for name in child.names:
                ret.add(name)
        return ret
    @property
    def raw_start(self):
        """
        start index of raw value; defaults to ``start`` until explicitly set
        """
        if self._raw_start is None:
            return self.start
        return self._raw_start
    @raw_start.setter
    def raw_start(self, value):
        """
        Set start index of raw value
        """
        self._raw_start = value
    @property
    def raw_end(self):
        """
        end index of raw value; defaults to ``end`` until explicitly set
        """
        if self._raw_end is None:
            return self.end
        return self._raw_end
    @raw_end.setter
    def raw_end(self, value):
        """
        Set end index of raw value
        """
        self._raw_end = value
    @property
    def raw(self):
        """
        Get the raw value of the match, without using hardcoded value nor formatter.
        :return: ``input_string[raw_start:raw_end]``, or None when input_string is empty or unset
        """
        if self.input_string:
            return self.input_string[self.raw_start:self.raw_end]
        return None
    @property
    def initiator(self):
        """
        Retrieve the initiator parent of a match: the topmost ancestor reached by following
        ``parent`` links, or the match itself when it has no parent.
        """
        match = self
        # NOTE: this is a truthiness test, so the walk also stops at a zero-length parent.
        while match.parent:
            match = match.parent
        return match
    def crop(self, crops, predicate=None, index=None):
        """
        crop the match with given Match objects or spans tuples
        :param crops: a single crop or a list of crops; each is a 2-tuple span or an object with a ``span``
        :type crops: list or object
        :param predicate: forwarded to ``filter_index``
        :param index: forwarded to ``filter_index``
        :return: a list of Match objects
        :rtype: list[Match]
        """
        # A lone (start, end) tuple is treated as one crop, not as a list of crops.
        if not is_iterable(crops) or len(crops) == 2 and isinstance(crops[0], int):
            crops = [crops]
        initial = copy.deepcopy(self)
        ret = [initial]
        for crop in crops:
            if hasattr(crop, 'span'):
                start, end = crop.span
            else:
                start, end = crop
            # Iterate over a snapshot because ``ret`` is modified inside the loop.
            for current in list(ret):
                if start <= current.start and end >= current.end:
                    # self is included in crop, remove current ...
                    ret.remove(current)
                elif start >= current.start and end <= current.end:
                    # crop is included in self, split current ...
                    right = copy.deepcopy(current)
                    current.end = start
                    if not current:
                        ret.remove(current)
                    right.start = end
                    if right:
                        ret.append(right)
                elif current.end >= end > current.start:
                    current.start = end
                elif current.start <= start < current.end:
                    current.end = start
        return filter_index(ret, predicate, index)
    def split(self, seps, predicate=None, index=None):
        """
        Split this match in multiple matches using given separators.
        :param seps: separator characters
        :type seps: string containing separator characters
        :param predicate: forwarded to ``filter_index``
        :param index: forwarded to ``filter_index``
        :return: list of new Match objects
        :rtype: list
        """
        split_match = copy.deepcopy(self)
        current_match = split_match
        ret = []
        for i in range(0, len(self.raw)):
            if self.raw[i] in seps:
                if not split_match:
                    split_match = copy.deepcopy(current_match)
                    current_match.end = self.start + i
            else:
                if split_match:
                    split_match.start = self.start + i
                    current_match = split_match
                    ret.append(split_match)
                    split_match = None
        return filter_index(ret, predicate, index)
    def tagged(self, *tags):
        """
        Check if this match has at least one of the provided tags
        :param tags: tags to look for
        :return: True if at least one tag is defined, False otherwise.
        """
        return any(tag in self.tags for tag in tags)
    def named(self, *names):
        """
        Check if one of the children match has one of the provided name
        :param names: names to look for
        :return: True if at least one child is named with a given name is defined, False otherwise.
        """
        return any(name in self.names for name in names)
    def __len__(self):
        return self.end - self.start
    def __hash__(self):
        return hash(Match) + hash(self.start) + hash(self.end) + hash(self.value)
    def __eq__(self, other):
        if isinstance(other, Match):
            return self.span == other.span and self.value == other.value and self.name == other.name and \
                   self.parent == other.parent
        return NotImplemented
    def __ne__(self, other):
        if isinstance(other, Match):
            return self.span != other.span or self.value != other.value or self.name != other.name or \
                   self.parent != other.parent
        return NotImplemented
    def __lt__(self, other):
        if isinstance(other, Match):
            return self.span < other.span
        return NotImplemented
    def __gt__(self, other):
        if isinstance(other, Match):
            return self.span > other.span
        return NotImplemented
    def __le__(self, other):
        if isinstance(other, Match):
            return self.span <= other.span
        return NotImplemented
    def __ge__(self, other):
        if isinstance(other, Match):
            return self.span >= other.span
        return NotImplemented
    def __repr__(self):
        """
        Debug representation: ``<value:span[+private][+name=..][+tags=..][+initiator=..][@defined_at]>``.
        """
        flags = ""
        name = ""
        tags = ""
        defined = ""
        initiator = ""
        if self.initiator.value != self.value:
            # Use %-formatting: the initiator value is not guaranteed to be a string, and plain
            # concatenation would raise TypeError for e.g. an int value.
            initiator = "+initiator=%s" % (self.initiator.value,)
        if self.private:
            flags += '+private'
        if self.name:
            name = "+name=%s" % (self.name,)
        if self.tags:
            tags = "+tags=%s" % (self.tags,)
        if self.defined_at:
            defined += "@%s" % (self.defined_at,)
        return "<%s:%s%s%s%s%s%s>" % (self.value, self.span, flags, name, tags, initiator, defined)
@@ -1,559 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Abstract pattern class definition along with various implementations (regexp, string, functional)
 """
 # pylint: disable=super-init-not-called,wrong-import-position
 from abc import ABCMeta, abstractmethod, abstractproperty
 import six
 from . import debug
 from .formatters import default_formatter
 from .loose import call, ensure_list, ensure_dict
 from .match import Match
 from .remodule import re, REGEX_AVAILABLE
 from .utils import find_all, is_iterable, get_first_defined
 from .validators import allways_true
@six.add_metaclass(ABCMeta)
class BasePattern(object):
    """
    Abstract base for Pattern-like objects: anything able to compute matches for an input string.
    """

    @abstractmethod
    def matches(self, input_string, context=None, with_raw_matches=False):
        """
        Computes all matches for a given input

        :param input_string: the string to parse
        :type input_string: str
        :param context: the context
        :type context: dict
        :param with_raw_matches: should return details
        :type with_raw_matches: bool
        :return: matches based on input_string for this pattern
        :rtype: iterator[Match]
        """
@six.add_metaclass(ABCMeta)
class Pattern(BasePattern):
    """
    Definition of a particular pattern to search for.

    Subclasses provide ``patterns``, ``match_options`` and ``_match``; this class turns the raw
    matches they produce into configured, validated matches.
    """

    def __init__(self, name=None, tags=None, formatter=None, value=None, validator=None, children=False, every=False,
                 private_parent=False, private_children=False, private=False, private_names=None, ignore_names=None,
                 marker=False, format_all=False, validate_all=False, disabled=lambda context: False, log_level=None,
                 properties=None, post_processor=None, pre_match_processor=None, post_match_processor=None, **kwargs):
        """
        :param name: Name of this pattern
        :type name: str
        :param tags: List of tags related to this pattern
        :type tags: list[str]
        :param formatter: dict (name, func) of formatter to use with this pattern. name is the match name to
        support, and func a function(input_string) that returns the formatted string. A single formatter function
        can also be passed as a shortcut for {None: formatter}. The formatted string is exposed by Match.value.
        :type formatter: dict[str, func] || func
        :param value: dict (name, value) of value to use with this pattern. name is the match name to support,
        and value an object for the match value. A single object can also be passed as a shortcut for
        {None: value}. The value is exposed by Match.value.
        :type value: dict[str, object] || object
        :param validator: dict (name, func) of validator to use with this pattern. name is the match name to
        support, and func a function(match) that returns a boolean. A single validator function can also be passed
        as a shortcut for {None: validator}. If the return value is False, the match is ignored.
        :type validator: dict[str, func] || func
        :param children: generates children instead of parent
        :type children: bool
        :param every: generates both parent and children.
        :type every: bool
        :param private: flag this pattern as being private.
        :type private: bool
        :param private_parent: force return of parent and flag parent matches as private.
        :type private_parent: bool
        :param private_children: force return of children and flag children matches as private.
        :type private_children: bool
        :param private_names: names of the matches to flag as private.
        :type private_names: list[str]
        :param ignore_names: names of the matches to drop after validation.
        :type ignore_names: list[str]
        :param marker: flag this pattern as being a marker.
        :type marker: bool
        :param format_all: if True, pattern will format every match in the hierarchy (even match not yield).
        :type format_all: bool
        :param validate_all: if True, pattern will validate every match in the hierarchy (even match not yield).
        :type validate_all: bool
        :param disabled: if True, this pattern is disabled. Can also be a function(context).
        :type disabled: bool|function
        :param log_level: Log level associated to this pattern
        :type log_level: int
        :param properties: value returned by the ``properties`` property when truthy
        :param post_processor: Post processing function, called as post_processor(matches, pattern)
        :type post_processor: func
        :param pre_match_processor: Pre match processing function
        :type pre_match_processor: func
        :param post_match_processor: Post match processing function
        :type post_match_processor: func
        """
        # pylint:disable=too-many-locals,unused-argument
        self.name = name
        self.tags = ensure_list(tags)
        self.formatters, self._default_formatter = ensure_dict(formatter, default_formatter)
        self.values, self._default_value = ensure_dict(value, None)
        self.validators, self._default_validator = ensure_dict(validator, allways_true)

        self.children = children
        self.every = every
        self.private = private
        self.private_parent = private_parent
        self.private_children = private_children
        self.private_names = private_names or []
        self.ignore_names = ignore_names or []
        self.marker = marker
        self.format_all = format_all
        self.validate_all = validate_all

        # A plain (non callable) flag is wrapped so ``self.disabled`` can always be called with a context.
        self.disabled = disabled if callable(disabled) else (lambda context: disabled)

        self._log_level = log_level
        self._properties = properties
        self.defined_at = debug.defined_at()

        # Non callable processors are discarded.
        self.post_processor = post_processor if callable(post_processor) else None
        self.pre_match_processor = pre_match_processor if callable(pre_match_processor) else None
        self.post_match_processor = post_match_processor if callable(post_match_processor) else None

    @property
    def log_level(self):
        """
        Log level for this pattern, falling back to ``debug.LOG_LEVEL`` when none was given.
        """
        if self._log_level is None:
            return debug.LOG_LEVEL
        return self._log_level

    def matches(self, input_string, context=None, with_raw_matches=False):
        """
        Computes all matches for a given input

        :param input_string: the string to parse
        :type input_string: str
        :param context: the context
        :type context: dict
        :param with_raw_matches: when true, also return the unprocessed matches
        :type with_raw_matches: bool
        :return: the processed matches, or a (matches, raw_matches) tuple when with_raw_matches is true
        :rtype: list[Match] or tuple
        """
        processed = []
        raw_matches = []
        for pattern in self.patterns:
            # The index restarts at 0 for each base pattern.
            for match_index, match in enumerate(self._match(pattern, input_string, context)):
                raw_matches.append(match)
                processed.extend(self._process_matches(match, match_index))

        processed = self._post_process_matches(processed)

        if with_raw_matches:
            return processed, raw_matches
        return processed

    @property
    def _should_include_children(self):
        """
        Check if children matches from this pattern should be included in matches results.
        """
        return self.children or self.every

    @property
    def _should_include_parent(self):
        """
        Check if a match from this pattern should be included in matches results.
        """
        return not self.children or self.every

    @staticmethod
    def _match_config_property_keys(match, child=False):
        """
        Yield the lookup keys for a per-match setting, most specific first: the match name (when set),
        then ``'__children__'`` or ``'__parent__'``, then None.
        """
        if match.name:
            yield match.name
        yield '__children__' if child else '__parent__'
        yield None

    @staticmethod
    def _process_match_index(match, match_index):
        """
        Store ``match_index`` on the match as ``match.match_index``.

        :param match:
        :param match_index:
        """
        match.match_index = match_index

    def _process_match_private(self, match, child=False):
        """
        Process match privacy from this pattern configuration.

        :param match:
        :param child:
        """
        named_private = match.name and match.name in self.private_names
        if named_private or (not child and self.private_parent) or (child and self.private_children):
            match.private = True

    def _process_match_value(self, match, child=False):
        """
        Process match value from this pattern configuration; only a truthy configured value is applied.

        :param match:
        :param child:
        """
        lookup_keys = self._match_config_property_keys(match, child=child)
        configured = get_first_defined(self.values, lookup_keys, self._default_value)
        if configured:
            match.value = configured

    def _process_match_formatter(self, match, child=False):
        """
        Process match formatter from this pattern configuration.

        :param match:
        :param child:
        """
        included = self._should_include_children if child else self._should_include_parent
        if not (included or self.format_all):
            return
        lookup_keys = self._match_config_property_keys(match, child=child)
        match.formatter = get_first_defined(self.formatters, lookup_keys, self._default_formatter)

    def _process_match_validator(self, match, child=False):
        """
        Process match validation from this pattern configuration.

        :param match:
        :param child:
        :return: True if match is validated by the configured validator, False otherwise.
        """
        included = self._should_include_children if child else self._should_include_parent
        if not (included or self.validate_all):
            return True
        lookup_keys = self._match_config_property_keys(match, child=child)
        validator = get_first_defined(self.validators, lookup_keys, self._default_validator)
        if validator and not validator(match):
            return False
        return True

    def _process_match(self, match, match_index, child=False):
        """
        Process match from this pattern by setting all properties from defined configuration
        (index, private, value, formatter, validator, ...).

        :param match:
        :param match_index:
        :param child:
        :return: True if match is validated by the configured validator, False otherwise.
        """
        self._process_match_index(match, match_index)
        for configure in (self._process_match_private, self._process_match_value, self._process_match_formatter):
            configure(match, child)
        return self._process_match_validator(match, child)

    @staticmethod
    def _process_match_processor(match, processor):
        """
        Run ``processor`` on ``match`` when a processor is set.

        :return: the processor result, or the original match when there is no processor or it returns None
        """
        if not processor:
            return match
        outcome = processor(match)
        return match if outcome is None else outcome

    def _process_matches(self, match, match_index):
        """
        Process and generate all matches for the given unprocessed match.

        Nothing is generated when a processor returns a falsy match, or when the match or any of its
        children fails validation.

        :param match:
        :param match_index:
        :return: Process and dispatched matches.
        """
        match = self._process_match_processor(match, self.pre_match_processor)
        if not match:
            return

        if not self._process_match(match, match_index):
            return

        for child in match.children:
            if not self._process_match(child, match_index, child=True):
                return

        match = self._process_match_processor(match, self.post_match_processor)
        if not match:
            return

        ignored = self.ignore_names
        if (self._should_include_parent or self.private_parent) and match.name not in ignored:
            yield match
        if self._should_include_children or self.private_children:
            # Snapshot the kept children before yielding any of them.
            kept_children = [x for x in match.children if x.name not in ignored]
            for child in kept_children:
                yield child

    def _post_process_matches(self, matches):
        """
        Post process matches with user defined function

        :param matches:
        :return: the post processor result, or ``matches`` unchanged when no post processor is set
        """
        if not self.post_processor:
            return matches
        return self.post_processor(matches, self)

    @abstractproperty
    def patterns(self):  # pragma: no cover
        """
        List of base patterns defined

        :return: A list of base patterns
        :rtype: list
        """

    @property
    def properties(self):
        """
        Properties names and values that can be retrieved by this pattern.

        :return: the configured properties, or an empty dict when none are set
        """
        return self._properties if self._properties else {}

    @abstractproperty
    def match_options(self):  # pragma: no cover
        """
        dict of default options for generated Match objects

        :return: **options to pass to Match constructor
        :rtype: dict
        """

    @abstractmethod
    def _match(self, pattern, input_string, context=None):  # pragma: no cover
        """
        Computes all unprocessed matches for a given pattern and input.

        :param pattern: the pattern to use
        :param input_string: the string to parse
        :type input_string: str
        :param context: the context
        :type context: dict
        :return: matches based on input_string for this pattern
        :rtype: iterator[Match]
        """

    def __repr__(self):
        location = "@%s" % (self.defined_at,) if self.defined_at else ""
        return "<%s%s:%s>" % (self.__class__.__name__, location, self.__repr__patterns__)

    @property
    def __repr__patterns__(self):
        """
        Patterns as they should appear in ``__repr__``.
        """
        return self.patterns
 class StringPattern(Pattern):
    """
    Definition of one or many strings to search for.
    """
    def __init__(self, *patterns, **kwargs):
        """
        :param patterns: the strings to search for
        :param kwargs: pattern options; also forwarded to ``find_all`` and, once filtered, to ``Match``
        """
        super(StringPattern, self).__init__(**kwargs)
        self._kwargs = kwargs
        self._match_kwargs = filter_match_kwargs(kwargs)
        self._patterns = patterns
    @property
    def patterns(self):
        """
        The strings given at construction.
        """
        return self._patterns
    @property
    def match_options(self):
        """
        Options passed to every generated ``Match``.
        """
        return self._match_kwargs
    def _match(self, pattern, input_string, context=None):
        """
        Yield a non-empty ``Match`` for every position of ``pattern`` reported by ``find_all``.
        """
        for found_at in find_all(input_string, pattern, **self._kwargs):
            found_end = found_at + len(pattern)
            candidate = Match(found_at, found_end, pattern=self, input_string=input_string, **self._match_kwargs)
            # A zero-length match is falsy and therefore skipped.
            if candidate:
                yield candidate
 class RePattern(Pattern):
    """
    Definition of one or many regular expression pattern to search for.
    """
    def __init__(self, *patterns, **kwargs):
        """
        :param patterns: each one is a regular expression string, a dict of ``re.compile`` keyword
        arguments, an iterable of ``re.compile`` positional arguments, or any other object, which is
        kept unchanged
        :param kwargs: pattern options. ``repeated_captures`` overrides the default taken from
        ``REGEX_AVAILABLE``; ``abbreviations`` is a list of (key, replacement) pairs substituted in
        the expression before compilation
        :raise NotImplementedError: if repeated captures are requested while ``REGEX_AVAILABLE`` is false
        """
        super(RePattern, self).__init__(**kwargs)
        self.repeated_captures = REGEX_AVAILABLE
        if 'repeated_captures' in kwargs:
            self.repeated_captures = kwargs.get('repeated_captures')
        if self.repeated_captures and not REGEX_AVAILABLE:  # pragma: no cover
            raise NotImplementedError("repeated_capture is available only with regex module.")
        self.abbreviations = kwargs.get('abbreviations', [])
        self._kwargs = kwargs
        self._match_kwargs = filter_match_kwargs(kwargs)
        self._children_match_kwargs = filter_match_kwargs(kwargs, children=True)
        self._patterns = []
        for pattern in patterns:
            if isinstance(pattern, six.string_types):
                if self.abbreviations and pattern:
                    for key, replacement in self.abbreviations:
                        pattern = pattern.replace(key, replacement)
                pattern = call(re.compile, pattern, **self._kwargs)
            elif isinstance(pattern, dict):
                if self.abbreviations and 'pattern' in pattern:
                    # Work on a copy: expanding abbreviations in the caller's dict would apply
                    # them a second time if the same dict were reused for another pattern.
                    pattern = dict(pattern)
                    for key, replacement in self.abbreviations:
                        pattern['pattern'] = pattern['pattern'].replace(key, replacement)
                pattern = re.compile(**pattern)
            elif hasattr(pattern, '__iter__'):
                pattern = re.compile(*pattern)
            self._patterns.append(pattern)
    @property
    def patterns(self):
        """
        The compiled patterns, in declaration order.
        """
        return self._patterns
    @property
    def __repr__patterns__(self):
        """
        Expression strings of the compiled patterns, used by ``__repr__``.
        """
        return [pattern.pattern for pattern in self.patterns]
    @property
    def match_options(self):
        """
        Options passed to every generated parent ``Match``.
        """
        return self._match_kwargs
    def _match(self, pattern, input_string, context=None):
        """
        Yield one non-empty parent ``Match`` per regular expression match, with one child ``Match``
        per captured group span attached to it.
        """
        # Reverse mapping: group index -> group name.
        names = dict((v, k) for k, v in pattern.groupindex.items())
        for match_object in pattern.finditer(input_string):
            start = match_object.start()
            end = match_object.end()
            main_match = Match(start, end, pattern=self, input_string=input_string, **self._match_kwargs)
            if pattern.groups:
                for i in range(1, pattern.groups + 1):
                    # Unnamed groups inherit the name of the parent match.
                    name = names.get(i, main_match.name)
                    if self.repeated_captures:
                        for child_start, child_end in match_object.spans(i):
                            child_match = Match(child_start, child_end, name=name, parent=main_match, pattern=self,
                                                input_string=input_string, **self._children_match_kwargs)
                            if child_match:
                                main_match.children.append(child_match)
                    else:
                        child_start, child_end = match_object.span(i)
                        # span() yields (-1, -1) for a group that did not take part in the match.
                        if child_start > -1 and child_end > -1:
                            child_match = Match(child_start, child_end, name=name, parent=main_match, pattern=self,
                                                input_string=input_string, **self._children_match_kwargs)
                            if child_match:
                                main_match.children.append(child_match)
            if main_match:
                yield main_match
 class FunctionalPattern(Pattern):
    """
    Definition of one or many functional pattern to search for.
    """
    def __init__(self, *patterns, **kwargs):
        """
        :param patterns: the functions to call
        :param kwargs: pattern options; also forwarded to the functions and, once filtered, to ``Match``
        """
        super(FunctionalPattern, self).__init__(**kwargs)
        self._kwargs = kwargs
        self._match_kwargs = filter_match_kwargs(kwargs)
        self._patterns = patterns
    @property
    def patterns(self):
        """
        The functions given at construction.
        """
        return self._patterns
    @property
    def match_options(self):
        """
        Options passed to every generated ``Match``.
        """
        return self._match_kwargs
    def _match(self, pattern, input_string, context=None):
        """
        Call ``pattern`` and yield a non-empty ``Match`` for each result it describes.

        A result is either a dict of ``Match`` keyword arguments or a sequence of ``Match`` positional
        arguments, optionally ending with a dict of keyword arguments. The function may return a
        single result or an iterable of results.
        """
        ret = call(pattern, input_string, context, **self._kwargs)
        if not ret:
            return
        # A non iterable, a dict or a sequence starting with an int describes a single match.
        single_result = not is_iterable(ret) or isinstance(ret, dict) \
            or (is_iterable(ret) and hasattr(ret, '__getitem__') and isinstance(ret[0], int))
        for args in ([ret] if single_result else ret):
            if isinstance(args, dict):
                # These two are always supplied by this method.
                args.pop('input_string', None)
                args.pop('pattern', None)
                options = args
                if self._match_kwargs:
                    options = self._match_kwargs.copy()
                    options.update(args)
                match = Match(pattern=self, input_string=input_string, **options)
            else:
                kwargs = self._match_kwargs
                if isinstance(args[-1], dict):
                    # A trailing dict carries keyword arguments overriding the defaults.
                    kwargs = dict(kwargs)
                    kwargs.update(args[-1])
                    args = args[:-1]
                match = Match(*args, pattern=self, input_string=input_string, **kwargs)
            if match:
                yield match
 def filter_match_kwargs(kwargs, children=False):
    """
    Build the keyword arguments usable for Match construction from pattern keyword arguments.

    The keys ``pattern``, ``start``, ``end``, ``parent``, ``formatter`` and ``value`` are always
    dropped; ``name`` is dropped as well when ``children`` is true. The input dict is not modified.

    :param kwargs: keyword arguments to filter
    :type kwargs: dict
    :param children: flag to filter for children matches
    :type children: bool
    :return: A filtered copy
    :rtype: dict
    """
    dropped_keys = ['pattern', 'start', 'end', 'parent', 'formatter', 'value']
    if children:
        dropped_keys.append('name')
    filtered = kwargs.copy()
    for dropped_key in dropped_keys:
        filtered.pop(dropped_key, None)
    return filtered
@@ -1,107 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Processor functions
 """
 from logging import getLogger
 from .utils import IdentitySet
 from .rules import Rule, RemoveMatch
 # Bound ``log`` method of this module's logger; called as log(level, message, *args).
 log = getLogger(__name__).log
 # Value a conflict solver can return to make ConflictSolver.when move on to the next solver.
 DEFAULT = '__default__'
 # Rule priority used by PrivateRemover in this module.
 POST_PROCESS = -2048
 # Rule priority used by ConflictSolver in this module.
 PRE_PROCESS = 2048
 def _default_conflict_solver(match, conflicting_match):
    """
    Default conflict solver for matches, shorter matches if they conflicts with longer ones
    :param conflicting_match:
    :type conflicting_match:
    :param match:
    :type match:
    :return:
    :rtype:
    """
    if len(conflicting_match.initiator) < len(match.initiator):
        return conflicting_match
    if len(match.initiator) < len(conflicting_match.initiator):
        return match
    return None
 class ConflictSolver(Rule):
    """
    Remove conflicting matches.
    """
    priority = PRE_PROCESS
    consequence = RemoveMatch
    @property
    def default_conflict_solver(self):  # pylint:disable=no-self-use
        """
        Default conflict solver to use.
        """
        return _default_conflict_solver
    def when(self, matches, context):
        """
        Collect the matches to remove among the public matches that conflict with each other.

        For each conflicting pair the solvers are tried in this order: the conflicting match's own
        solver (called with swapped arguments), the match's own solver, then the default solver.
        A solver returning ``DEFAULT`` passes the decision on to the next one.

        :param matches: all matches
        :param context: unused here
        :return: the matches to remove
        :rtype: IdentitySet
        """
        # pylint:disable=too-many-nested-blocks
        to_remove_matches = IdentitySet()
        public_matches = sorted((match for match in matches if not match.private), key=len)
        for match in public_matches:
            conflicting_matches = matches.conflicting(match)
            if not conflicting_matches:
                continue
            # Only public conflicting matches are considered, shortest first.
            public_conflicts = sorted((other for other in conflicting_matches if not other.private), key=len)
            for conflicting_match in public_conflicts:
                # (solver, swap arguments) pairs, in the order they are tried.
                prioritized_solvers = []
                if conflicting_match.conflict_solver:
                    prioritized_solvers.append((conflicting_match.conflict_solver, True))
                if match.conflict_solver:
                    prioritized_solvers.append((match.conflict_solver, False))
                prioritized_solvers.append((self.default_conflict_solver, False))
                for conflict_solver, reverse in prioritized_solvers:
                    first, second = (conflicting_match, match) if reverse else (match, conflicting_match)
                    to_remove = conflict_solver(first, second)
                    if to_remove == DEFAULT:
                        continue
                    if to_remove and to_remove not in to_remove_matches:
                        both_matches = [match, conflicting_match]
                        both_matches.remove(to_remove)
                        to_keep = both_matches[0]
                        # Only remove a match in favor of one that is itself kept.
                        if to_keep not in to_remove_matches:
                            log(self.log_level, "Conflicting match %s will be removed in favor of match %s",
                                to_remove, to_keep)
                            to_remove_matches.add(to_remove)
                    # The first solver that does not return DEFAULT settles this pair.
                    break
        return to_remove_matches
 class PrivateRemover(Rule):
    """
    Rule selecting every private match for removal.
    """
    priority = POST_PROCESS
    consequence = RemoveMatch
    def when(self, matches, context):
        """
        :param matches: all matches
        :param context: unused here
        :return: the matches whose ``private`` attribute is truthy
        :rtype: list
        """
        private_matches = []
        for candidate in matches:
            if candidate.private:
                private_matches.append(candidate)
        return private_matches
@@ -1,190 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Entry point functions and classes for Rebulk
 """
 from logging import getLogger
 from .builder import Builder
 from .match import Matches
 from .processors import ConflictSolver, PrivateRemover
 from .rules import Rules
 from .utils import extend_safe
 log = getLogger(__name__).log
 class Rebulk(Builder):
    r"""
    Holder for regular expression, string and function based patterns. Patterns are declared through a fluent
    API: ``string``, ``regex`` and ``functional`` calls can be chained.
    .. code-block:: python
        >>> from rebulk import Rebulk
        >>> bulk = Rebulk().string('brown').regex(r'qu\w+').functional(lambda s: (20, 25))
    Once the object is configured, call ``matches`` with an input string to retrieve every ``Match`` object
    found by the registered patterns.
    .. code-block:: python
        >>> bulk.matches("The quick brown fox jumps over the lazy dog")
        [<brown:(10, 15)>, <quick:(4, 9)>, <jumps:(20, 25)>]
    When several ``Match`` objects are found at the same position, only the longest one is kept.
    .. code-block:: python
        >>> bulk = Rebulk().string('lakers').string('la')
        >>> bulk.matches("the lakers are from la")
        [<lakers:(4, 10)>, <la:(20, 22)>]
    """
    # pylint:disable=protected-access
    def __init__(self, disabled=lambda context: False, default_rules=True):
        """
        Build an empty Rebulk object.
        :param disabled: if True, this object is disabled. Can also be a function(context).
        :type disabled: bool|function
        :param default_rules: when truthy, ``ConflictSolver`` and ``PrivateRemover`` are registered
        :type default_rules: bool
        """
        super(Rebulk, self).__init__()
        # A plain value is wrapped so that ``self.disabled`` can always be called with a context.
        self.disabled = disabled if callable(disabled) else (lambda context: disabled)
        self._patterns = []
        self._rules = Rules()
        if default_rules:
            self.rules(ConflictSolver, PrivateRemover)
        self._rebulks = []
    def pattern(self, *pattern):
        """
        Register pattern objects.
        :param pattern: patterns to add
        :type pattern: rebulk.pattern.Pattern
        :return: self
        :rtype: Rebulk
        """
        self._patterns.extend(pattern)
        return self
    def rules(self, *rules):
        """
        Register rules given as modules, classes or instances.
        :param rules: rules to load
        :type rules: list[Rule]
        :return: self
        :rtype: Rebulk
        """
        self._rules.load(*rules)
        return self
    def rebulk(self, *rebulks):
        """
        Register children rebulk objects.
        :param rebulks: children to add
        :type rebulks: Rebulk
        :return: self
        :rtype: Rebulk
        """
        self._rebulks.extend(rebulks)
        return self
    def matches(self, string, context=None):
        """
        Run every effective pattern, then every effective rule, against the input string.
        :param string: string to search into
        :type string: str
        :param context: context to use; an empty dict is used when omitted
        :type context: dict
        :return: A custom list of matches
        :rtype: Matches
        """
        found = Matches(input_string=string)
        if context is None:
            context = {}
        self._matches_patterns(found, context)
        self._execute_rules(found, context)
        return found
    def effective_rules(self, context=None):
        """
        Gather the rules of this object followed by those of its enabled direct children, without duplicates.
        :param context: context handed to each child's ``disabled`` callable
        :type context:
        :return: a new rules list
        :rtype: Rules
        """
        effective = Rules()
        effective.extend(self._rules)
        for child in self._rebulks:
            if child.disabled(context):
                continue
            extend_safe(effective, child._rules)
        return effective
    def _execute_rules(self, matches, context):
        """
        Execute the effective rules on ``matches``, unless this object is disabled.
        :param matches: matches the rules operate on
        :type matches: Matches
        :param context: context to use
        :type context:
        """
        if self.disabled(context):
            return
        self.effective_rules(context).execute_all_rules(matches, context)
    def effective_patterns(self, context=None):
        """
        Gather the patterns of this object followed by those of its enabled direct children, without duplicates.
        :param context: context handed to each child's ``disabled`` callable
        :type context:
        :return: a new list of patterns
        :rtype: list
        """
        effective = list(self._patterns)
        for child in self._rebulks:
            if child.disabled(context):
                continue
            extend_safe(effective, child._patterns)
        return effective
    def _matches_patterns(self, matches, context):
        """
        Search the input string with every effective pattern and store what is found into ``matches``.
        Marker matches go to ``matches.markers``, all others are appended to ``matches`` itself.
        :param matches: matches list
        :type matches: Matches
        :param context: context to use
        :type context: dict
        """
        if self.disabled(context):
            return
        for pattern in self.effective_patterns(context):
            if pattern.disabled(context):
                log(pattern.log_level, "Pattern is disabled. (%s)", pattern)
                continue
            found = pattern.matches(matches.input_string, context)
            if found:
                log(pattern.log_level, "Pattern has %s match(es). (%s)", len(found), pattern)
            for match in found:
                if match.marker:
                    log(pattern.log_level, "Marker found. (%s)", match)
                    matches.markers.append(match)
                else:
                    log(pattern.log_level, "Match found. (%s)", match)
                    matches.append(match)
@@ -1,17 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Uniform re module
 """
 # pylint: disable-all
 import os
 # REGEX_AVAILABLE becomes True only when the optional ``regex`` module is the one bound to the name ``re``.
 REGEX_AVAILABLE = False
 # Setting the REGEX_DISABLED environment variable to one of the listed values selects the standard ``re`` module.
 if os.environ.get('REGEX_DISABLED') in ["1", "true", "True", "Y"]:
    import re
 else:
    try:
        import regex as re
        REGEX_AVAILABLE = True
    except ImportError:
        # ``regex`` cannot be imported: fall back to the standard ``re`` module.
        import re
@@ -1,373 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Abstract rule class definition and rule engine implementation
 """
 from abc import ABCMeta, abstractmethod
 import inspect
 from itertools import groupby
 from logging import getLogger
 import six
 from .utils import is_iterable
 from .toposort import toposort
 from . import debug
 log = getLogger(__name__).log
@six.add_metaclass(ABCMeta)
 class Consequence(object):
    """
    Definition of a consequence to apply.
    Abstract base: subclasses must implement ``then``.
    """
    @abstractmethod
    def then(self, matches, when_response, context):  # pragma: no cover
        """
        Action implementation.
        :param matches: matches the action operates on
        :type matches: rebulk.match.Matches
        :param context:
        :type context:
        :param when_response: return object from when call.
        :type when_response: object
        :return: True if the action was runned, False if it wasn't.
        :rtype: bool
        """
        # NOTE(review): the concrete consequences defined in this module (e.g. RemoveMatch, AppendMatch) return
        # the affected match(es) or None rather than a bool -- confirm which contract callers rely on.
        pass
@six.add_metaclass(ABCMeta)
 class Condition(object):
    """
    Definition of a condition to check.
    Abstract base: subclasses must implement ``when``.
    """
    @abstractmethod
    def when(self, matches, context):  # pragma: no cover
        """
        Condition implementation.
        :param matches: matches the condition is evaluated against
        :type matches: rebulk.match.Matches
        :param context:
        :type context:
        :return: truthy if rule should be triggered and execute then action, falsy if it should not.
        :rtype: object
        """
        pass
@six.add_metaclass(ABCMeta)
 class CustomRule(Condition, Consequence):
    """
    Definition of a rule to apply
    """
    # pylint: disable=no-self-use, unused-argument, abstract-method
    priority = 0
    name = None
    dependency = None
    properties = {}
    def __init__(self, log_level=None):
        self.defined_at = debug.defined_at()
        if log_level is None and not hasattr(self, 'log_level'):
            self.log_level = debug.LOG_LEVEL
    def enabled(self, context):
        """
        Disable rule.
        :param context:
        :type context:
        :return: True if rule is enabled, False if disabled
        :rtype: bool
        """
        return True
    def __lt__(self, other):
        return self.priority > other.priority
    def __repr__(self):
        defined = ""
        if self.defined_at:
            defined = "@%s" % (self.defined_at,)
        return "<%s%s>" % (self.name if self.name else self.__class__.__name__, defined)
    def __eq__(self, other):
        return self.__class__ == other.__class__
    def __hash__(self):
        return hash(self.__class__)
 class Rule(CustomRule):
    """
    Rule whose action is delegated to the object(s) declared in ``consequence``.
    """
    # pylint:disable=abstract-method
    consequence = None
    def then(self, matches, when_response, context):
        """
        Run the declared consequence(s) with the value returned by ``when``.
        With a single consequence, it receives ``when_response`` as is. With an iterable of consequences, each
        one receives the next item of ``when_response`` (wrapped in a list first when it is not iterable).
        Consequences given as classes are instantiated without arguments.
        :param matches:
        :type matches: rebulk.match.Matches
        :param when_response: return object from when call.
        :type when_response: object
        :param context:
        :type context:
        """
        assert self.consequence
        if not is_iterable(self.consequence):
            action = self.consequence
            if inspect.isclass(action):
                action = action()  # pylint:disable=not-callable
            action.then(matches, when_response, context)
            return
        if not is_iterable(when_response):
            when_response = [when_response]
        responses = iter(when_response)
        for declared in self.consequence:  #pylint: disable=not-an-iterable
            action = declared() if inspect.isclass(declared) else declared
            action.then(matches, next(responses), context)
 class RemoveMatch(Consequence):  # pylint: disable=abstract-method
    """
    Consequence removing from ``matches`` the match(es) returned by ``when``.
    """
    def then(self, matches, when_response, context):
        """
        :param matches: collection to remove from
        :param when_response: a single match or an iterable of matches
        :param context:
        :return: for an iterable, the list of matches actually removed; for a single match, that match if it
        was removed, None otherwise
        """
        if not is_iterable(when_response):
            if when_response in matches:
                matches.remove(when_response)
                return when_response
            return None
        removed = []
        # Snapshot first: ``when_response`` may be consumed only once or be tied to ``matches``.
        for candidate in list(when_response):
            if candidate not in matches:
                continue
            matches.remove(candidate)
            removed.append(candidate)
        return removed
 class AppendMatch(Consequence):  # pylint: disable=abstract-method
    """
    Consequence appending to ``matches`` the match(es) returned by ``when``.
    """
    def __init__(self, match_name=None):
        """
        :param match_name: when truthy, name assigned to the matches handled by ``then``
        """
        self.match_name = match_name
    def then(self, matches, when_response, context):
        """
        :param matches: collection to append to
        :param when_response: a single match or an iterable of matches
        :param context:
        :return: for an iterable, the list of matches actually appended; for a single match, that match if it
        was appended, None otherwise
        """
        if not is_iterable(when_response):
            # A single match is renamed even when it turns out to be already present.
            if self.match_name:
                when_response.name = self.match_name
            if when_response not in matches:
                matches.append(when_response)
                return when_response
            return None
        appended = []
        for candidate in list(when_response):
            if candidate in matches:
                continue
            if self.match_name:
                candidate.name = self.match_name
            matches.append(candidate)
            appended.append(candidate)
        return appended
 class RenameMatch(Consequence):  # pylint: disable=abstract-method
    """
    Consequence renaming the match(es) returned by ``when``.
    """
    def __init__(self, match_name):
        """
        :param match_name: name given to the renamed matches
        """
        self.match_name = match_name
        self.remove = RemoveMatch()
        self.append = AppendMatch()
    def then(self, matches, when_response, context):
        """
        Remove the matches from ``matches``, set their ``name`` and append them back. Only the matches that
        ``RemoveMatch`` actually removed are renamed.
        :param matches:
        :param when_response: a single match or an iterable of matches
        :param context:
        """
        taken_out = self.remove.then(matches, when_response, context)
        if is_iterable(taken_out):
            taken_out = list(taken_out)
            for renamed in taken_out:
                renamed.name = self.match_name
        elif taken_out:
            taken_out.name = self.match_name
        if not taken_out:
            return
        self.append.then(matches, taken_out, context)
 class AppendTags(Consequence):  # pylint: disable=abstract-method
    """
    Consequence adding tags to the match(es) returned by ``when``.
    """
    def __init__(self, tags):
        """
        :param tags: tags appended to each handled match
        """
        self.tags = tags
        self.remove = RemoveMatch()
        self.append = AppendMatch()
    def then(self, matches, when_response, context):
        """
        Remove the matches from ``matches``, extend their ``tags`` with ``self.tags`` and append them back.
        Only the matches that ``RemoveMatch`` actually removed are tagged.
        :param matches:
        :param when_response: a single match or an iterable of matches
        :param context:
        """
        taken_out = self.remove.then(matches, when_response, context)
        if is_iterable(taken_out):
            taken_out = list(taken_out)
            for tagged in taken_out:
                tagged.tags.extend(self.tags)
        elif taken_out:
            taken_out.tags.extend(self.tags)  # pylint: disable=no-member
        if not taken_out:
            return
        self.append.then(matches, taken_out, context)
 class RemoveTags(Consequence):  # pylint: disable=abstract-method
    """
    Consequence removing tags from the match(es) returned by ``when``.
    """
    def __init__(self, tags):
        """
        :param tags: tags removed from each handled match
        """
        self.tags = tags
        self.remove = RemoveMatch()
        self.append = AppendMatch()
    def then(self, matches, when_response, context):
        """
        Remove the matches from ``matches``, strip ``self.tags`` from their ``tags`` and append them back.
        For each tag, a single occurrence is removed from a match. Only the matches that ``RemoveMatch``
        actually removed are processed.
        :param matches:
        :param when_response: a single match or an iterable of matches
        :param context:
        """
        def strip_tags(target):  # pylint:disable=missing-docstring
            for tag in self.tags:
                if tag in target.tags:  # pylint: disable=no-member
                    target.tags.remove(tag)  # pylint: disable=no-member
        taken_out = self.remove.then(matches, when_response, context)
        if is_iterable(taken_out):
            taken_out = list(taken_out)
            for untagged in taken_out:
                strip_tags(untagged)
        elif taken_out:
            strip_tags(taken_out)
        if not taken_out:
            return
        self.append.then(matches, taken_out, context)
 class Rules(list):
    """
    list of rules ready to execute.
    """
    def __init__(self, *rules):
        """
        :param rules: rule modules, classes or instances to load right away
        """
        super(Rules, self).__init__()
        self.load(*rules)
    def load(self, *rules):
        """
        Load rules from a Rule module, class or instance.
        Modules go through ``load_module``, classes through ``load_class``; anything else is appended as is.
        :param rules: modules, classes or instances
        :type rules:
        """
        for rule in rules:
            if inspect.ismodule(rule):
                self.load_module(rule)
            elif inspect.isclass(rule):
                self.load_class(rule)
            else:
                self.append(rule)
    def load_module(self, module):
        """
        Load every class defined in the given module (classes merely imported into it are skipped).
        :param module: module to scan
        :type module: module
        """
        def defined_here(member):  # pylint:disable=missing-docstring
            # ``inspect.isclass`` must be *called*: the bare function object is always truthy, which let
            # functions defined in the module through and had them invoked by ``load_class``.
            return (inspect.isclass(member)
                    and hasattr(member, '__module__')
                    and member.__module__ == module.__name__)
        for _, obj in inspect.getmembers(module, defined_here):
            self.load_class(obj)
    def load_class(self, class_):
        """
        Instantiate the given Rule class without arguments and append the instance.
        :param class_: class to instantiate
        :type class_: type
        """
        self.append(class_())
    def execute_all_rules(self, matches, context):
        """
        Execute all rules from this rules list.
        Rules are processed by priority (sort order of the rules), then by dependency group as computed by
        ``toposort_rules``, then by their position in this list.
        :param matches:
        :type matches:
        :param context:
        :type context:
        :return: ``(rule, when_response)`` pairs for each rule whose execution returned a response
        :rtype: list
        """
        ret = []
        for priority, priority_rules in groupby(sorted(self), lambda rule: rule.priority):
            sorted_rules = toposort_rules(list(priority_rules))  # Group by dependency graph toposort
            for rules_group in sorted_rules:
                rules_group = sorted(rules_group, key=self.index)  # Sort rules group based on initial ordering.
                # The group is announced at the highest log level found among its rules.
                group_log_level = None
                for rule in rules_group:
                    if group_log_level is None or group_log_level < rule.log_level:
                        group_log_level = rule.log_level
                log(group_log_level, "%s independent rule(s) at priority %s.", len(rules_group), priority)
                for rule in rules_group:
                    when_response = execute_rule(rule, matches, context)
                    if when_response is not None:
                        ret.append((rule, when_response))
        return ret
 def execute_rule(rule, matches, context):
    """
    Evaluate the condition of a rule and, when it is truthy, run its consequence.
    :param rule: rule to execute
    :type rule:
    :param matches: matches handed to ``when`` and ``then``
    :type matches:
    :param context: context handed to ``enabled``, ``when`` and ``then``
    :type context:
    :return: the truthy ``when`` response if the rule was triggered, None otherwise
    :rtype:
    """
    if not rule.enabled(context):
        log(rule.log_level, "Rule is disabled: %s", rule)
        return None
    log(rule.log_level, "Checking rule condition: %s", rule)
    when_response = rule.when(matches, context)
    if not when_response:
        return None
    log(rule.log_level, "Rule was triggered: %s", when_response)
    log(rule.log_level, "Running rule consequence: %s %s", rule, when_response)
    rule.then(matches, when_response, context)
    return when_response
 def toposort_rules(rules):
    """
    Order the given rules with toposort, using each rule's ``dependency`` attribute.
    A dependency given as a class is resolved to the rule instance of that class found in ``rules``;
    dependencies that cannot be resolved are ignored.
    :param rules: rule instances, at most one per class
    :type rules:
    :return: the result of ``toposort`` on the dependency graph
    :rtype:
    :raise ValueError: if two rules share the same class
    """
    instance_by_class = {}
    for rule in rules:
        rule_class = rule.__class__
        if rule_class in instance_by_class:
            raise ValueError("Duplicate class rules are not allowed: %s" % rule.__class__)
        instance_by_class[rule_class] = rule
    graph = {}
    for rule in rules:
        declared = rule.dependency
        if not is_iterable(declared) and declared:
            # A lone dependency is handled like a one-item list.
            declared = [declared]
        resolved = set()
        if declared:
            for dependency in declared:
                if inspect.isclass(dependency):
                    dependency = instance_by_class.get(dependency)
                if dependency:
                    resolved.add(dependency)
        graph[rule] = resolved
    return toposort(graph)
@@ -1,84 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # Copyright 2014 True Blade Systems, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Original:
 #   - https://bitbucket.org/ericvsmith/toposort (1.4)
 # Modifications:
 #   - merged Pull request #2 for CyclicDependency error
 #   - import reduce as original name
 #   - support python 2.6 dict comprehension
 # pylint: skip-file
 from functools import reduce
 class CyclicDependency(ValueError):
    """
    Raised when the remaining items cannot be ordered because they depend on each other.
    The offending mapping is kept in the ``cyclic`` attribute.
    """
    def __init__(self, cyclic):
        described = ', '.join(map(repr, cyclic.items()))
        message = 'Cyclic dependencies exist among these items: {0}'.format(described)
        super(CyclicDependency, self).__init__(message)
        self.cyclic = cyclic
 def toposort(data):
    """
    Dependencies are expressed as a dictionary whose keys are items
    and whose values are a set of dependent items. Output is a list of
    sets in topological order. The first set consists of items with no
    dependences, each subsequent set consists of items that depend upon
    items in the preceding sets.
    The input mapping and its dependency sets are left untouched.
    :param data: mapping of item to the items it depends on
    :type data: dict
    :return: generator of sets of items
    :rtype:
    :raise CyclicDependency: once every orderable set has been yielded, if some items remain
    """
    # Special case empty input.
    if len(data) == 0:
        return
    # Copy the input two levels deep so as to leave it unmodified (a shallow copy would share the
    # dependency sets with the caller), ignoring self dependencies on the way.
    data = dict((item, set(dep) - set([item])) for item, dep in data.items())
    # Find all items that don't depend on anything.
    extra_items_in_deps = reduce(set.union, data.values()) - set(data.keys())
    # Add empty dependences where needed.
    data.update(dict((item, set()) for item in extra_items_in_deps))
    while True:
        ordered = set(item for item, dep in data.items() if len(dep) == 0)
        if not ordered:
            break
        yield ordered
        data = dict((item, (dep - ordered))
                for item, dep in data.items()
                if item not in ordered)
    if len(data) != 0:
        raise CyclicDependency(data)
 def toposort_flatten(data, sort=True):
    """
    Flatten the sets produced by toposort() into one list. Within each set the items are sorted
    when ``sort`` is truthy (which makes the result deterministic), otherwise they are taken in
    set iteration order.
    :param data: dependency mapping, as accepted by toposort()
    :type data:
    :param sort: whether to sort the items of each set
    :type sort:
    :return: Single list of dependencies.
    :rtype: list
    """
    arrange = sorted if sort else list
    flattened = []
    for group in toposort(data):
        flattened.extend(arrange(group))
    return flattened
@@ -1,156 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Various utilities functions
 """
 try:
    from collections.abc import MutableSet
 except ImportError:
    from collections import MutableSet
 from types import GeneratorType
 def find_all(string, sub, start=None, end=None, ignore_case=False, **kwargs):
    """
    Return all indices in string s where substring sub is
    found, such that sub is contained in the slice s[start:end].
    Occurrences do not overlap. An empty ``sub`` is found at every position of the slice.
    >>> list(find_all('The quick brown fox jumps over the lazy dog', 'fox'))
    [16]
    >>> list(find_all('The quick brown fox jumps over the lazy dog', 'mountain'))
    []
    >>> list(find_all('The quick brown fox jumps over the lazy dog', 'The'))
    [0]
    >>> list(find_all(
    ... 'Carved symbols in a mountain hollow on the bank of an inlet irritated an eccentric person',
    ... 'an'))
    [44, 51, 70]
    >>> list(find_all(
    ... 'Carved symbols in a mountain hollow on the bank of an inlet irritated an eccentric person',
    ... 'an',
    ... 50,
    ... 60))
    [51]
    >>> list(find_all('abc', ''))
    [0, 1, 2, 3]
    :param string: the input string
    :type string: str
    :param sub: the substring
    :type sub: str
    :param start: index to start searching from
    :type start: int
    :param end: index to stop searching at
    :type end: int
    :param ignore_case: compare lowercased versions of ``string`` and ``sub``
    :type ignore_case: bool
    :param kwargs: accepted and ignored
    :return: all indices in the input string
    :rtype: __generator[int]
    """
    #pylint: disable=unused-argument
    if ignore_case:
        sub = sub.lower()
        string = string.lower()
    # Always move forward by at least one character: with an empty ``sub`` the search would otherwise
    # keep returning the same index forever.
    step = len(sub) or 1
    while True:
        start = string.find(sub, start, end)
        if start == -1:
            return
        yield start
        start += step
 def get_first_defined(data, keys, default_value=None):
    """
    Look the given keys up in order and return the value of the first one present in data.
    :param data: container supporting ``in`` and item access
    :type data:
    :param keys: candidate keys, by decreasing preference
    :type keys:
    :param default_value: value returned when none of the keys is present
    :type default_value:
    :return: the first value found, or default_value
    :rtype:
    """
    present = (data[key] for key in keys if key in data)
    return next(present, default_value)
 def is_iterable(obj):
    """
    Tell whether obj stands for several things to look up rather than a single one.
    Generators qualify, and so does any object exposing an `__iter__` attribute, which covers
    types this module does not have to know about, such as NumPy arrays.
    `str` instances are the exception: they are treated as atomic values, not as iterables.
    The Python 2 `unicode` type needs no dedicated check, because it doesn't
    have an `__iter__` attribute anyway.
    """
    if isinstance(obj, GeneratorType):
        return True
    if isinstance(obj, str):
        return False
    return hasattr(obj, '__iter__')
 def extend_safe(target, source):
    """
    Append to target, in order, each element of source that target does not contain yet.
    Membership is checked against the growing target, so duplicates inside source are added once.
    :param target: list extended in place
    :type target: list
    :param source: elements to add
    :type source: list
    """
    for candidate in source:
        if candidate in target:
            continue
        target.append(candidate)
 class _Ref(object):
    """
    Wrapper used by IdentitySet: compares and hashes the wrapped value by identity.
    """
    def __init__(self, value):
        self.value = value
    def __eq__(self, other):
        # Identity, not equality: two equal but distinct objects give different references.
        return other.value is self.value
    def __hash__(self):
        return id(self.value)
 class IdentitySet(MutableSet):  # pragma: no cover
    """
    Set whose members are distinguished by identity rather than by equality.
    Each member is stored wrapped into a ``_Ref``.
    """
    def __init__(self, items=None):  # pylint: disable=super-init-not-called
        initial = [] if items is None else items
        self.refs = set(_Ref(item) for item in initial)
    def __contains__(self, elem):
        return _Ref(elem) in self.refs
    def __iter__(self):
        return (wrapped.value for wrapped in self.refs)
    def __len__(self):
        return len(self.refs)
    def add(self, value):
        self.refs.add(_Ref(value))
    def discard(self, value):
        self.refs.discard(_Ref(value))
    def update(self, iterable):
        """
        Add every element of the iterable to this set.
        :param iterable: elements to add
        :type iterable:
        :return:
        :rtype:
        """
        for member in iterable:
            self.add(member)
    def __repr__(self):  # pragma: no cover
        return "%s(%s)" % (type(self).__name__, list(self))
@@ -1,81 +0,0 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 Validator functions to use in patterns.
 All those function have last argument as match, so it's possible to use functools.partial to bind previous arguments.
 """
 def chars_before(chars, match):
    """
    Validate the match if the character on its left is in the given sequence.
    A match starting at the beginning of the input string is always valid.
    :param chars: accepted characters
    :type chars:
    :param match: match to validate; its ``start`` and ``input_string`` are read
    :type match:
    :return: True if the match is valid
    :rtype: bool
    """
    return match.start <= 0 or match.input_string[match.start - 1] in chars
 def chars_after(chars, match):
    """
    Validate the match if the character on its right is in the given sequence.
    A match ending at the end of the input string is always valid.
    :param chars: accepted characters
    :type chars:
    :param match: match to validate; its ``end`` and ``input_string`` are read
    :type match:
    :return: True if the match is valid
    :rtype: bool
    """
    return match.end >= len(match.input_string) or match.input_string[match.end] in chars
 def chars_surround(chars, match):
    """
    Validate the match if the characters on both of its sides are in the given sequence.
    The right side is checked only when the left side is valid.
    :param chars: accepted characters
    :type chars:
    :param match: match to validate
    :type match:
    :return: True if the match is valid
    :rtype: bool
    """
    if not chars_before(chars, match):
        return False
    return chars_after(chars, match)
 def validators(*chained_validators):
    """
    Combine several validator functions into a single one.
    The resulting validator calls them in order and stops at the first one returning a falsy value.
    :param chained_validators: validator functions taking a match
    :type chained_validators:
    :return: a validator returning True only if every chained validator accepted the match
    :rtype: function
    """
    def validator_chain(match):  # pylint:disable=missing-docstring
        return all(check(match) for check in chained_validators)
    return validator_chain
 def allways_true(match):  # pylint:disable=unused-argument
    """
    Validator accepting any match.
    :param match: ignored
    :return: True, always
    :rtype: bool
    """
    return True