Put guessit back, with the changes to the imports
@@ -7,6 +7,8 @@ import os
 import re
 import sys
 
+from lib.guessit import guessit
+
 PY3 = False
 if sys.version_info[0] >= 3: PY3 = True; unicode = str; unichr = chr; long = int
 if PY3:
@@ -271,14 +273,13 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
 longtitle = title + (s if title and title2 else '') + title2 + '\n'
 
 if sceneTitle:
-    import lib.PTN.parse as parse
-    parsedTitle = parse(title)
+    parsedTitle = guessit(title)
     title = longtitle = parsedTitle.get('title', '')
     log('TITOLO',title)
-    if parsedTitle.get('quality'):
-        quality = str(parsedTitle.get('quality'))
-    if parsedTitle.get('resolution'):
-        quality += ' ' + str(parsedTitle.get('resolution', ''))
+    if parsedTitle.get('source'):
+        quality = str(parsedTitle.get('source'))
+    if parsedTitle.get('screen_size'):
+        quality += ' ' + str(parsedTitle.get('screen_size', ''))
     if not scraped['year']:
         infolabels['year'] = parsedTitle.get('year', '')
     if parsedTitle.get('episode') and parsedTitle.get('season'):
@@ -297,8 +298,8 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
     longtitle += s + config.get_localized_string(30140) + " " +str(parsedTitle.get('season')[0]) + '-' + str(parsedTitle.get('season')[-1])
 elif parsedTitle.get('season'):
     longtitle += s + config.get_localized_string(60027) % str(parsedTitle.get('season'))
-if parsedTitle.get('episodeName'):
-    longtitle += s + parsedTitle.get('episodeName')
+if parsedTitle.get('episode_title'):
+    longtitle += s + parsedTitle.get('episode_title')
 
 longtitle = typo(longtitle, 'bold')
 lang1, longtitle = scrapeLang(scraped, lang, longtitle)
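
The key renames above track guessit's vocabulary: PTN's 'quality' and 'resolution' become 'source' and 'screen_size', and the episode name moves to 'episode_title'. As a rough sketch of what the new parser hands back for a typical scene release name (exact values depend on the guessit version bundled under lib):

    from lib.guessit import guessit

    parsedTitle = guessit('Show.Name.S01E02.720p.BluRay.x264-GROUP')
    parsedTitle.get('title')          # 'Show Name'
    parsedTitle.get('season')         # 1
    parsedTitle.get('episode')        # 2
    parsedTitle.get('screen_size')    # '720p'    (PTN called this 'resolution')
    parsedTitle.get('source')         # 'Blu-ray' (PTN called this 'quality')
    parsedTitle.get('episode_title')  # None here; set when the release names the episode
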
@@ -871,7 +872,7 @@ def match(item_url_string, **args):
     string = args.get('string', False)
 
     # remove scrape arguments
     args = dict([(key, val) for key, val in args.items() if key not in ['patron', 'patronBlock', 'patronBlocks', 'debug', 'debugBlock', 'string']])
 
     # check type of item_url_string
     if string:
lib/babelfish/__init__.py (new Executable file, 25 lines)
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 the BabelFish authors. All rights reserved.
+# Use of this source code is governed by the 3-clause BSD license
+# that can be found in the LICENSE file.
+#
+__title__ = 'babelfish'
+__version__ = '0.5.5-dev'
+__author__ = 'Antoine Bertin'
+__license__ = 'BSD'
+__copyright__ = 'Copyright 2015 the BabelFish authors'
+
+import sys
+
+if sys.version_info[0] >= 3:
+    basestr = str
+else:
+    basestr = basestring
+
+from .converters import (LanguageConverter, LanguageReverseConverter, LanguageEquivalenceConverter, CountryConverter,
+                         CountryReverseConverter)
+from .country import country_converters, COUNTRIES, COUNTRY_MATRIX, Country
+from .exceptions import Error, LanguageConvertError, LanguageReverseError, CountryConvertError, CountryReverseError
+from .language import language_converters, LANGUAGES, LANGUAGE_MATRIX, Language
+from .script import SCRIPTS, SCRIPT_MATRIX, Script
lib/babelfish/converters/__init__.py (new Executable file, 289 lines)
@@ -0,0 +1,289 @@
+# Copyright (c) 2013 the BabelFish authors. All rights reserved.
+# Use of this source code is governed by the 3-clause BSD license
+# that can be found in the LICENSE file.
+#
+import collections
+import functools
+from importlib import import_module
+
+# from pkg_resources import iter_entry_points, EntryPoint
+from ..exceptions import LanguageConvertError, LanguageReverseError
+
+
+# from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
+class CaseInsensitiveDict(collections.MutableMapping):
+    """A case-insensitive ``dict``-like object.
+
+    Implements all methods and operations of
+    ``collections.MutableMapping`` as well as dict's ``copy``. Also
+    provides ``lower_items``.
+
+    All keys are expected to be strings. The structure remembers the
+    case of the last key to be set, and ``iter(instance)``,
+    ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
+    will contain case-sensitive keys. However, querying and contains
+    testing is case insensitive:
+
+        cid = CaseInsensitiveDict()
+        cid['English'] = 'eng'
+        cid['ENGLISH'] == 'eng'  # True
+        list(cid) == ['English']  # True
+
+    If the constructor, ``.update``, or equality comparison
+    operations are given keys that have equal ``.lower()``s, the
+    behavior is undefined.
+
+    """
+    def __init__(self, data=None, **kwargs):
+        self._store = dict()
+        if data is None:
+            data = {}
+        self.update(data, **kwargs)
+
+    def __setitem__(self, key, value):
+        # Use the lowercased key for lookups, but store the actual
+        # key alongside the value.
+        self._store[key.lower()] = (key, value)
+
+    def __getitem__(self, key):
+        return self._store[key.lower()][1]
+
+    def __delitem__(self, key):
+        del self._store[key.lower()]
+
+    def __iter__(self):
+        return (casedkey for casedkey, mappedvalue in self._store.values())
+
+    def __len__(self):
+        return len(self._store)
+
+    def lower_items(self):
+        """Like iteritems(), but with all lowercase keys."""
+        return (
+            (lowerkey, keyval[1])
+            for (lowerkey, keyval)
+            in self._store.items()
+        )
+
+    def __eq__(self, other):
+        if isinstance(other, collections.Mapping):
+            other = CaseInsensitiveDict(other)
+        else:
+            return NotImplemented
+        # Compare insensitively
+        return dict(self.lower_items()) == dict(other.lower_items())
+
+    # Copy is required
+    def copy(self):
+        return CaseInsensitiveDict(self._store.values())
+
+    def __repr__(self):
+        return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
+
+
+class LanguageConverter(object):
+    """A :class:`LanguageConverter` supports converting an alpha3 language code with an
+    alpha2 country code and a script code into a custom code
+
+    .. attribute:: codes
+
+        Set of possible custom codes
+
+    """
+    def convert(self, alpha3, country=None, script=None):
+        """Convert an alpha3 language code with an alpha2 country code and a script code
+        into a custom code
+
+        :param string alpha3: ISO-639-3 language code
+        :param country: ISO-3166 country code, if any
+        :type country: string or None
+        :param script: ISO-15924 script code, if any
+        :type script: string or None
+        :return: the corresponding custom code
+        :rtype: string
+        :raise: :class:`~babelfish.exceptions.LanguageConvertError`
+
+        """
+        raise NotImplementedError
+
+
+class LanguageReverseConverter(LanguageConverter):
+    """A :class:`LanguageConverter` able to reverse a custom code into a alpha3
+    ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code
+
+    """
+    def reverse(self, code):
+        """Reverse a custom code into alpha3, country and script code
+
+        :param string code: custom code to reverse
+        :return: the corresponding alpha3 ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code
+        :rtype: tuple
+        :raise: :class:`~babelfish.exceptions.LanguageReverseError`
+
+        """
+        raise NotImplementedError
+
+
+class LanguageEquivalenceConverter(LanguageReverseConverter):
+    """A :class:`LanguageEquivalenceConverter` is a utility class that allows you to easily define a
+    :class:`LanguageReverseConverter` by only specifying the dict from alpha3 to their corresponding symbols.
+
+    You must specify the dict of equivalence as a class variable named SYMBOLS.
+
+    If you also set the class variable CASE_SENSITIVE to ``True`` then the reverse conversion function will be
+    case-sensitive (it is case-insensitive by default).
+
+    Example::
+
+        class MyCodeConverter(babelfish.LanguageEquivalenceConverter):
+            CASE_SENSITIVE = True
+            SYMBOLS = {'fra': 'mycode1', 'eng': 'mycode2'}
+
+    """
+    CASE_SENSITIVE = False
+
+    def __init__(self):
+        self.codes = set()
+        self.to_symbol = {}
+        if self.CASE_SENSITIVE:
+            self.from_symbol = {}
+        else:
+            self.from_symbol = CaseInsensitiveDict()
+
+        for alpha3, symbol in self.SYMBOLS.items():
+            self.to_symbol[alpha3] = symbol
+            self.from_symbol[symbol] = (alpha3, None, None)
+            self.codes.add(symbol)
+
+    def convert(self, alpha3, country=None, script=None):
+        try:
+            return self.to_symbol[alpha3]
+        except KeyError:
+            raise LanguageConvertError(alpha3, country, script)
+
+    def reverse(self, code):
+        try:
+            return self.from_symbol[code]
+        except KeyError:
+            raise LanguageReverseError(code)
+
+
+class CountryConverter(object):
+    """A :class:`CountryConverter` supports converting an alpha2 country code
+    into a custom code
+
+    .. attribute:: codes
+
+        Set of possible custom codes
+
+    """
+    def convert(self, alpha2):
+        """Convert an alpha2 country code into a custom code
+
+        :param string alpha2: ISO-3166-1 language code
+        :return: the corresponding custom code
+        :rtype: string
+        :raise: :class:`~babelfish.exceptions.CountryConvertError`
+
+        """
+        raise NotImplementedError
+
+
+class CountryReverseConverter(CountryConverter):
+    """A :class:`CountryConverter` able to reverse a custom code into a alpha2
+    ISO-3166-1 country code
+
+    """
+    def reverse(self, code):
+        """Reverse a custom code into alpha2 code
+
+        :param string code: custom code to reverse
+        :return: the corresponding alpha2 ISO-3166-1 country code
+        :rtype: string
+        :raise: :class:`~babelfish.exceptions.CountryReverseError`
+
+        """
+        raise NotImplementedError
+
+
+class ConverterManager(object):
+    """Manager for babelfish converters behaving like a dict with lazy loading
+
+    Loading is done in this order:
+
+    * Entry point converters
+    * Registered converters
+    * Internal converters
+
+    .. attribute:: entry_point
+
+        The entry point where to look for converters
+
+    .. attribute:: internal_converters
+
+        Internal converters with entry point syntax
+
+    """
+    entry_point = ''
+    internal_converters = []
+
+    def __init__(self):
+        #: Registered converters with entry point syntax
+        self.registered_converters = []
+
+        #: Loaded converters
+        self.converters = {}
+
+    def __getitem__(self, name):
+        """Get a converter, lazy loading it if necessary"""
+        if name in self.converters:
+            return self.converters[name]
+        # for ep in iter_entry_points(self.entry_point):
+        #     if ep.name == name:
+        #         self.converters[ep.name] = ep.load()()
+        #         return self.converters[ep.name]
+        def parse(str):
+            import re
+            match = re.match(r'(?P<name>\w+) = (?P<module>[a-z0-9.]+):(?P<class>\w+)', str)
+            print(match.groupdict())
+            return match.groupdict()
+        for ep in (parse(c) for c in self.registered_converters + self.internal_converters):
+            if ep.get('name') == name:
+                cl = getattr(import_module(ep.get('module')), ep.get('class'))
+                self.converters[ep.get('name')] = cl()
+                return self.converters[ep.get('name')]
+        raise KeyError(name)
+
+    def __setitem__(self, name, converter):
+        """Load a converter"""
+        self.converters[name] = converter
+
+    def __delitem__(self, name):
+        """Unload a converter"""
+        del self.converters[name]
+
+    def __iter__(self):
+        """Iterator over loaded converters"""
+        return iter(self.converters)
+
+    def register(self, entry_point):
+        """Register a converter
+
+        :param string entry_point: converter to register (entry point syntax)
+        :raise: ValueError if already registered
+
+        """
+        if entry_point in self.registered_converters:
+            raise ValueError('Already registered')
+        self.registered_converters.insert(0, entry_point)
+
+    def unregister(self, entry_point):
+        """Unregister a converter
+
+        :param string entry_point: converter to unregister (entry point syntax)
+
+        """
+        self.registered_converters.remove(entry_point)
+
+    def __contains__(self, name):
+        return name in self.converters
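
With pkg_resources commented out, lazy loading now goes through the inline parse helper above, which splits an entry-point string into its name, module and class before import_module does the actual import. A minimal sketch of the round trip, using the converter string that country.py registers below:

    import re
    from importlib import import_module

    spec = 'name = babelfish.converters.countryname:CountryNameConverter'
    groups = re.match(r'(?P<name>\w+) = (?P<module>[a-z0-9.]+):(?P<class>\w+)', spec).groupdict()
    # {'name': 'name', 'module': 'babelfish.converters.countryname', 'class': 'CountryNameConverter'}
    # __getitem__ then instantiates and caches it, in effect:
    #   cl = getattr(import_module(groups['module']), groups['class'])
    #   self.converters[groups['name']] = cl()
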
lib/babelfish/converters/alpha2.py (new Executable file, 17 lines)
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 the BabelFish authors. All rights reserved.
+# Use of this source code is governed by the 3-clause BSD license
+# that can be found in the LICENSE file.
+#
+from __future__ import unicode_literals
+from . import LanguageEquivalenceConverter
+from ..language import LANGUAGE_MATRIX
+
+
+class Alpha2Converter(LanguageEquivalenceConverter):
+    CASE_SENSITIVE = True
+    SYMBOLS = {}
+    for iso_language in LANGUAGE_MATRIX:
+        if iso_language.alpha2:
+            SYMBOLS[iso_language.alpha3] = iso_language.alpha2
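
A LanguageEquivalenceConverter subclass only has to fill SYMBOLS; convert() and reverse() are inherited from the base class. A small sketch, instantiating the class directly (concrete lookups depend on the LANGUAGE_MATRIX data shipped in this commit):

    from lib.babelfish.converters.alpha2 import Alpha2Converter

    alpha2 = Alpha2Converter()
    alpha2.convert('eng')  # 'en' -- alpha3 to alpha2 via SYMBOLS
    alpha2.reverse('en')   # ('eng', None, None) -- alpha3, country, script
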
lib/babelfish/converters/alpha3b.py (new Executable file, 17 lines)
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 the BabelFish authors. All rights reserved.
+# Use of this source code is governed by the 3-clause BSD license
+# that can be found in the LICENSE file.
+#
+from __future__ import unicode_literals
+from . import LanguageEquivalenceConverter
+from ..language import LANGUAGE_MATRIX
+
+
+class Alpha3BConverter(LanguageEquivalenceConverter):
+    CASE_SENSITIVE = True
+    SYMBOLS = {}
+    for iso_language in LANGUAGE_MATRIX:
+        if iso_language.alpha3b:
+            SYMBOLS[iso_language.alpha3] = iso_language.alpha3b
lib/babelfish/converters/alpha3t.py (new Executable file, 17 lines)
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 the BabelFish authors. All rights reserved.
+# Use of this source code is governed by the 3-clause BSD license
+# that can be found in the LICENSE file.
+#
+from __future__ import unicode_literals
+from . import LanguageEquivalenceConverter
+from ..language import LANGUAGE_MATRIX
+
+
+class Alpha3TConverter(LanguageEquivalenceConverter):
+    CASE_SENSITIVE = True
+    SYMBOLS = {}
+    for iso_language in LANGUAGE_MATRIX:
+        if iso_language.alpha3t:
+            SYMBOLS[iso_language.alpha3] = iso_language.alpha3t
lib/babelfish/converters/countryname.py (new Executable file, 31 lines)
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 the BabelFish authors. All rights reserved.
+# Use of this source code is governed by the 3-clause BSD license
+# that can be found in the LICENSE file.
+#
+from __future__ import unicode_literals
+from . import CountryReverseConverter, CaseInsensitiveDict
+from ..country import COUNTRY_MATRIX
+from ..exceptions import CountryConvertError, CountryReverseError
+
+
+class CountryNameConverter(CountryReverseConverter):
+    def __init__(self):
+        self.codes = set()
+        self.to_name = {}
+        self.from_name = CaseInsensitiveDict()
+        for country in COUNTRY_MATRIX:
+            self.codes.add(country.name)
+            self.to_name[country.alpha2] = country.name
+            self.from_name[country.name] = country.alpha2
+
+    def convert(self, alpha2):
+        if alpha2 not in self.to_name:
+            raise CountryConvertError(alpha2)
+        return self.to_name[alpha2]
+
+    def reverse(self, name):
+        if name not in self.from_name:
+            raise CountryReverseError(name)
+        return self.from_name[name]
lib/babelfish/converters/name.py (new Executable file, 17 lines)
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 the BabelFish authors. All rights reserved.
+# Use of this source code is governed by the 3-clause BSD license
+# that can be found in the LICENSE file.
+#
+from __future__ import unicode_literals
+from . import LanguageEquivalenceConverter
+from ..language import LANGUAGE_MATRIX
+
+
+class NameConverter(LanguageEquivalenceConverter):
+    CASE_SENSITIVE = False
+    SYMBOLS = {}
+    for iso_language in LANGUAGE_MATRIX:
+        if iso_language.name:
+            SYMBOLS[iso_language.alpha3] = iso_language.name
lib/babelfish/converters/opensubtitles.py (new Executable file, 36 lines)
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 the BabelFish authors. All rights reserved.
+# Use of this source code is governed by the 3-clause BSD license
+# that can be found in the LICENSE file.
+#
+from __future__ import unicode_literals
+from . import LanguageReverseConverter, CaseInsensitiveDict
+from ..exceptions import LanguageReverseError
+from ..language import language_converters
+
+
+class OpenSubtitlesConverter(LanguageReverseConverter):
+    def __init__(self):
+        self.alpha3b_converter = language_converters['alpha3b']
+        self.alpha2_converter = language_converters['alpha2']
+        self.to_opensubtitles = {('por', 'BR'): 'pob', ('gre', None): 'ell', ('srp', None): 'scc', ('srp', 'ME'): 'mne'}
+        self.from_opensubtitles = CaseInsensitiveDict({'pob': ('por', 'BR'), 'pb': ('por', 'BR'), 'ell': ('ell', None),
+                                                       'scc': ('srp', None), 'mne': ('srp', 'ME')})
+        self.codes = (self.alpha2_converter.codes | self.alpha3b_converter.codes | set(['pob', 'pb', 'scc', 'mne']))
+
+    def convert(self, alpha3, country=None, script=None):
+        alpha3b = self.alpha3b_converter.convert(alpha3, country, script)
+        if (alpha3b, country) in self.to_opensubtitles:
+            return self.to_opensubtitles[(alpha3b, country)]
+        return alpha3b
+
+    def reverse(self, opensubtitles):
+        if opensubtitles in self.from_opensubtitles:
+            return self.from_opensubtitles[opensubtitles]
+        for conv in [self.alpha3b_converter, self.alpha2_converter]:
+            try:
+                return conv.reverse(opensubtitles)
+            except LanguageReverseError:
+                pass
+        raise LanguageReverseError(opensubtitles)
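
The converter falls back to the plain alpha3b code unless the (code, country) pair is one of the OpenSubtitles special cases declared above. A sketch, assuming the alpha3b and alpha2 converters are registered in language_converters as in upstream babelfish:

    from lib.babelfish.converters.opensubtitles import OpenSubtitlesConverter

    os_conv = OpenSubtitlesConverter()
    os_conv.convert('por', 'BR')  # 'pob' -- special-cased Brazilian Portuguese
    os_conv.convert('ita')        # 'ita' -- falls through to the alpha3b code
    os_conv.reverse('PB')         # ('por', 'BR') -- aliases match case-insensitively
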
lib/babelfish/converters/scope.py (new Executable file, 23 lines)
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 the BabelFish authors. All rights reserved.
+# Use of this source code is governed by the 3-clause BSD license
+# that can be found in the LICENSE file.
+#
+from __future__ import unicode_literals
+from . import LanguageConverter
+from ..exceptions import LanguageConvertError
+from ..language import LANGUAGE_MATRIX
+
+
+class ScopeConverter(LanguageConverter):
+    FULLNAME = {'I': 'individual', 'M': 'macrolanguage', 'S': 'special'}
+    SYMBOLS = {}
+    for iso_language in LANGUAGE_MATRIX:
+        SYMBOLS[iso_language.alpha3] = iso_language.scope
+    codes = set(SYMBOLS.values())
+
+    def convert(self, alpha3, country=None, script=None):
+        if self.SYMBOLS[alpha3] in self.FULLNAME:
+            return self.FULLNAME[self.SYMBOLS[alpha3]]
+        raise LanguageConvertError(alpha3, country, script)
lib/babelfish/converters/type.py (new Executable file, 23 lines)
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 the BabelFish authors. All rights reserved.
+# Use of this source code is governed by the 3-clause BSD license
+# that can be found in the LICENSE file.
+#
+from __future__ import unicode_literals
+from . import LanguageConverter
+from ..exceptions import LanguageConvertError
+from ..language import LANGUAGE_MATRIX
+
+
+class LanguageTypeConverter(LanguageConverter):
+    FULLNAME = {'A': 'ancient', 'C': 'constructed', 'E': 'extinct', 'H': 'historical', 'L': 'living', 'S': 'special'}
+    SYMBOLS = {}
+    for iso_language in LANGUAGE_MATRIX:
+        SYMBOLS[iso_language.alpha3] = iso_language.type
+    codes = set(SYMBOLS.values())
+
+    def convert(self, alpha3, country=None, script=None):
+        if self.SYMBOLS[alpha3] in self.FULLNAME:
+            return self.FULLNAME[self.SYMBOLS[alpha3]]
+        raise LanguageConvertError(alpha3, country, script)
lib/babelfish/country.py (new Executable file, 108 lines)
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 the BabelFish authors. All rights reserved.
+# Use of this source code is governed by the 3-clause BSD license
+# that can be found in the LICENSE file.
+#
+from __future__ import unicode_literals
+from collections import namedtuple
+from functools import partial
+# from pkg_resources import resource_stream  # @UnresolvedImport
+import os
+from .converters import ConverterManager
+from . import basestr
+
+
+COUNTRIES = {}
+COUNTRY_MATRIX = []
+
+#: The namedtuple used in the :data:`COUNTRY_MATRIX`
+IsoCountry = namedtuple('IsoCountry', ['name', 'alpha2'])
+
+f = open(os.path.join(os.path.dirname(__file__), 'data/iso-3166-1.txt'), 'rb')
+f.readline()
+for l in f:
+    iso_country = IsoCountry(*l.decode('utf-8').strip().split(';'))
+    COUNTRIES[iso_country.alpha2] = iso_country.name
+    COUNTRY_MATRIX.append(iso_country)
+f.close()
+
+
+class CountryConverterManager(ConverterManager):
+    """:class:`~babelfish.converters.ConverterManager` for country converters"""
+    entry_point = 'babelfish.country_converters'
+    internal_converters = ['name = babelfish.converters.countryname:CountryNameConverter']
+
+country_converters = CountryConverterManager()
+
+
+class CountryMeta(type):
+    """The :class:`Country` metaclass
+
+    Dynamically redirect :meth:`Country.frommycode` to :meth:`Country.fromcode` with the ``mycode`` `converter`
+
+    """
+    def __getattr__(cls, name):
+        if name.startswith('from'):
+            return partial(cls.fromcode, converter=name[4:])
+        return type.__getattribute__(cls, name)
+
+
+class Country(CountryMeta(str('CountryBase'), (object,), {})):
+    """A country on Earth
+
+    A country is represented by a 2-letter code from the ISO-3166 standard
+
+    :param string country: 2-letter ISO-3166 country code
+
+    """
+    def __init__(self, country):
+        if country not in COUNTRIES:
+            raise ValueError('%r is not a valid country' % country)
+
+        #: ISO-3166 2-letter country code
+        self.alpha2 = country
+
+    @classmethod
+    def fromcode(cls, code, converter):
+        """Create a :class:`Country` by its `code` using `converter` to
+        :meth:`~babelfish.converters.CountryReverseConverter.reverse` it
+
+        :param string code: the code to reverse
+        :param string converter: name of the :class:`~babelfish.converters.CountryReverseConverter` to use
+        :return: the corresponding :class:`Country` instance
+        :rtype: :class:`Country`
+
+        """
+        return cls(country_converters[converter].reverse(code))
+
+    def __getstate__(self):
+        return self.alpha2
+
+    def __setstate__(self, state):
+        self.alpha2 = state
+
+    def __getattr__(self, name):
+        try:
+            return country_converters[name].convert(self.alpha2)
+        except KeyError:
+            raise AttributeError(name)
+
+    def __hash__(self):
+        return hash(self.alpha2)
+
+    def __eq__(self, other):
+        if isinstance(other, basestr):
+            return str(self) == other
+        if not isinstance(other, Country):
+            return False
+        return self.alpha2 == other.alpha2
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __repr__(self):
+        return '<Country [%s]>' % self
+
+    def __str__(self):
+        return self.alpha2
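
CountryMeta makes any Country.fromXYZ attribute resolve to fromcode(..., converter='XYZ'), so the name converter registered in internal_converters above is reachable as Country.fromname. A small sketch (the import path may need a lib. prefix, depending on how the addon exposes the package on sys.path):

    from babelfish import Country

    c = Country('GB')
    c.name                              # 'UNITED KINGDOM' -- __getattr__ routes to country_converters['name']
    Country.fromname('UNITED KINGDOM')  # <Country [GB]> -- CountryMeta redirects from* to fromcode
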
lib/babelfish/data/iso-3166-1.txt (new Executable file, 250 lines)
@@ -0,0 +1,250 @@
+Country Name;ISO 3166-1-alpha-2 code
+AFGHANISTAN;AF
+ÅLAND ISLANDS;AX
+ALBANIA;AL
+ALGERIA;DZ
+AMERICAN SAMOA;AS
+ANDORRA;AD
+ANGOLA;AO
+ANGUILLA;AI
+ANTARCTICA;AQ
+ANTIGUA AND BARBUDA;AG
+ARGENTINA;AR
+ARMENIA;AM
+ARUBA;AW
+AUSTRALIA;AU
+AUSTRIA;AT
+AZERBAIJAN;AZ
+BAHAMAS;BS
+BAHRAIN;BH
+BANGLADESH;BD
+BARBADOS;BB
+BELARUS;BY
+BELGIUM;BE
+BELIZE;BZ
+BENIN;BJ
+BERMUDA;BM
+BHUTAN;BT
+BOLIVIA, PLURINATIONAL STATE OF;BO
+BONAIRE, SINT EUSTATIUS AND SABA;BQ
+BOSNIA AND HERZEGOVINA;BA
+BOTSWANA;BW
+BOUVET ISLAND;BV
+BRAZIL;BR
+BRITISH INDIAN OCEAN TERRITORY;IO
+BRUNEI DARUSSALAM;BN
+BULGARIA;BG
+BURKINA FASO;BF
+BURUNDI;BI
+CAMBODIA;KH
+CAMEROON;CM
+CANADA;CA
+CAPE VERDE;CV
+CAYMAN ISLANDS;KY
+CENTRAL AFRICAN REPUBLIC;CF
+CHAD;TD
+CHILE;CL
+CHINA;CN
+CHRISTMAS ISLAND;CX
+COCOS (KEELING) ISLANDS;CC
+COLOMBIA;CO
+COMOROS;KM
+CONGO;CG
+CONGO, THE DEMOCRATIC REPUBLIC OF THE;CD
+COOK ISLANDS;CK
+COSTA RICA;CR
+CÔTE D'IVOIRE;CI
+CROATIA;HR
+CUBA;CU
+CURAÇAO;CW
+CYPRUS;CY
+CZECH REPUBLIC;CZ
+DENMARK;DK
+DJIBOUTI;DJ
+DOMINICA;DM
+DOMINICAN REPUBLIC;DO
+ECUADOR;EC
+EGYPT;EG
+EL SALVADOR;SV
+EQUATORIAL GUINEA;GQ
+ERITREA;ER
+ESTONIA;EE
+ETHIOPIA;ET
+FALKLAND ISLANDS (MALVINAS);FK
+FAROE ISLANDS;FO
+FIJI;FJ
+FINLAND;FI
+FRANCE;FR
+FRENCH GUIANA;GF
+FRENCH POLYNESIA;PF
+FRENCH SOUTHERN TERRITORIES;TF
+GABON;GA
+GAMBIA;GM
+GEORGIA;GE
+GERMANY;DE
+GHANA;GH
+GIBRALTAR;GI
+GREECE;GR
+GREENLAND;GL
+GRENADA;GD
+GUADELOUPE;GP
+GUAM;GU
+GUATEMALA;GT
+GUERNSEY;GG
+GUINEA;GN
+GUINEA-BISSAU;GW
+GUYANA;GY
+HAITI;HT
+HEARD ISLAND AND MCDONALD ISLANDS;HM
+HOLY SEE (VATICAN CITY STATE);VA
+HONDURAS;HN
+HONG KONG;HK
+HUNGARY;HU
+ICELAND;IS
+INDIA;IN
+INDONESIA;ID
+IRAN, ISLAMIC REPUBLIC OF;IR
+IRAQ;IQ
+IRELAND;IE
+ISLE OF MAN;IM
+ISRAEL;IL
+ITALY;IT
+JAMAICA;JM
+JAPAN;JP
+JERSEY;JE
+JORDAN;JO
+KAZAKHSTAN;KZ
+KENYA;KE
+KIRIBATI;KI
+KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF;KP
+KOREA, REPUBLIC OF;KR
+KUWAIT;KW
+KYRGYZSTAN;KG
+LAO PEOPLE'S DEMOCRATIC REPUBLIC;LA
+LATVIA;LV
+LEBANON;LB
+LESOTHO;LS
+LIBERIA;LR
+LIBYA;LY
+LIECHTENSTEIN;LI
+LITHUANIA;LT
+LUXEMBOURG;LU
+MACAO;MO
+MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF;MK
+MADAGASCAR;MG
+MALAWI;MW
+MALAYSIA;MY
+MALDIVES;MV
+MALI;ML
+MALTA;MT
+MARSHALL ISLANDS;MH
+MARTINIQUE;MQ
+MAURITANIA;MR
+MAURITIUS;MU
+MAYOTTE;YT
+MEXICO;MX
+MICRONESIA, FEDERATED STATES OF;FM
+MOLDOVA, REPUBLIC OF;MD
+MONACO;MC
+MONGOLIA;MN
+MONTENEGRO;ME
+MONTSERRAT;MS
+MOROCCO;MA
+MOZAMBIQUE;MZ
+MYANMAR;MM
+NAMIBIA;NA
+NAURU;NR
+NEPAL;NP
+NETHERLANDS;NL
+NEW CALEDONIA;NC
+NEW ZEALAND;NZ
+NICARAGUA;NI
+NIGER;NE
+NIGERIA;NG
+NIUE;NU
+NORFOLK ISLAND;NF
+NORTHERN MARIANA ISLANDS;MP
+NORWAY;NO
+OMAN;OM
+PAKISTAN;PK
+PALAU;PW
+PALESTINE, STATE OF;PS
+PANAMA;PA
+PAPUA NEW GUINEA;PG
+PARAGUAY;PY
+PERU;PE
+PHILIPPINES;PH
+PITCAIRN;PN
+POLAND;PL
+PORTUGAL;PT
+PUERTO RICO;PR
+QATAR;QA
+RÉUNION;RE
+ROMANIA;RO
+RUSSIAN FEDERATION;RU
+RWANDA;RW
+SAINT BARTHÉLEMY;BL
+SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA;SH
+SAINT KITTS AND NEVIS;KN
+SAINT LUCIA;LC
+SAINT MARTIN (FRENCH PART);MF
+SAINT PIERRE AND MIQUELON;PM
+SAINT VINCENT AND THE GRENADINES;VC
+SAMOA;WS
+SAN MARINO;SM
+SAO TOME AND PRINCIPE;ST
+SAUDI ARABIA;SA
+SENEGAL;SN
+SERBIA;RS
+SEYCHELLES;SC
+SIERRA LEONE;SL
+SINGAPORE;SG
+SINT MAARTEN (DUTCH PART);SX
+SLOVAKIA;SK
+SLOVENIA;SI
+SOLOMON ISLANDS;SB
+SOMALIA;SO
+SOUTH AFRICA;ZA
+SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS;GS
+SOUTH SUDAN;SS
+SPAIN;ES
+SRI LANKA;LK
+SUDAN;SD
+SURINAME;SR
+SVALBARD AND JAN MAYEN;SJ
+SWAZILAND;SZ
+SWEDEN;SE
+SWITZERLAND;CH
+SYRIAN ARAB REPUBLIC;SY
+TAIWAN, PROVINCE OF CHINA;TW
+TAJIKISTAN;TJ
+TANZANIA, UNITED REPUBLIC OF;TZ
+THAILAND;TH
+TIMOR-LESTE;TL
+TOGO;TG
+TOKELAU;TK
+TONGA;TO
+TRINIDAD AND TOBAGO;TT
+TUNISIA;TN
+TURKEY;TR
+TURKMENISTAN;TM
+TURKS AND CAICOS ISLANDS;TC
+TUVALU;TV
+UGANDA;UG
+UKRAINE;UA
+UNITED ARAB EMIRATES;AE
+UNITED KINGDOM;GB
+UNITED STATES;US
+UNITED STATES MINOR OUTLYING ISLANDS;UM
+URUGUAY;UY
+UZBEKISTAN;UZ
+VANUATU;VU
+VENEZUELA, BOLIVARIAN REPUBLIC OF;VE
+VIET NAM;VN
+VIRGIN ISLANDS, BRITISH;VG
+VIRGIN ISLANDS, U.S.;VI
+WALLIS AND FUTUNA;WF
+WESTERN SAHARA;EH
+YEMEN;YE
+ZAMBIA;ZM
+ZIMBABWE;ZW
lib/babelfish/data/iso-639-3.tab (new Executable file, 7875 lines)
File diff suppressed because it is too large
lib/babelfish/data/iso15924-utf8-20131012.txt (new Executable file, 176 lines)
@@ -0,0 +1,176 @@
+#
+# ISO 15924 - Codes for the representation of names of scripts
+# Codes pour la représentation des noms d’écritures
+# Format:
+# Code;N°;English Name;Nom français;PVA;Date
+#
+
+Afak;439;Afaka;afaka;;2010-12-21
+Aghb;239;Caucasian Albanian;aghbanien;;2012-10-16
+Ahom;338;Ahom, Tai Ahom;âhom;;2012-11-01
+Arab;160;Arabic;arabe;Arabic;2004-05-01
+Armi;124;Imperial Aramaic;araméen impérial;Imperial_Aramaic;2009-06-01
+Armn;230;Armenian;arménien;Armenian;2004-05-01
+Avst;134;Avestan;avestique;Avestan;2009-06-01
+Bali;360;Balinese;balinais;Balinese;2006-10-10
+Bamu;435;Bamum;bamoum;Bamum;2009-06-01
+Bass;259;Bassa Vah;bassa;;2010-03-26
+Batk;365;Batak;batik;Batak;2010-07-23
+Beng;325;Bengali;bengalî;Bengali;2004-05-01
+Blis;550;Blissymbols;symboles Bliss;;2004-05-01
+Bopo;285;Bopomofo;bopomofo;Bopomofo;2004-05-01
+Brah;300;Brahmi;brahma;Brahmi;2010-07-23
+Brai;570;Braille;braille;Braille;2004-05-01
+Bugi;367;Buginese;bouguis;Buginese;2006-06-21
+Buhd;372;Buhid;bouhide;Buhid;2004-05-01
+Cakm;349;Chakma;chakma;Chakma;2012-02-06
+Cans;440;Unified Canadian Aboriginal Syllabics;syllabaire autochtone canadien unifié;Canadian_Aboriginal;2004-05-29
+Cari;201;Carian;carien;Carian;2007-07-02
+Cham;358;Cham;cham (čam, tcham);Cham;2009-11-11
+Cher;445;Cherokee;tchérokî;Cherokee;2004-05-01
+Cirt;291;Cirth;cirth;;2004-05-01
+Copt;204;Coptic;copte;Coptic;2006-06-21
+Cprt;403;Cypriot;syllabaire chypriote;Cypriot;2004-05-01
+Cyrl;220;Cyrillic;cyrillique;Cyrillic;2004-05-01
+Cyrs;221;Cyrillic (Old Church Slavonic variant);cyrillique (variante slavonne);;2004-05-01
+Deva;315;Devanagari (Nagari);dévanâgarî;Devanagari;2004-05-01
+Dsrt;250;Deseret (Mormon);déseret (mormon);Deseret;2004-05-01
+Dupl;755;Duployan shorthand, Duployan stenography;sténographie Duployé;;2010-07-18
+Egyd;070;Egyptian demotic;démotique égyptien;;2004-05-01
+Egyh;060;Egyptian hieratic;hiératique égyptien;;2004-05-01
+Egyp;050;Egyptian hieroglyphs;hiéroglyphes égyptiens;Egyptian_Hieroglyphs;2009-06-01
+Elba;226;Elbasan;elbasan;;2010-07-18
+Ethi;430;Ethiopic (Geʻez);éthiopien (geʻez, guèze);Ethiopic;2004-10-25
+Geor;240;Georgian (Mkhedruli);géorgien (mkhédrouli);Georgian;2004-05-29
+Geok;241;Khutsuri (Asomtavruli and Nuskhuri);khoutsouri (assomtavrouli et nouskhouri);Georgian;2012-10-16
+Glag;225;Glagolitic;glagolitique;Glagolitic;2006-06-21
+Goth;206;Gothic;gotique;Gothic;2004-05-01
+Gran;343;Grantha;grantha;;2009-11-11
+Grek;200;Greek;grec;Greek;2004-05-01
+Gujr;320;Gujarati;goudjarâtî (gujrâtî);Gujarati;2004-05-01
+Guru;310;Gurmukhi;gourmoukhî;Gurmukhi;2004-05-01
+Hang;286;Hangul (Hangŭl, Hangeul);hangûl (hangŭl, hangeul);Hangul;2004-05-29
+Hani;500;Han (Hanzi, Kanji, Hanja);idéogrammes han (sinogrammes);Han;2009-02-23
+Hano;371;Hanunoo (Hanunóo);hanounóo;Hanunoo;2004-05-29
+Hans;501;Han (Simplified variant);idéogrammes han (variante simplifiée);;2004-05-29
+Hant;502;Han (Traditional variant);idéogrammes han (variante traditionnelle);;2004-05-29
+Hatr;127;Hatran;hatrénien;;2012-11-01
+Hebr;125;Hebrew;hébreu;Hebrew;2004-05-01
+Hira;410;Hiragana;hiragana;Hiragana;2004-05-01
+Hluw;080;Anatolian Hieroglyphs (Luwian Hieroglyphs, Hittite Hieroglyphs);hiéroglyphes anatoliens (hiéroglyphes louvites, hiéroglyphes hittites);;2011-12-09
+Hmng;450;Pahawh Hmong;pahawh hmong;;2004-05-01
+Hrkt;412;Japanese syllabaries (alias for Hiragana + Katakana);syllabaires japonais (alias pour hiragana + katakana);Katakana_Or_Hiragana;2011-06-21
+Hung;176;Old Hungarian (Hungarian Runic);runes hongroises (ancien hongrois);;2012-10-16
+Inds;610;Indus (Harappan);indus;;2004-05-01
+Ital;210;Old Italic (Etruscan, Oscan, etc.);ancien italique (étrusque, osque, etc.);Old_Italic;2004-05-29
+Java;361;Javanese;javanais;Javanese;2009-06-01
+Jpan;413;Japanese (alias for Han + Hiragana + Katakana);japonais (alias pour han + hiragana + katakana);;2006-06-21
+Jurc;510;Jurchen;jurchen;;2010-12-21
+Kali;357;Kayah Li;kayah li;Kayah_Li;2007-07-02
+Kana;411;Katakana;katakana;Katakana;2004-05-01
+Khar;305;Kharoshthi;kharochthî;Kharoshthi;2006-06-21
+Khmr;355;Khmer;khmer;Khmer;2004-05-29
+Khoj;322;Khojki;khojkî;;2011-06-21
+Knda;345;Kannada;kannara (canara);Kannada;2004-05-29
+Kore;287;Korean (alias for Hangul + Han);coréen (alias pour hangûl + han);;2007-06-13
+Kpel;436;Kpelle;kpèllé;;2010-03-26
+Kthi;317;Kaithi;kaithî;Kaithi;2009-06-01
+Lana;351;Tai Tham (Lanna);taï tham (lanna);Tai_Tham;2009-06-01
+Laoo;356;Lao;laotien;Lao;2004-05-01
+Latf;217;Latin (Fraktur variant);latin (variante brisée);;2004-05-01
+Latg;216;Latin (Gaelic variant);latin (variante gaélique);;2004-05-01
+Latn;215;Latin;latin;Latin;2004-05-01
+Lepc;335;Lepcha (Róng);lepcha (róng);Lepcha;2007-07-02
+Limb;336;Limbu;limbou;Limbu;2004-05-29
+Lina;400;Linear A;linéaire A;;2004-05-01
+Linb;401;Linear B;linéaire B;Linear_B;2004-05-29
+Lisu;399;Lisu (Fraser);lisu (Fraser);Lisu;2009-06-01
+Loma;437;Loma;loma;;2010-03-26
+Lyci;202;Lycian;lycien;Lycian;2007-07-02
+Lydi;116;Lydian;lydien;Lydian;2007-07-02
+Mahj;314;Mahajani;mahâjanî;;2012-10-16
+Mand;140;Mandaic, Mandaean;mandéen;Mandaic;2010-07-23
+Mani;139;Manichaean;manichéen;;2007-07-15
+Maya;090;Mayan hieroglyphs;hiéroglyphes mayas;;2004-05-01
+Mend;438;Mende Kikakui;mendé kikakui;;2013-10-12
+Merc;101;Meroitic Cursive;cursif méroïtique;Meroitic_Cursive;2012-02-06
+Mero;100;Meroitic Hieroglyphs;hiéroglyphes méroïtiques;Meroitic_Hieroglyphs;2012-02-06
+Mlym;347;Malayalam;malayâlam;Malayalam;2004-05-01
+Modi;323;Modi, Moḍī;modî;;2013-10-12
+Moon;218;Moon (Moon code, Moon script, Moon type);écriture Moon;;2006-12-11
+Mong;145;Mongolian;mongol;Mongolian;2004-05-01
+Mroo;199;Mro, Mru;mro;;2010-12-21
+Mtei;337;Meitei Mayek (Meithei, Meetei);meitei mayek;Meetei_Mayek;2009-06-01
+Mult;323; Multani;multanî;;2012-11-01
+Mymr;350;Myanmar (Burmese);birman;Myanmar;2004-05-01
+Narb;106;Old North Arabian (Ancient North Arabian);nord-arabique;;2010-03-26
+Nbat;159;Nabataean;nabatéen;;2010-03-26
+Nkgb;420;Nakhi Geba ('Na-'Khi ²Ggŏ-¹baw, Naxi Geba);nakhi géba;;2009-02-23
+Nkoo;165;N’Ko;n’ko;Nko;2006-10-10
+Nshu;499;Nüshu;nüshu;;2010-12-21
+Ogam;212;Ogham;ogam;Ogham;2004-05-01
+Olck;261;Ol Chiki (Ol Cemet’, Ol, Santali);ol tchiki;Ol_Chiki;2007-07-02
+Orkh;175;Old Turkic, Orkhon Runic;orkhon;Old_Turkic;2009-06-01
+Orya;327;Oriya;oriyâ;Oriya;2004-05-01
+Osma;260;Osmanya;osmanais;Osmanya;2004-05-01
+Palm;126;Palmyrene;palmyrénien;;2010-03-26
+Pauc;263;Pau Cin Hau;paou chin haou;;2013-10-12
+Perm;227;Old Permic;ancien permien;;2004-05-01
+Phag;331;Phags-pa;’phags pa;Phags_Pa;2006-10-10
+Phli;131;Inscriptional Pahlavi;pehlevi des inscriptions;Inscriptional_Pahlavi;2009-06-01
+Phlp;132;Psalter Pahlavi;pehlevi des psautiers;;2007-11-26
+Phlv;133;Book Pahlavi;pehlevi des livres;;2007-07-15
+Phnx;115;Phoenician;phénicien;Phoenician;2006-10-10
+Plrd;282;Miao (Pollard);miao (Pollard);Miao;2012-02-06
+Prti;130;Inscriptional Parthian;parthe des inscriptions;Inscriptional_Parthian;2009-06-01
+Qaaa;900;Reserved for private use (start);réservé à l’usage privé (début);;2004-05-29
+Qabx;949;Reserved for private use (end);réservé à l’usage privé (fin);;2004-05-29
+Rjng;363;Rejang (Redjang, Kaganga);redjang (kaganga);Rejang;2009-02-23
+Roro;620;Rongorongo;rongorongo;;2004-05-01
+Runr;211;Runic;runique;Runic;2004-05-01
+Samr;123;Samaritan;samaritain;Samaritan;2009-06-01
+Sara;292;Sarati;sarati;;2004-05-29
+Sarb;105;Old South Arabian;sud-arabique, himyarite;Old_South_Arabian;2009-06-01
+Saur;344;Saurashtra;saurachtra;Saurashtra;2007-07-02
+Sgnw;095;SignWriting;SignÉcriture, SignWriting;;2006-10-10
+Shaw;281;Shavian (Shaw);shavien (Shaw);Shavian;2004-05-01
+Shrd;319;Sharada, Śāradā;charada, shard;Sharada;2012-02-06
+Sidd;302;Siddham, Siddhaṃ, Siddhamātṛkā;siddham;;2013-10-12
+Sind;318;Khudawadi, Sindhi;khoudawadî, sindhî;;2010-12-21
+Sinh;348;Sinhala;singhalais;Sinhala;2004-05-01
+Sora;398;Sora Sompeng;sora sompeng;Sora_Sompeng;2012-02-06
+Sund;362;Sundanese;sundanais;Sundanese;2007-07-02
+Sylo;316;Syloti Nagri;sylotî nâgrî;Syloti_Nagri;2006-06-21
+Syrc;135;Syriac;syriaque;Syriac;2004-05-01
+Syre;138;Syriac (Estrangelo variant);syriaque (variante estranghélo);;2004-05-01
+Syrj;137;Syriac (Western variant);syriaque (variante occidentale);;2004-05-01
+Syrn;136;Syriac (Eastern variant);syriaque (variante orientale);;2004-05-01
+Tagb;373;Tagbanwa;tagbanoua;Tagbanwa;2004-05-01
+Takr;321;Takri, Ṭākrī, Ṭāṅkrī;tâkrî;Takri;2012-02-06
+Tale;353;Tai Le;taï-le;Tai_Le;2004-10-25
+Talu;354;New Tai Lue;nouveau taï-lue;New_Tai_Lue;2006-06-21
+Taml;346;Tamil;tamoul;Tamil;2004-05-01
+Tang;520;Tangut;tangoute;;2010-12-21
+Tavt;359;Tai Viet;taï viêt;Tai_Viet;2009-06-01
+Telu;340;Telugu;télougou;Telugu;2004-05-01
+Teng;290;Tengwar;tengwar;;2004-05-01
+Tfng;120;Tifinagh (Berber);tifinagh (berbère);Tifinagh;2006-06-21
+Tglg;370;Tagalog (Baybayin, Alibata);tagal (baybayin, alibata);Tagalog;2009-02-23
+Thaa;170;Thaana;thâna;Thaana;2004-05-01
+Thai;352;Thai;thaï;Thai;2004-05-01
+Tibt;330;Tibetan;tibétain;Tibetan;2004-05-01
+Tirh;326;Tirhuta;tirhouta;;2011-12-09
+Ugar;040;Ugaritic;ougaritique;Ugaritic;2004-05-01
+Vaii;470;Vai;vaï;Vai;2007-07-02
+Visp;280;Visible Speech;parole visible;;2004-05-01
+Wara;262;Warang Citi (Varang Kshiti);warang citi;;2009-11-11
+Wole;480;Woleai;woléaï;;2010-12-21
+Xpeo;030;Old Persian;cunéiforme persépolitain;Old_Persian;2006-06-21
+Xsux;020;Cuneiform, Sumero-Akkadian;cunéiforme suméro-akkadien;Cuneiform;2006-10-10
+Yiii;460;Yi;yi;Yi;2004-05-01
+Zinh;994;Code for inherited script;codet pour écriture héritée;Inherited;2009-02-23
+Zmth;995;Mathematical notation;notation mathématique;;2007-11-26
+Zsym;996;Symbols;symboles;;2007-11-26
+Zxxx;997;Code for unwritten documents;codet pour les documents non écrits;;2011-06-21
+Zyyy;998;Code for undetermined script;codet pour écriture indéterminée;Common;2004-05-29
+Zzzz;999;Code for uncoded script;codet pour écriture non codée;Unknown;2006-10-10
lib/babelfish/data/opensubtitles_languages.txt (new Executable file, 474 lines)
@@ -0,0 +1,474 @@
+IdSubLanguage ISO639 LanguageName UploadEnabled WebEnabled
+aar aa Afar, afar 0 0
+abk ab Abkhazian 0 0
+ace Achinese 0 0
+ach Acoli 0 0
+ada Adangme 0 0
+ady adyghé 0 0
+afa Afro-Asiatic (Other) 0 0
+afh Afrihili 0 0
+afr af Afrikaans 1 0
+ain Ainu 0 0
+aka ak Akan 0 0
+akk Akkadian 0 0
+alb sq Albanian 1 1
+ale Aleut 0 0
+alg Algonquian languages 0 0
+alt Southern Altai 0 0
+amh am Amharic 0 0
+ang English, Old (ca.450-1100) 0 0
+apa Apache languages 0 0
+ara ar Arabic 1 1
+arc Aramaic 0 0
+arg an Aragonese 0 0
+arm hy Armenian 1 0
+arn Araucanian 0 0
+arp Arapaho 0 0
+art Artificial (Other) 0 0
+arw Arawak 0 0
+asm as Assamese 0 0
+ast Asturian, Bable 0 0
+ath Athapascan languages 0 0
+aus Australian languages 0 0
+ava av Avaric 0 0
+ave ae Avestan 0 0
+awa Awadhi 0 0
+aym ay Aymara 0 0
+aze az Azerbaijani 0 0
+bad Banda 0 0
+bai Bamileke languages 0 0
+bak ba Bashkir 0 0
+bal Baluchi 0 0
+bam bm Bambara 0 0
+ban Balinese 0 0
+baq eu Basque 1 1
+bas Basa 0 0
+bat Baltic (Other) 0 0
+bej Beja 0 0
+bel be Belarusian 0 0
+bem Bemba 0 0
+ben bn Bengali 1 0
+ber Berber (Other) 0 0
+bho Bhojpuri 0 0
+bih bh Bihari 0 0
+bik Bikol 0 0
+bin Bini 0 0
+bis bi Bislama 0 0
+bla Siksika 0 0
+bnt Bantu (Other) 0 0
+bos bs Bosnian 1 0
+bra Braj 0 0
+bre br Breton 1 0
+btk Batak (Indonesia) 0 0
+bua Buriat 0 0
+bug Buginese 0 0
+bul bg Bulgarian 1 1
+bur my Burmese 1 0
+byn Blin 0 0
+cad Caddo 0 0
+cai Central American Indian (Other) 0 0
+car Carib 0 0
+cat ca Catalan 1 1
+cau Caucasian (Other) 0 0
+ceb Cebuano 0 0
+cel Celtic (Other) 0 0
+cha ch Chamorro 0 0
+chb Chibcha 0 0
+che ce Chechen 0 0
+chg Chagatai 0 0
+chi zh Chinese 1 1
+chk Chuukese 0 0
+chm Mari 0 0
+chn Chinook jargon 0 0
+cho Choctaw 0 0
+chp Chipewyan 0 0
+chr Cherokee 0 0
+chu cu Church Slavic 0 0
+chv cv Chuvash 0 0
+chy Cheyenne 0 0
+cmc Chamic languages 0 0
+cop Coptic 0 0
+cor kw Cornish 0 0
+cos co Corsican 0 0
+cpe Creoles and pidgins, English based (Other) 0 0
+cpf Creoles and pidgins, French-based (Other) 0 0
+cpp Creoles and pidgins, Portuguese-based (Other) 0 0
+cre cr Cree 0 0
+crh Crimean Tatar 0 0
+crp Creoles and pidgins (Other) 0 0
+csb Kashubian 0 0
+cus Cushitic (Other)' couchitiques, autres langues 0 0
+cze cs Czech 1 1
+dak Dakota 0 0
+dan da Danish 1 1
+dar Dargwa 0 0
+day Dayak 0 0
+del Delaware 0 0
+den Slave (Athapascan) 0 0
+dgr Dogrib 0 0
+din Dinka 0 0
+div dv Divehi 0 0
+doi Dogri 0 0
+dra Dravidian (Other) 0 0
+dua Duala 0 0
+dum Dutch, Middle (ca.1050-1350) 0 0
+dut nl Dutch 1 1
+dyu Dyula 0 0
+dzo dz Dzongkha 0 0
+efi Efik 0 0
+egy Egyptian (Ancient) 0 0
+eka Ekajuk 0 0
+elx Elamite 0 0
+eng en English 1 1
+enm English, Middle (1100-1500) 0 0
+epo eo Esperanto 1 0
+est et Estonian 1 1
+ewe ee Ewe 0 0
+ewo Ewondo 0 0
+fan Fang 0 0
+fao fo Faroese 0 0
+fat Fanti 0 0
+fij fj Fijian 0 0
+fil Filipino 0 0
+fin fi Finnish 1 1
+fiu Finno-Ugrian (Other) 0 0
+fon Fon 0 0
+fre fr French 1 1
+frm French, Middle (ca.1400-1600) 0 0
+fro French, Old (842-ca.1400) 0 0
+fry fy Frisian 0 0
+ful ff Fulah 0 0
+fur Friulian 0 0
+gaa Ga 0 0
+gay Gayo 0 0
+gba Gbaya 0 0
+gem Germanic (Other) 0 0
+geo ka Georgian 1 1
+ger de German 1 1
+gez Geez 0 0
+gil Gilbertese 0 0
+gla gd Gaelic 0 0
+gle ga Irish 0 0
+glg gl Galician 1 1
+glv gv Manx 0 0
+gmh German, Middle High (ca.1050-1500) 0 0
+goh German, Old High (ca.750-1050) 0 0
+gon Gondi 0 0
+gor Gorontalo 0 0
+got Gothic 0 0
+grb Grebo 0 0
+grc Greek, Ancient (to 1453) 0 0
+ell el Greek 1 1
+grn gn Guarani 0 0
+guj gu Gujarati 0 0
+gwi Gwich´in 0 0
+hai Haida 0 0
+hat ht Haitian 0 0
+hau ha Hausa 0 0
+haw Hawaiian 0 0
+heb he Hebrew 1 1
+her hz Herero 0 0
+hil Hiligaynon 0 0
+him Himachali 0 0
+hin hi Hindi 1 1
+hit Hittite 0 0
+hmn Hmong 0 0
+hmo ho Hiri Motu 0 0
+hrv hr Croatian 1 1
+hun hu Hungarian 1 1
+hup Hupa 0 0
+iba Iban 0 0
+ibo ig Igbo 0 0
+ice is Icelandic 1 1
+ido io Ido 0 0
+iii ii Sichuan Yi 0 0
+ijo Ijo 0 0
+iku iu Inuktitut 0 0
+ile ie Interlingue 0 0
+ilo Iloko 0 0
+ina ia Interlingua (International Auxiliary Language Asso 0 0
+inc Indic (Other) 0 0
+ind id Indonesian 1 1
+ine Indo-European (Other) 0 0
+inh Ingush 0 0
+ipk ik Inupiaq 0 0
+ira Iranian (Other) 0 0
+iro Iroquoian languages 0 0
+ita it Italian 1 1
+jav jv Javanese 0 0
+jpn ja Japanese 1 1
+jpr Judeo-Persian 0 0
+jrb Judeo-Arabic 0 0
+kaa Kara-Kalpak 0 0
+kab Kabyle 0 0
+kac Kachin 0 0
+kal kl Kalaallisut 0 0
+kam Kamba 0 0
+kan kn Kannada 0 0
+kar Karen 0 0
+kas ks Kashmiri 0 0
+kau kr Kanuri 0 0
+kaw Kawi 0 0
+kaz kk Kazakh 1 0
+kbd Kabardian 0 0
+kha Khasi 0 0
+khi Khoisan (Other) 0 0
+khm km Khmer 1 1
+kho Khotanese 0 0
+kik ki Kikuyu 0 0
+kin rw Kinyarwanda 0 0
+kir ky Kirghiz 0 0
+kmb Kimbundu 0 0
+kok Konkani 0 0
+kom kv Komi 0 0
+kon kg Kongo 0 0
+kor ko Korean 1 1
+kos Kosraean 0 0
+kpe Kpelle 0 0
+krc Karachay-Balkar 0 0
+kro Kru 0 0
+kru Kurukh 0 0
+kua kj Kuanyama 0 0
+kum Kumyk 0 0
+kur ku Kurdish 0 0
+kut Kutenai 0 0
+lad Ladino 0 0
+lah Lahnda 0 0
+lam Lamba 0 0
+lao lo Lao 0 0
+lat la Latin 0 0
+lav lv Latvian 1 0
+lez Lezghian 0 0
+lim li Limburgan 0 0
+lin ln Lingala 0 0
+lit lt Lithuanian 1 0
+lol Mongo 0 0
+loz Lozi 0 0
+ltz lb Luxembourgish 1 0
+lua Luba-Lulua 0 0
+lub lu Luba-Katanga 0 0
+lug lg Ganda 0 0
+lui Luiseno 0 0
+lun Lunda 0 0
+luo Luo (Kenya and Tanzania) 0 0
+lus lushai 0 0
+mac mk Macedonian 1 1
+mad Madurese 0 0
+mag Magahi 0 0
+mah mh Marshallese 0 0
+mai Maithili 0 0
+mak Makasar 0 0
+mal ml Malayalam 1 0
+man Mandingo 0 0
+mao mi Maori 0 0
+map Austronesian (Other) 0 0
+mar mr Marathi 0 0
+mas Masai 0 0
+may ms Malay 1 1
+mdf Moksha 0 0
+mdr Mandar 0 0
+men Mende 0 0
+mga Irish, Middle (900-1200) 0 0
+mic Mi'kmaq 0 0
+min Minangkabau 0 0
+mis Miscellaneous languages 0 0
+mkh Mon-Khmer (Other) 0 0
+mlg mg Malagasy 0 0
+mlt mt Maltese 0 0
+mnc Manchu 0 0
+mni Manipuri 0 0
+mno Manobo languages 0 0
+moh Mohawk 0 0
+mol mo Moldavian 0 0
+mon mn Mongolian 1 0
+mos Mossi 0 0
+mwl Mirandese 0 0
+mul Multiple languages 0 0
+mun Munda languages 0 0
+mus Creek 0 0
+mwr Marwari 0 0
+myn Mayan languages 0 0
+myv Erzya 0 0
+nah Nahuatl 0 0
+nai North American Indian 0 0
+nap Neapolitan 0 0
+nau na Nauru 0 0
+nav nv Navajo 0 0
+nbl nr Ndebele, South 0 0
+nde nd Ndebele, North 0 0
+ndo ng Ndonga 0 0
+nds Low German 0 0
+nep ne Nepali 0 0
+new Nepal Bhasa 0 0
+nia Nias 0 0
+nic Niger-Kordofanian (Other) 0 0
+niu Niuean 0 0
+nno nn Norwegian Nynorsk 0 0
+nob nb Norwegian Bokmal 0 0
+nog Nogai 0 0
+non Norse, Old 0 0
+nor no Norwegian 1 1
+nso Northern Sotho 0 0
+nub Nubian languages 0 0
+nwc Classical Newari 0 0
+nya ny Chichewa 0 0
+nym Nyamwezi 0 0
+nyn Nyankole 0 0
+nyo Nyoro 0 0
+nzi Nzima 0 0
+oci oc Occitan 1 1
+oji oj Ojibwa 0 0
+ori or Oriya 0 0
+orm om Oromo 0 0
+osa Osage 0 0
+oss os Ossetian 0 0
+ota Turkish, Ottoman (1500-1928) 0 0
+oto Otomian languages 0 0
+paa Papuan (Other) 0 0
+pag Pangasinan 0 0
+pal Pahlavi 0 0
+pam Pampanga 0 0
+pan pa Panjabi 0 0
+pap Papiamento 0 0
+pau Palauan 0 0
+peo Persian, Old (ca.600-400 B.C.) 0 0
+per fa Persian 1 1
+phi Philippine (Other) 0 0
+phn Phoenician 0 0
+pli pi Pali 0 0
+pol pl Polish 1 1
+pon Pohnpeian 0 0
+por pt Portuguese 1 1
+pra Prakrit languages 0 0
+pro Provençal, Old (to 1500) 0 0
+pus ps Pushto 0 0
+que qu Quechua 0 0
+raj Rajasthani 0 0
+rap Rapanui 0 0
+rar Rarotongan 0 0
+roa Romance (Other) 0 0
+roh rm Raeto-Romance 0 0
+rom Romany 0 0
+run rn Rundi 0 0
+rup Aromanian 0 0
+rus ru Russian 1 1
+sad Sandawe 0 0
+sag sg Sango 0 0
+sah Yakut 0 0
+sai South American Indian (Other) 0 0
+sal Salishan languages 0 0
+sam Samaritan Aramaic 0 0
+san sa Sanskrit 0 0
+sas Sasak 0 0
+sat Santali 0 0
+scc sr Serbian 1 1
+scn Sicilian 0 0
+sco Scots 0 0
+sel Selkup 0 0
+sem Semitic (Other) 0 0
+sga Irish, Old (to 900) 0 0
+sgn Sign Languages 0 0
+shn Shan 0 0
+sid Sidamo 0 0
+sin si Sinhalese 1 1
+sio Siouan languages 0 0
+sit Sino-Tibetan (Other) 0 0
+sla Slavic (Other) 0 0
+slo sk Slovak 1 1
+slv sl Slovenian 1 1
+sma Southern Sami 0 0
+sme se Northern Sami 0 0
+smi Sami languages (Other) 0 0
+smj Lule Sami 0 0
+smn Inari Sami 0 0
+smo sm Samoan 0 0
+sms Skolt Sami 0 0
+sna sn Shona 0 0
+snd sd Sindhi 0 0
+snk Soninke 0 0
+sog Sogdian 0 0
+som so Somali 0 0
+son Songhai 0 0
+sot st Sotho, Southern 0 0
+spa es Spanish 1 1
+srd sc Sardinian 0 0
+srr Serer 0 0
+ssa Nilo-Saharan (Other) 0 0
+ssw ss Swati 0 0
+suk Sukuma 0 0
+sun su Sundanese 0 0
+sus Susu 0 0
+sux Sumerian 0 0
|
||||||
|
swa sw Swahili 1 0
|
||||||
|
swe sv Swedish 1 1
|
||||||
|
syr Syriac 1 0
|
||||||
|
tah ty Tahitian 0 0
|
||||||
|
tai Tai (Other) 0 0
|
||||||
|
tam ta Tamil 1 0
|
||||||
|
tat tt Tatar 0 0
|
||||||
|
tel te Telugu 1 0
|
||||||
|
tem Timne 0 0
|
||||||
|
ter Tereno 0 0
|
||||||
|
tet Tetum 0 0
|
||||||
|
tgk tg Tajik 0 0
|
||||||
|
tgl tl Tagalog 1 1
|
||||||
|
tha th Thai 1 1
|
||||||
|
tib bo Tibetan 0 0
|
||||||
|
tig Tigre 0 0
|
||||||
|
tir ti Tigrinya 0 0
|
||||||
|
tiv Tiv 0 0
|
||||||
|
tkl Tokelau 0 0
|
||||||
|
tlh Klingon 0 0
|
||||||
|
tli Tlingit 0 0
|
||||||
|
tmh Tamashek 0 0
|
||||||
|
tog Tonga (Nyasa) 0 0
|
||||||
|
ton to Tonga (Tonga Islands) 0 0
|
||||||
|
tpi Tok Pisin 0 0
|
||||||
|
tsi Tsimshian 0 0
|
||||||
|
tsn tn Tswana 0 0
|
||||||
|
tso ts Tsonga 0 0
|
||||||
|
tuk tk Turkmen 0 0
|
||||||
|
tum Tumbuka 0 0
|
||||||
|
tup Tupi languages 0 0
|
||||||
|
tur tr Turkish 1 1
|
||||||
|
tut Altaic (Other) 0 0
|
||||||
|
tvl Tuvalu 0 0
|
||||||
|
twi tw Twi 0 0
|
||||||
|
tyv Tuvinian 0 0
|
||||||
|
udm Udmurt 0 0
|
||||||
|
uga Ugaritic 0 0
|
||||||
|
uig ug Uighur 0 0
|
||||||
|
ukr uk Ukrainian 1 1
|
||||||
|
umb Umbundu 0 0
|
||||||
|
und Undetermined 0 0
|
||||||
|
urd ur Urdu 1 0
|
||||||
|
uzb uz Uzbek 0 0
|
||||||
|
vai Vai 0 0
|
||||||
|
ven ve Venda 0 0
|
||||||
|
vie vi Vietnamese 1 1
|
||||||
|
vol vo Volapük 0 0
|
||||||
|
vot Votic 0 0
|
||||||
|
wak Wakashan languages 0 0
|
||||||
|
wal Walamo 0 0
|
||||||
|
war Waray 0 0
|
||||||
|
was Washo 0 0
|
||||||
|
wel cy Welsh 0 0
|
||||||
|
wen Sorbian languages 0 0
|
||||||
|
wln wa Walloon 0 0
|
||||||
|
wol wo Wolof 0 0
|
||||||
|
xal Kalmyk 0 0
|
||||||
|
xho xh Xhosa 0 0
|
||||||
|
yao Yao 0 0
|
||||||
|
yap Yapese 0 0
|
||||||
|
yid yi Yiddish 0 0
|
||||||
|
yor yo Yoruba 0 0
|
||||||
|
ypk Yupik languages 0 0
|
||||||
|
zap Zapotec 0 0
|
||||||
|
zen Zenaga 0 0
|
||||||
|
zha za Zhuang 0 0
|
||||||
|
znd Zande 0 0
|
||||||
|
zul zu Zulu 0 0
|
||||||
|
zun Zuni 0 0
|
||||||
|
rum ro Romanian 1 1
|
||||||
|
pob pb Brazilian 1 1
|
||||||
|
mne Montenegrin 1 0
|
||||||
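The rows above are language records: an ISO-639 alpha3 code, an optional two-letter code, the language name, and two numeric flags. A minimal sketch of how such rows can be split (illustrative only: parse_language_row is a hypothetical helper, not part of babelfish, and the meaning of the two trailing flags is not stated here, so they are kept as opaque integers):

    # Hypothetical helper; column meanings beyond (alpha3, optional alpha2,
    # name) are an assumption about this data file.
    def parse_language_row(line):
        parts = line.split('\t') if '\t' in line else line.split()
        # The name may contain spaces, so peel the fixed fields off both
        # ends: alpha3 first, the two flags last.
        alpha3 = parts[0]
        flags = [int(parts[-2]), int(parts[-1])]
        middle = parts[1:-2]
        alpha2 = middle[0] if middle and len(middle[0]) == 2 else None
        name = ' '.join(middle[1:] if alpha2 else middle)
        return alpha3, alpha2, name, flags

    print(parse_language_row('hrv hr Croatian 1 1'))
    # ('hrv', 'hr', 'Croatian', [1, 1])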
85 lib/babelfish/exceptions.py Executable file
@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals


class Error(Exception):
    """Base class for all exceptions in babelfish"""
    pass


class LanguageError(Error, AttributeError):
    """Base class for all language exceptions in babelfish"""
    pass


class LanguageConvertError(LanguageError):
    """Exception raised by converters when :meth:`~babelfish.converters.LanguageConverter.convert` fails

    :param string alpha3: alpha3 code that failed conversion
    :param country: country code that failed conversion, if any
    :type country: string or None
    :param script: script code that failed conversion, if any
    :type script: string or None

    """
    def __init__(self, alpha3, country=None, script=None):
        self.alpha3 = alpha3
        self.country = country
        self.script = script

    def __str__(self):
        s = self.alpha3
        if self.country is not None:
            s += '-' + self.country
        if self.script is not None:
            s += '-' + self.script
        return s


class LanguageReverseError(LanguageError):
    """Exception raised by converters when :meth:`~babelfish.converters.LanguageReverseConverter.reverse` fails

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        self.code = code

    def __str__(self):
        return repr(self.code)


class CountryError(Error, AttributeError):
    """Base class for all country exceptions in babelfish"""
    pass


class CountryConvertError(CountryError):
    """Exception raised by converters when :meth:`~babelfish.converters.CountryConverter.convert` fails

    :param string alpha2: alpha2 code that failed conversion

    """
    def __init__(self, alpha2):
        self.alpha2 = alpha2

    def __str__(self):
        return self.alpha2


class CountryReverseError(CountryError):
    """Exception raised by converters when :meth:`~babelfish.converters.CountryReverseConverter.reverse` fails

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        self.code = code

    def __str__(self):
        return repr(self.code)
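For reference, a small usage sketch of the exception classes above (an illustration, not part of the diff; it assumes the vendored package is importable as lib.babelfish, matching the paths in this commit):

    # Sketch: how the exception classes above render.
    from lib.babelfish.exceptions import LanguageConvertError, LanguageReverseError

    err = LanguageConvertError('fra', country='FR')
    print(str(err))        # 'fra-FR': alpha3, then '-country', then '-script'

    try:
        raise LanguageReverseError('xx')
    except LanguageReverseError as e:
        print(str(e))      # "'xx'": reverse errors repr() the failing code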
186 lib/babelfish/language.py Executable file
@@ -0,0 +1,186 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
from collections import namedtuple
from functools import partial
import os
# from pkg_resources import resource_stream  # @UnresolvedImport
from .converters import ConverterManager
from .country import Country
from .exceptions import LanguageConvertError
from .script import Script
from . import basestr


LANGUAGES = set()
LANGUAGE_MATRIX = []

#: The namedtuple used in the :data:`LANGUAGE_MATRIX`
IsoLanguage = namedtuple('IsoLanguage', ['alpha3', 'alpha3b', 'alpha3t', 'alpha2', 'scope', 'type', 'name', 'comment'])

# binary mode, so the .decode('utf-8') below is correct on Python 2 and 3
f = open(os.path.join(os.path.dirname(__file__), 'data/iso-639-3.tab'), 'rb')
f.readline()
for l in f:
    iso_language = IsoLanguage(*l.decode('utf-8').split('\t'))
    LANGUAGES.add(iso_language.alpha3)
    LANGUAGE_MATRIX.append(iso_language)
f.close()


class LanguageConverterManager(ConverterManager):
    """:class:`~babelfish.converters.ConverterManager` for language converters"""
    entry_point = 'babelfish.language_converters'
    internal_converters = ['alpha2 = babelfish.converters.alpha2:Alpha2Converter',
                           'alpha3b = babelfish.converters.alpha3b:Alpha3BConverter',
                           'alpha3t = babelfish.converters.alpha3t:Alpha3TConverter',
                           'name = babelfish.converters.name:NameConverter',
                           'scope = babelfish.converters.scope:ScopeConverter',
                           'type = babelfish.converters.type:LanguageTypeConverter',
                           'opensubtitles = babelfish.converters.opensubtitles:OpenSubtitlesConverter']

language_converters = LanguageConverterManager()


class LanguageMeta(type):
    """The :class:`Language` metaclass

    Dynamically redirect :meth:`Language.frommycode` to :meth:`Language.fromcode` with the ``mycode`` `converter`

    """
    def __getattr__(cls, name):
        if name.startswith('from'):
            return partial(cls.fromcode, converter=name[4:])
        return type.__getattribute__(cls, name)


class Language(LanguageMeta(str('LanguageBase'), (object,), {})):
    """A human language

    A human language is composed of a language part following the ISO-639
    standard and can be country-specific when a :class:`~babelfish.country.Country`
    is specified.

    The :class:`Language` is extensible with custom converters (see :ref:`custom_converters`)

    :param string language: the language as a 3-letter ISO-639-3 code
    :param country: the country (if any) as a 2-letter ISO-3166 code or :class:`~babelfish.country.Country` instance
    :type country: string or :class:`~babelfish.country.Country` or None
    :param script: the script (if any) as a 4-letter ISO-15924 code or :class:`~babelfish.script.Script` instance
    :type script: string or :class:`~babelfish.script.Script` or None
    :param unknown: the unknown language as a three-letter ISO-639-3 code to use as fallback
    :type unknown: string or None
    :raise: ValueError if the language could not be recognized and `unknown` is ``None``

    """
    def __init__(self, language, country=None, script=None, unknown=None):
        if unknown is not None and language not in LANGUAGES:
            language = unknown
        if language not in LANGUAGES:
            raise ValueError('%r is not a valid language' % language)
        self.alpha3 = language
        self.country = None
        if isinstance(country, Country):
            self.country = country
        elif country is None:
            self.country = None
        else:
            self.country = Country(country)
        self.script = None
        if isinstance(script, Script):
            self.script = script
        elif script is None:
            self.script = None
        else:
            self.script = Script(script)

    @classmethod
    def fromcode(cls, code, converter):
        """Create a :class:`Language` by its `code` using `converter` to
        :meth:`~babelfish.converters.LanguageReverseConverter.reverse` it

        :param string code: the code to reverse
        :param string converter: name of the :class:`~babelfish.converters.LanguageReverseConverter` to use
        :return: the corresponding :class:`Language` instance
        :rtype: :class:`Language`

        """
        return cls(*language_converters[converter].reverse(code))

    @classmethod
    def fromietf(cls, ietf):
        """Create a :class:`Language` from an IETF language code

        :param string ietf: the ietf code
        :return: the corresponding :class:`Language` instance
        :rtype: :class:`Language`

        """
        subtags = ietf.split('-')
        language_subtag = subtags.pop(0).lower()
        if len(language_subtag) == 2:
            language = cls.fromalpha2(language_subtag)
        else:
            language = cls(language_subtag)
        while subtags:
            subtag = subtags.pop(0)
            if len(subtag) == 2:
                language.country = Country(subtag.upper())
            else:
                language.script = Script(subtag.capitalize())
            if language.script is not None:
                if subtags:
                    raise ValueError('Wrong IETF format. Unmatched subtags: %r' % subtags)
                break
        return language

    def __getstate__(self):
        return self.alpha3, self.country, self.script

    def __setstate__(self, state):
        self.alpha3, self.country, self.script = state

    def __getattr__(self, name):
        alpha3 = self.alpha3
        country = self.country.alpha2 if self.country is not None else None
        script = self.script.code if self.script is not None else None
        try:
            return language_converters[name].convert(alpha3, country, script)
        except KeyError:
            raise AttributeError(name)

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        if isinstance(other, basestr):
            return str(self) == other
        if not isinstance(other, Language):
            return False
        return (self.alpha3 == other.alpha3 and
                self.country == other.country and
                self.script == other.script)

    def __ne__(self, other):
        return not self == other

    def __bool__(self):
        return self.alpha3 != 'und'
    __nonzero__ = __bool__

    def __repr__(self):
        return '<Language [%s]>' % self

    def __str__(self):
        try:
            s = self.alpha2
        except LanguageConvertError:
            s = self.alpha3
        if self.country is not None:
            s += '-' + str(self.country)
        if self.script is not None:
            s += '-' + str(self.script)
        return s
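A short usage sketch of the Language API above (an illustration only; it assumes lib.babelfish is importable and relies on the converters named in internal_converters):

    from lib.babelfish import Language

    Language('eng')                  # from an ISO-639-3 code
    Language.fromalpha2('it')        # LanguageMeta routes from<name> to
                                     # fromcode(..., converter='alpha2')
    Language.fromietf('pt-BR')       # language subtag + country subtag
    Language('ita').alpha2           # __getattr__ dispatches to a
                                     # converter and returns 'it'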
78 lib/babelfish/script.py Executable file
@@ -0,0 +1,78 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals

import os
from collections import namedtuple
# from pkg_resources import resource_stream  # @UnresolvedImport
from . import basestr


#: Script code to script name mapping
SCRIPTS = {}

#: List of scripts in ISO-15924 as namedtuple of code, number, name, french_name, pva and date
SCRIPT_MATRIX = []

#: The namedtuple used in the :data:`SCRIPT_MATRIX`
IsoScript = namedtuple('IsoScript', ['code', 'number', 'name', 'french_name', 'pva', 'date'])

# binary mode, so the .decode('utf-8') below is correct on Python 2 and 3
f = open(os.path.join(os.path.dirname(__file__), 'data/iso15924-utf8-20131012.txt'), 'rb')
f.readline()
for l in f:
    l = l.decode('utf-8').strip()
    if not l or l.startswith('#'):
        continue
    script = IsoScript._make(l.split(';'))
    SCRIPT_MATRIX.append(script)
    SCRIPTS[script.code] = script.name
f.close()


class Script(object):
    """A human writing system

    A script is represented by a 4-letter code from the ISO-15924 standard

    :param string script: 4-letter ISO-15924 script code

    """
    def __init__(self, script):
        if script not in SCRIPTS:
            raise ValueError('%r is not a valid script' % script)

        #: ISO-15924 4-letter script code
        self.code = script

    @property
    def name(self):
        """English name of the script"""
        return SCRIPTS[self.code]

    def __getstate__(self):
        return self.code

    def __setstate__(self, state):
        self.code = state

    def __hash__(self):
        return hash(self.code)

    def __eq__(self, other):
        if isinstance(other, basestr):
            return self.code == other
        if not isinstance(other, Script):
            return False
        return self.code == other.code

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return '<Script [%s]>' % self

    def __str__(self):
        return self.code
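A quick sketch of the Script class above (illustrative; 'Latn' is a valid ISO-15924 code present in the bundled data file, while the failing code below is assumed not to be registered):

    from lib.babelfish import Script

    latin = Script('Latn')
    print(latin.name)          # 'Latin', looked up in SCRIPTS
    print(latin == 'Latn')     # True: __eq__ also accepts plain strings

    try:
        Script('Nope')         # not a registered code (assumption)
    except ValueError as e:
        print(e)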
9 lib/dateutil/__init__.py Normal file
@@ -0,0 +1,9 @@
"""
Copyright (c) 2003-2010 Gustavo Niemeyer <gustavo@niemeyer.net>

This module offers extensions to the standard python 2.3+
datetime module.
"""
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
__license__ = "PSF License"
__version__ = "1.5.0.1"
92 lib/dateutil/easter.py Normal file
@@ -0,0 +1,92 @@
"""
Copyright (c) 2003-2007 Gustavo Niemeyer <gustavo@niemeyer.net>

This module offers extensions to the standard python 2.3+
datetime module.
"""
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
__license__ = "PSF License"

import datetime

__all__ = ["easter", "EASTER_JULIAN", "EASTER_ORTHODOX", "EASTER_WESTERN"]

EASTER_JULIAN = 1
EASTER_ORTHODOX = 2
EASTER_WESTERN = 3

def easter(year, method=EASTER_WESTERN):
    """
    This method was ported from the work done by GM Arts,
    on top of the algorithm by Claus Tondering, which was
    based in part on the algorithm of Ouding (1940), as
    quoted in "Explanatory Supplement to the Astronomical
    Almanac", P. Kenneth Seidelmann, editor.

    This algorithm implements three different easter
    calculation methods:

    1 - Original calculation in Julian calendar, valid in
        dates after 326 AD
    2 - Original method, with date converted to Gregorian
        calendar, valid in years 1583 to 4099
    3 - Revised method, in Gregorian calendar, valid in
        years 1583 to 4099 as well

    These methods are represented by the constants:

    EASTER_JULIAN   = 1
    EASTER_ORTHODOX = 2
    EASTER_WESTERN  = 3

    The default method is method 3.

    More about the algorithm may be found at:

    http://users.chariot.net.au/~gmarts/eastalg.htm

    and

    http://www.tondering.dk/claus/calendar.html

    """

    if not (1 <= method <= 3):
        raise ValueError("invalid method")

    # g - Golden year - 1
    # c - Century
    # h - (23 - Epact) mod 30
    # i - Number of days from March 21 to Paschal Full Moon
    # j - Weekday for PFM (0=Sunday, etc)
    # p - Number of days from March 21 to Sunday on or before PFM
    #     (-6 to 28 methods 1 & 3, to 56 for method 2)
    # e - Extra days to add for method 2 (converting Julian
    #     date to Gregorian date)

    y = year
    g = y % 19
    e = 0
    if method < 3:
        # Old method
        i = (19*g+15)%30
        j = (y+y//4+i)%7
        if method == 2:
            # Extra dates to convert Julian to Gregorian date
            e = 10
            if y > 1600:
                e = e+y//100-16-(y//100-16)//4
    else:
        # New method
        c = y//100
        h = (c-c//4-(8*c+13)//25+19*g+15)%30
        i = h-(h//28)*(1-(h//28)*(29//(h+1))*((21-g)//11))
        j = (y+y//4+i+2-c+c//4)%7

    # p can be from -6 to 56 corresponding to dates 22 March to 23 May
    # (later dates apply to method 2, although 23 May never actually occurs)
    p = i-j+e
    d = 1+(p+27+(p+6)//40)%31
    m = 3+(p+26)//30
    return datetime.date(int(y),int(m),int(d))
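A small sanity check of easter() above (illustrative; the expected outputs are the well-known 2024 Easter Sundays, and the import assumes the vendored lib path from this commit):

    from lib.dateutil.easter import easter, EASTER_ORTHODOX

    print(easter(2024))                   # 2024-03-31 (Western, the default)
    print(easter(2024, EASTER_ORTHODOX))  # 2024-05-05 (Julian reckoning on
                                          # the Gregorian calendar)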
886 lib/dateutil/parser.py Normal file
@@ -0,0 +1,886 @@
# -*- coding:iso-8859-1 -*-
"""
Copyright (c) 2003-2007 Gustavo Niemeyer <gustavo@niemeyer.net>

This module offers extensions to the standard python 2.3+
datetime module.
"""
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
__license__ = "PSF License"

import datetime
import string
import time
import sys
import os

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

import relativedelta
import tz


__all__ = ["parse", "parserinfo"]


# Some pointers:
#
# http://www.cl.cam.ac.uk/~mgk25/iso-time.html
# http://www.iso.ch/iso/en/prods-services/popstds/datesandtime.html
# http://www.w3.org/TR/NOTE-datetime
# http://ringmaster.arc.nasa.gov/tools/time_formats.html
# http://search.cpan.org/author/MUIR/Time-modules-2003.0211/lib/Time/ParseDate.pm
# http://stein.cshl.org/jade/distrib/docs/java.text.SimpleDateFormat.html


class _timelex(object):

    def __init__(self, instream):
        if isinstance(instream, basestring):
            instream = StringIO(instream)
        self.instream = instream
        self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ_'
                          'ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
                          'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
        self.numchars = '0123456789'
        self.whitespace = ' \t\r\n'
        self.charstack = []
        self.tokenstack = []
        self.eof = False

    def get_token(self):
        if self.tokenstack:
            return self.tokenstack.pop(0)
        seenletters = False
        token = None
        state = None
        wordchars = self.wordchars
        numchars = self.numchars
        whitespace = self.whitespace
        while not self.eof:
            if self.charstack:
                nextchar = self.charstack.pop(0)
            else:
                nextchar = self.instream.read(1)
                while nextchar == '\x00':
                    nextchar = self.instream.read(1)
            if not nextchar:
                self.eof = True
                break
            elif not state:
                token = nextchar
                if nextchar in wordchars:
                    state = 'a'
                elif nextchar in numchars:
                    state = '0'
                elif nextchar in whitespace:
                    token = ' '
                    break # emit token
                else:
                    break # emit token
            elif state == 'a':
                seenletters = True
                if nextchar in wordchars:
                    token += nextchar
                elif nextchar == '.':
                    token += nextchar
                    state = 'a.'
                else:
                    self.charstack.append(nextchar)
                    break # emit token
            elif state == '0':
                if nextchar in numchars:
                    token += nextchar
                elif nextchar == '.':
                    token += nextchar
                    state = '0.'
                else:
                    self.charstack.append(nextchar)
                    break # emit token
            elif state == 'a.':
                seenletters = True
                if nextchar == '.' or nextchar in wordchars:
                    token += nextchar
                elif nextchar in numchars and token[-1] == '.':
                    token += nextchar
                    state = '0.'
                else:
                    self.charstack.append(nextchar)
                    break # emit token
            elif state == '0.':
                if nextchar == '.' or nextchar in numchars:
                    token += nextchar
                elif nextchar in wordchars and token[-1] == '.':
                    token += nextchar
                    state = 'a.'
                else:
                    self.charstack.append(nextchar)
                    break # emit token
        if (state in ('a.', '0.') and
            (seenletters or token.count('.') > 1 or token[-1] == '.')):
            l = token.split('.')
            token = l[0]
            for tok in l[1:]:
                self.tokenstack.append('.')
                if tok:
                    self.tokenstack.append(tok)
        return token

    def __iter__(self):
        return self

    def next(self):
        token = self.get_token()
        if token is None:
            raise StopIteration
        return token

    def split(cls, s):
        return list(cls(s))
    split = classmethod(split)


class _resultbase(object):

    def __init__(self):
        for attr in self.__slots__:
            setattr(self, attr, None)

    def _repr(self, classname):
        l = []
        for attr in self.__slots__:
            value = getattr(self, attr)
            if value is not None:
                l.append("%s=%s" % (attr, repr(value)))
        return "%s(%s)" % (classname, ", ".join(l))

    def __repr__(self):
        return self._repr(self.__class__.__name__)


class parserinfo(object):

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z"]
    PERTAIN = ["of"]
    TZOFFSET = {}

    def __init__(self, dayfirst=False, yearfirst=False):
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        self._year = time.localtime().tm_year
        self._century = self._year//100*100

    def _convert(self, lst):
        dct = {}
        for i in range(len(lst)):
            v = lst[i]
            if isinstance(v, tuple):
                for v in v:
                    dct[v.lower()] = i
            else:
                dct[v.lower()] = i
        return dct

    def jump(self, name):
        return name.lower() in self._jump

    def weekday(self, name):
        if len(name) >= 3:
            try:
                return self._weekdays[name.lower()]
            except KeyError:
                pass
        return None

    def month(self, name):
        if len(name) >= 3:
            try:
                return self._months[name.lower()]+1
            except KeyError:
                pass
        return None

    def hms(self, name):
        try:
            return self._hms[name.lower()]
        except KeyError:
            return None

    def ampm(self, name):
        try:
            return self._ampm[name.lower()]
        except KeyError:
            return None

    def pertain(self, name):
        return name.lower() in self._pertain

    def utczone(self, name):
        return name.lower() in self._utczone

    def tzoffset(self, name):
        if name in self._utczone:
            return 0
        return self.TZOFFSET.get(name)

    def convertyear(self, year):
        if year < 100:
            year += self._century
            if abs(year-self._year) >= 50:
                if year < self._year:
                    year += 100
                else:
                    year -= 100
        return year

    def validate(self, res):
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year)
        if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z':
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            res.tzoffset = 0
        return True


class parser(object):

    def __init__(self, info=None):
        self.info = info or parserinfo()

    def parse(self, timestr, default=None,
              ignoretz=False, tzinfos=None,
              **kwargs):
        if not default:
            default = datetime.datetime.now().replace(hour=0, minute=0,
                                                      second=0, microsecond=0)
        res = self._parse(timestr, **kwargs)
        if res is None:
            raise ValueError("unknown string format")
        repl = {}
        for attr in ["year", "month", "day", "hour",
                     "minute", "second", "microsecond"]:
            value = getattr(res, attr)
            if value is not None:
                repl[attr] = value
        ret = default.replace(**repl)
        if res.weekday is not None and not res.day:
            ret = ret+relativedelta.relativedelta(weekday=res.weekday)
        if not ignoretz:
            if callable(tzinfos) or tzinfos and res.tzname in tzinfos:
                if callable(tzinfos):
                    tzdata = tzinfos(res.tzname, res.tzoffset)
                else:
                    tzdata = tzinfos.get(res.tzname)
                if isinstance(tzdata, datetime.tzinfo):
                    tzinfo = tzdata
                elif isinstance(tzdata, basestring):
                    tzinfo = tz.tzstr(tzdata)
                elif isinstance(tzdata, int):
                    tzinfo = tz.tzoffset(res.tzname, tzdata)
                else:
                    raise ValueError("offset must be tzinfo subclass, "
                                     "tz string, or int offset")
                ret = ret.replace(tzinfo=tzinfo)
            elif res.tzname and res.tzname in time.tzname:
                ret = ret.replace(tzinfo=tz.tzlocal())
            elif res.tzoffset == 0:
                ret = ret.replace(tzinfo=tz.tzutc())
            elif res.tzoffset:
                ret = ret.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
        return ret

    class _result(_resultbase):
        __slots__ = ["year", "month", "day", "weekday",
                     "hour", "minute", "second", "microsecond",
                     "tzname", "tzoffset"]

    def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False):
        info = self.info
        if dayfirst is None:
            dayfirst = info.dayfirst
        if yearfirst is None:
            yearfirst = info.yearfirst
        res = self._result()
        l = _timelex.split(timestr)
        try:

            # year/month/day list
            ymd = []

            # Index of the month string in ymd
            mstridx = -1

            len_l = len(l)
            i = 0
            while i < len_l:

                # Check if it's a number
                try:
                    value_repr = l[i]
                    value = float(value_repr)
                except ValueError:
                    value = None

                if value is not None:
                    # Token is a number
                    len_li = len(l[i])
                    i += 1
                    if (len(ymd) == 3 and len_li in (2, 4)
                        and (i >= len_l or (l[i] != ':' and
                                            info.hms(l[i]) is None))):
                        # 19990101T23[59]
                        s = l[i-1]
                        res.hour = int(s[:2])
                        if len_li == 4:
                            res.minute = int(s[2:])
                    elif len_li == 6 or (len_li > 6 and l[i-1].find('.') == 6):
                        # YYMMDD or HHMMSS[.ss]
                        s = l[i-1]
                        if not ymd and l[i-1].find('.') == -1:
                            ymd.append(info.convertyear(int(s[:2])))
                            ymd.append(int(s[2:4]))
                            ymd.append(int(s[4:]))
                        else:
                            # 19990101T235959[.59]
                            res.hour = int(s[:2])
                            res.minute = int(s[2:4])
                            res.second, res.microsecond = _parsems(s[4:])
                    elif len_li == 8:
                        # YYYYMMDD
                        s = l[i-1]
                        ymd.append(int(s[:4]))
                        ymd.append(int(s[4:6]))
                        ymd.append(int(s[6:]))
                    elif len_li in (12, 14):
                        # YYYYMMDDhhmm[ss]
                        s = l[i-1]
                        ymd.append(int(s[:4]))
                        ymd.append(int(s[4:6]))
                        ymd.append(int(s[6:8]))
                        res.hour = int(s[8:10])
                        res.minute = int(s[10:12])
                        if len_li == 14:
                            res.second = int(s[12:])
                    elif ((i < len_l and info.hms(l[i]) is not None) or
                          (i+1 < len_l and l[i] == ' ' and
                           info.hms(l[i+1]) is not None)):
                        # HH[ ]h or MM[ ]m or SS[.ss][ ]s
                        if l[i] == ' ':
                            i += 1
                        idx = info.hms(l[i])
                        while True:
                            if idx == 0:
                                res.hour = int(value)
                                if value%1:
                                    res.minute = int(60*(value%1))
                            elif idx == 1:
                                res.minute = int(value)
                                if value%1:
                                    res.second = int(60*(value%1))
                            elif idx == 2:
                                res.second, res.microsecond = \
                                    _parsems(value_repr)
                            i += 1
                            if i >= len_l or idx == 2:
                                break
                            # 12h00
                            try:
                                value_repr = l[i]
                                value = float(value_repr)
                            except ValueError:
                                break
                            else:
                                i += 1
                                idx += 1
                                if i < len_l:
                                    newidx = info.hms(l[i])
                                    if newidx is not None:
                                        idx = newidx
                    elif i+1 < len_l and l[i] == ':':
                        # HH:MM[:SS[.ss]]
                        res.hour = int(value)
                        i += 1
                        value = float(l[i])
                        res.minute = int(value)
                        if value%1:
                            res.second = int(60*(value%1))
                        i += 1
                        if i < len_l and l[i] == ':':
                            res.second, res.microsecond = _parsems(l[i+1])
                            i += 2
                    elif i < len_l and l[i] in ('-', '/', '.'):
                        sep = l[i]
                        ymd.append(int(value))
                        i += 1
                        if i < len_l and not info.jump(l[i]):
                            try:
                                # 01-01[-01]
                                ymd.append(int(l[i]))
                            except ValueError:
                                # 01-Jan[-01]
                                value = info.month(l[i])
                                if value is not None:
                                    ymd.append(value)
                                    assert mstridx == -1
                                    mstridx = len(ymd)-1
                                else:
                                    return None
                            i += 1
                            if i < len_l and l[i] == sep:
                                # We have three members
                                i += 1
                                value = info.month(l[i])
                                if value is not None:
                                    ymd.append(value)
                                    mstridx = len(ymd)-1
                                    assert mstridx == -1
                                else:
                                    ymd.append(int(l[i]))
                                i += 1
                    elif i >= len_l or info.jump(l[i]):
                        if i+1 < len_l and info.ampm(l[i+1]) is not None:
                            # 12 am
                            res.hour = int(value)
                            if res.hour < 12 and info.ampm(l[i+1]) == 1:
                                res.hour += 12
                            elif res.hour == 12 and info.ampm(l[i+1]) == 0:
                                res.hour = 0
                            i += 1
                        else:
                            # Year, month or day
                            ymd.append(int(value))
                        i += 1
                    elif info.ampm(l[i]) is not None:
                        # 12am
                        res.hour = int(value)
                        if res.hour < 12 and info.ampm(l[i]) == 1:
                            res.hour += 12
                        elif res.hour == 12 and info.ampm(l[i]) == 0:
                            res.hour = 0
                        i += 1
                    elif not fuzzy:
                        return None
                    else:
                        i += 1
                    continue

                # Check weekday
                value = info.weekday(l[i])
                if value is not None:
                    res.weekday = value
                    i += 1
                    continue

                # Check month name
                value = info.month(l[i])
                if value is not None:
                    ymd.append(value)
                    assert mstridx == -1
                    mstridx = len(ymd)-1
                    i += 1
                    if i < len_l:
                        if l[i] in ('-', '/'):
                            # Jan-01[-99]
                            sep = l[i]
                            i += 1
                            ymd.append(int(l[i]))
                            i += 1
                            if i < len_l and l[i] == sep:
                                # Jan-01-99
                                i += 1
                                ymd.append(int(l[i]))
                                i += 1
                        elif (i+3 < len_l and l[i] == l[i+2] == ' '
                              and info.pertain(l[i+1])):
                            # Jan of 01
                            # In this case, 01 is clearly year
                            try:
                                value = int(l[i+3])
                            except ValueError:
                                # Wrong guess
                                pass
                            else:
                                # Convert it here to become unambiguous
                                ymd.append(info.convertyear(value))
                            i += 4
                    continue

                # Check am/pm
                value = info.ampm(l[i])
                if value is not None:
                    if value == 1 and res.hour < 12:
                        res.hour += 12
                    elif value == 0 and res.hour == 12:
                        res.hour = 0
                    i += 1
                    continue

                # Check for a timezone name
                if (res.hour is not None and len(l[i]) <= 5 and
                    res.tzname is None and res.tzoffset is None and
                    not [x for x in l[i] if x not in string.ascii_uppercase]):
                    res.tzname = l[i]
                    res.tzoffset = info.tzoffset(res.tzname)
                    i += 1

                    # Check for something like GMT+3, or BRST+3. Notice
                    # that it doesn't mean "I am 3 hours after GMT", but
                    # "my time +3 is GMT". If found, we reverse the
                    # logic so that timezone parsing code will get it
                    # right.
                    if i < len_l and l[i] in ('+', '-'):
                        l[i] = ('+', '-')[l[i] == '+']
                        res.tzoffset = None
                        if info.utczone(res.tzname):
                            # With something like GMT+3, the timezone
                            # is *not* GMT.
                            res.tzname = None

                    continue

                # Check for a numbered timezone
                if res.hour is not None and l[i] in ('+', '-'):
                    signal = (-1,1)[l[i] == '+']
                    i += 1
                    len_li = len(l[i])
                    if len_li == 4:
                        # -0300
                        res.tzoffset = int(l[i][:2])*3600+int(l[i][2:])*60
                    elif i+1 < len_l and l[i+1] == ':':
                        # -03:00
                        res.tzoffset = int(l[i])*3600+int(l[i+2])*60
                        i += 2
                    elif len_li <= 2:
                        # -[0]3
                        res.tzoffset = int(l[i][:2])*3600
                    else:
                        return None
                    i += 1
                    res.tzoffset *= signal

                    # Look for a timezone name between parenthesis
                    if (i+3 < len_l and
                        info.jump(l[i]) and l[i+1] == '(' and l[i+3] == ')' and
                        3 <= len(l[i+2]) <= 5 and
                        not [x for x in l[i+2]
                             if x not in string.ascii_uppercase]):
                        # -0300 (BRST)
                        res.tzname = l[i+2]
                        i += 4
                    continue

                # Check jumps
                if not (info.jump(l[i]) or fuzzy):
                    return None

                i += 1

            # Process year/month/day
            len_ymd = len(ymd)
            if len_ymd > 3:
                # More than three members!?
                return None
            elif len_ymd == 1 or (mstridx != -1 and len_ymd == 2):
                # One member, or two members with a month string
                if mstridx != -1:
                    res.month = ymd[mstridx]
                    del ymd[mstridx]
                if len_ymd > 1 or mstridx == -1:
                    if ymd[0] > 31:
                        res.year = ymd[0]
                    else:
                        res.day = ymd[0]
            elif len_ymd == 2:
                # Two members with numbers
                if ymd[0] > 31:
                    # 99-01
                    res.year, res.month = ymd
                elif ymd[1] > 31:
                    # 01-99
                    res.month, res.year = ymd
                elif dayfirst and ymd[1] <= 12:
                    # 13-01
                    res.day, res.month = ymd
                else:
                    # 01-13
                    res.month, res.day = ymd
            if len_ymd == 3:
                # Three members
                if mstridx == 0:
                    res.month, res.day, res.year = ymd
                elif mstridx == 1:
                    if ymd[0] > 31 or (yearfirst and ymd[2] <= 31):
                        # 99-Jan-01
                        res.year, res.month, res.day = ymd
                    else:
                        # 01-Jan-01
                        # Give precendence to day-first, since
                        # two-digit years is usually hand-written.
                        res.day, res.month, res.year = ymd
                elif mstridx == 2:
                    # WTF!?
                    if ymd[1] > 31:
                        # 01-99-Jan
                        res.day, res.year, res.month = ymd
                    else:
                        # 99-01-Jan
                        res.year, res.day, res.month = ymd
                else:
                    if ymd[0] > 31 or \
                       (yearfirst and ymd[1] <= 12 and ymd[2] <= 31):
                        # 99-01-01
                        res.year, res.month, res.day = ymd
                    elif ymd[0] > 12 or (dayfirst and ymd[1] <= 12):
                        # 13-01-01
                        res.day, res.month, res.year = ymd
                    else:
                        # 01-13-01
                        res.month, res.day, res.year = ymd

        except (IndexError, ValueError, AssertionError):
            return None

        if not info.validate(res):
            return None
        return res

DEFAULTPARSER = parser()
def parse(timestr, parserinfo=None, **kwargs):
    if parserinfo:
        return parser(parserinfo).parse(timestr, **kwargs)
    else:
        return DEFAULTPARSER.parse(timestr, **kwargs)


class _tzparser(object):

    class _result(_resultbase):

        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        res = self._result()
        l = _timelex.split(tzstr)
        try:

            len_l = len(l)

            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                j = i
                while j < len_l and not [x for x in l[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(l[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(l[i:j])
                    i = j
                    if (i < len_l and
                        (l[i] in ('+', '-') or l[i][0] in "0123456789")):
                        if l[i] in ('+', '-'):
                            # Yes, that's right.  See the TZ variable
                            # documentation.
                            signal = (1,-1)[l[i] == '+']
                            i += 1
                        else:
                            signal = -1
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            setattr(res, offattr,
                                    (int(l[i][:2])*3600+int(l[i][2:])*60)*signal)
                        elif i+1 < len_l and l[i+1] == ':':
                            # -03:00
                            setattr(res, offattr,
                                    (int(l[i])*3600+int(l[i+2])*60)*signal)
                            i += 2
                        elif len_li <= 2:
                            # -[0]3
                            setattr(res, offattr,
                                    int(l[i][:2])*3600*signal)
                        else:
                            return None
                        i += 1
                    if res.dstabbr:
                        break
                else:
                    break

            if i < len_l:
                for j in range(i, len_l):
                    if l[j] == ';': l[j] = ','

                assert l[i] == ','

                i += 1

            if i >= len_l:
                pass
            elif (8 <= l.count(',') <= 9 and
                  not [y for x in l[i:] if x != ','
                       for y in x if y not in "0123456789"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for x in (res.start, res.end):
                    x.month = int(l[i])
                    i += 2
                    if l[i] == '-':
                        value = int(l[i+1])*-1
                        i += 1
                    else:
                        value = int(l[i])
                    i += 2
                    if value:
                        x.week = value
                        x.weekday = (int(l[i])-1)%7
                    else:
                        x.day = int(l[i])
                    i += 2
                    x.time = int(l[i])
                    i += 2
                if i < len_l:
                    if l[i] in ('-','+'):
                        signal = (-1,1)[l[i] == "+"]
                        i += 1
                    else:
                        signal = 1
                    res.dstoffset = (res.stdoffset+int(l[i]))*signal
            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
                  not [y for x in l[i:] if x not in (',','/','J','M',
                                                     '.','-',':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if l[i] == 'J':
                        # non-leap year day (1 based)
                        i += 1
                        x.jyday = int(l[i])
                    elif l[i] == 'M':
                        # month[-.]week[-.]weekday
                        i += 1
                        x.month = int(l[i])
                        i += 1
                        assert l[i] in ('-', '.')
                        i += 1
                        x.week = int(l[i])
                        if x.week == 5:
                            x.week = -1
                        i += 1
                        assert l[i] in ('-', '.')
                        i += 1
                        x.weekday = (int(l[i])-1)%7
                    else:
                        # year day (zero based)
                        x.yday = int(l[i])+1

                    i += 1

                    if i < len_l and l[i] == '/':
                        i += 1
                        # start time
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            x.time = (int(l[i][:2])*3600+int(l[i][2:])*60)
                        elif i+1 < len_l and l[i+1] == ':':
                            # -03:00
                            x.time = int(l[i])*3600+int(l[i+2])*60
                            i += 2
                            if i+1 < len_l and l[i+1] == ':':
                                i += 2
                                x.time += int(l[i])
                        elif len_li <= 2:
                            # -[0]3
                            x.time = (int(l[i][:2])*3600)
                        else:
                            return None
                        i += 1

                    assert i == len_l or l[i] == ','

                    i += 1

                assert i >= len_l

        except (IndexError, ValueError, AssertionError):
            return None

        return res


DEFAULTTZPARSER = _tzparser()
def _parsetz(tzstr):
    return DEFAULTTZPARSER.parse(tzstr)


def _parsems(value):
    """Parse a I[.F] seconds value into (seconds, microseconds)."""
    if "." not in value:
        return int(value), 0
    else:
        i, f = value.split(".")
        return int(i), int(f.ljust(6, "0")[:6])


# vim:ts=4:sw=4:et
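A sketch of the module-level parse() entry point above (illustrative; note this vendored copy is Python 2 era code, with implicit relative imports and basestring, and the import assumes the lib path from this commit):

    from lib.dateutil.parser import parse

    print(parse("2003-09-25T10:49:41"))        # 2003-09-25 10:49:41
    print(parse("Sep 25 2003"))                # 2003-09-25 00:00:00
    print(parse("10/09/2003", dayfirst=True))  # 2003-09-10 00:00:00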
432 lib/dateutil/relativedelta.py Normal file
@@ -0,0 +1,432 @@
|
|||||||
|
"""
|
||||||
|
Copyright (c) 2003-2010 Gustavo Niemeyer <gustavo@niemeyer.net>
|
||||||
|
|
||||||
|
This module offers extensions to the standard python 2.3+
|
||||||
|
datetime module.
|
||||||
|
"""
|
||||||
|
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
|
||||||
|
__license__ = "PSF License"
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import calendar
|
||||||
|
|
||||||
|
__all__ = ["relativedelta", "MO", "TU", "WE", "TH", "FR", "SA", "SU"]
|
||||||
|
|
||||||
|
class weekday(object):
|
||||||
|
__slots__ = ["weekday", "n"]
|
||||||
|
|
||||||
|
def __init__(self, weekday, n=None):
|
||||||
|
self.weekday = weekday
|
||||||
|
self.n = n
|
||||||
|
|
||||||
|
def __call__(self, n):
|
||||||
|
if n == self.n:
|
||||||
|
return self
|
||||||
|
else:
|
||||||
|
return self.__class__(self.weekday, n)
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
try:
|
||||||
|
if self.weekday != other.weekday or self.n != other.n:
|
||||||
|
return False
|
||||||
|
except AttributeError:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
s = ("MO", "TU", "WE", "TH", "FR", "SA", "SU")[self.weekday]
|
||||||
|
if not self.n:
|
||||||
|
return s
|
||||||
|
else:
|
||||||
|
return "%s(%+d)" % (s, self.n)
|
||||||
|
|
||||||
|
MO, TU, WE, TH, FR, SA, SU = weekdays = tuple([weekday(x) for x in range(7)])
|
||||||
|
|
||||||
|
class relativedelta:
|
||||||
|
"""
|
||||||
|
The relativedelta type is based on the specification of the excelent
|
||||||
|
work done by M.-A. Lemburg in his mx.DateTime extension. However,
|
||||||
|
notice that this type does *NOT* implement the same algorithm as
|
||||||
|
his work. Do *NOT* expect it to behave like mx.DateTime's counterpart.
|
||||||
|
|
||||||
|
There's two different ways to build a relativedelta instance. The
|
||||||
|
first one is passing it two date/datetime classes:
|
||||||
|
|
||||||
|
relativedelta(datetime1, datetime2)
|
||||||
|
|
||||||
|
And the other way is to use the following keyword arguments:
|
||||||
|
|
||||||
|
year, month, day, hour, minute, second, microsecond:
|
||||||
|
Absolute information.
|
||||||
|
|
||||||
|
years, months, weeks, days, hours, minutes, seconds, microseconds:
|
||||||
|
Relative information, may be negative.
|
||||||
|
|
||||||
|
weekday:
|
||||||
|
One of the weekday instances (MO, TU, etc). These instances may
|
||||||
|
receive a parameter N, specifying the Nth weekday, which could
|
||||||
|
be positive or negative (like MO(+1) or MO(-2). Not specifying
|
||||||
|
it is the same as specifying +1. You can also use an integer,
|
||||||
|
where 0=MO.
|
||||||
|
|
||||||
|
leapdays:
|
||||||
|
Will add given days to the date found, if year is a leap
|
||||||
|
year, and the date found is post 28 of february.
|
||||||
|
|
||||||
|
yearday, nlyearday:
|
||||||
|
Set the yearday or the non-leap year day (jump leap days).
|
||||||
|
These are converted to day/month/leapdays information.
|
||||||
|
|
||||||
|
Here is the behavior of operations with relativedelta:
|
||||||
|
|
||||||
|
1) Calculate the absolute year, using the 'year' argument, or the
|
||||||
|
original datetime year, if the argument is not present.
|
||||||
|
|
||||||
|
2) Add the relative 'years' argument to the absolute year.
|
||||||
|
|
||||||
|
3) Do steps 1 and 2 for month/months.
|
||||||
|
|
||||||
|
4) Calculate the absolute day, using the 'day' argument, or the
|
||||||
|
original datetime day, if the argument is not present. Then,
|
||||||
|
subtract from the day until it fits in the year and month
|
||||||
|
found after their operations.
|
||||||
|
|
||||||
|
5) Add the relative 'days' argument to the absolute day. Notice
|
||||||
|
that the 'weeks' argument is multiplied by 7 and added to
|
||||||
|
'days'.
|
||||||
|
|
||||||
|
6) Do steps 1 and 2 for hour/hours, minute/minutes, second/seconds,
|
||||||
|
microsecond/microseconds.
|
||||||
|
|
||||||
|
7) If the 'weekday' argument is present, calculate the weekday,
|
||||||
|
with the given (wday, nth) tuple. wday is the index of the
|
||||||
|
weekday (0-6, 0=Mon), and nth is the number of weeks to add
|
||||||
|
forward or backward, depending on its signal. Notice that if
|
||||||
|
the calculated date is already Monday, for example, using
|
||||||
|
(0, 1) or (0, -1) won't change the day.
|
||||||
|
"""

    def __init__(self, dt1=None, dt2=None,
                 years=0, months=0, days=0, leapdays=0, weeks=0,
                 hours=0, minutes=0, seconds=0, microseconds=0,
                 year=None, month=None, day=None, weekday=None,
                 yearday=None, nlyearday=None,
                 hour=None, minute=None, second=None, microsecond=None):
        if dt1 and dt2:
            if not isinstance(dt1, datetime.date) or \
               not isinstance(dt2, datetime.date):
                raise TypeError("relativedelta only diffs datetime/date")
            if type(dt1) is not type(dt2):
                if not isinstance(dt1, datetime.datetime):
                    dt1 = datetime.datetime.fromordinal(dt1.toordinal())
                elif not isinstance(dt2, datetime.datetime):
                    dt2 = datetime.datetime.fromordinal(dt2.toordinal())
            self.years = 0
            self.months = 0
            self.days = 0
            self.leapdays = 0
            self.hours = 0
            self.minutes = 0
            self.seconds = 0
            self.microseconds = 0
            self.year = None
            self.month = None
            self.day = None
            self.weekday = None
            self.hour = None
            self.minute = None
            self.second = None
            self.microsecond = None
            self._has_time = 0

            months = (dt1.year*12+dt1.month)-(dt2.year*12+dt2.month)
            self._set_months(months)
            dtm = self.__radd__(dt2)
            if dt1 < dt2:
                while dt1 > dtm:
                    months += 1
                    self._set_months(months)
                    dtm = self.__radd__(dt2)
            else:
                while dt1 < dtm:
                    months -= 1
                    self._set_months(months)
                    dtm = self.__radd__(dt2)
            delta = dt1 - dtm
            self.seconds = delta.seconds+delta.days*86400
            self.microseconds = delta.microseconds
        else:
            self.years = years
            self.months = months
            self.days = days+weeks*7
            self.leapdays = leapdays
            self.hours = hours
            self.minutes = minutes
            self.seconds = seconds
            self.microseconds = microseconds
            self.year = year
            self.month = month
            self.day = day
            self.hour = hour
            self.minute = minute
            self.second = second
            self.microsecond = microsecond

            if type(weekday) is int:
                self.weekday = weekdays[weekday]
            else:
                self.weekday = weekday

            yday = 0
            if nlyearday:
                yday = nlyearday
            elif yearday:
                yday = yearday
                if yearday > 59:
                    self.leapdays = -1
            if yday:
                ydayidx = [31, 59, 90, 120, 151, 181, 212,
                           243, 273, 304, 334, 366]
                for idx, ydays in enumerate(ydayidx):
                    if yday <= ydays:
                        self.month = idx+1
                        if idx == 0:
                            self.day = yday
                        else:
                            self.day = yday-ydayidx[idx-1]
                        break
                else:
                    raise ValueError("invalid year day (%d)" % yday)

        self._fix()
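    # The _fix() call above normalizes overflowing relative fields
    # (microseconds into seconds, seconds into minutes, and so on up to
    # months into years). A small sketch of the effect (expected value
    # worked out by hand; 90061 seconds is 1 day, 1 hour, 1 minute and
    # 1 second):
    #
    #   >>> relativedelta(seconds=90061)
    #   relativedelta(days=+1, hours=+1, minutes=+1, seconds=+1)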

    def _fix(self):
        if abs(self.microseconds) > 999999:
            s = self.microseconds//abs(self.microseconds)
            div, mod = divmod(self.microseconds*s, 1000000)
            self.microseconds = mod*s
            self.seconds += div*s
        if abs(self.seconds) > 59:
            s = self.seconds//abs(self.seconds)
            div, mod = divmod(self.seconds*s, 60)
            self.seconds = mod*s
            self.minutes += div*s
        if abs(self.minutes) > 59:
            s = self.minutes//abs(self.minutes)
            div, mod = divmod(self.minutes*s, 60)
            self.minutes = mod*s
            self.hours += div*s
        if abs(self.hours) > 23:
            s = self.hours//abs(self.hours)
            div, mod = divmod(self.hours*s, 24)
            self.hours = mod*s
            self.days += div*s
        if abs(self.months) > 11:
            s = self.months//abs(self.months)
            div, mod = divmod(self.months*s, 12)
            self.months = mod*s
            self.years += div*s
        if (self.hours or self.minutes or self.seconds or self.microseconds or
            self.hour is not None or self.minute is not None or
            self.second is not None or self.microsecond is not None):
            self._has_time = 1
        else:
            self._has_time = 0

    def _set_months(self, months):
        self.months = months
        if abs(self.months) > 11:
            s = self.months//abs(self.months)
            div, mod = divmod(self.months*s, 12)
            self.months = mod*s
            self.years = div*s
        else:
            self.years = 0

    def __radd__(self, other):
        if not isinstance(other, datetime.date):
            raise TypeError("unsupported type for add operation")
        elif self._has_time and not isinstance(other, datetime.datetime):
            other = datetime.datetime.fromordinal(other.toordinal())
        year = (self.year or other.year)+self.years
        month = self.month or other.month
        if self.months:
            assert 1 <= abs(self.months) <= 12
            month += self.months
            if month > 12:
                year += 1
                month -= 12
            elif month < 1:
                year -= 1
                month += 12
        day = min(calendar.monthrange(year, month)[1],
                  self.day or other.day)
        repl = {"year": year, "month": month, "day": day}
        for attr in ["hour", "minute", "second", "microsecond"]:
            value = getattr(self, attr)
            if value is not None:
                repl[attr] = value
        days = self.days
        if self.leapdays and month > 2 and calendar.isleap(year):
            days += self.leapdays
        ret = (other.replace(**repl)
               + datetime.timedelta(days=days,
                                    hours=self.hours,
                                    minutes=self.minutes,
                                    seconds=self.seconds,
                                    microseconds=self.microseconds))
        if self.weekday:
            weekday, nth = self.weekday.weekday, self.weekday.n or 1
            jumpdays = (abs(nth)-1)*7
            if nth > 0:
                jumpdays += (7-ret.weekday()+weekday) % 7
            else:
                jumpdays += (ret.weekday()-weekday) % 7
                jumpdays *= -1
            ret += datetime.timedelta(days=jumpdays)
        return ret

    def __rsub__(self, other):
        return self.__neg__().__radd__(other)

    def __add__(self, other):
        if not isinstance(other, relativedelta):
            raise TypeError("unsupported type for add operation")
        return relativedelta(years=other.years+self.years,
                             months=other.months+self.months,
                             days=other.days+self.days,
                             hours=other.hours+self.hours,
                             minutes=other.minutes+self.minutes,
                             seconds=other.seconds+self.seconds,
                             microseconds=other.microseconds+self.microseconds,
                             leapdays=other.leapdays or self.leapdays,
                             year=other.year or self.year,
                             month=other.month or self.month,
                             day=other.day or self.day,
                             weekday=other.weekday or self.weekday,
                             hour=other.hour or self.hour,
                             minute=other.minute or self.minute,
                             second=other.second or self.second,
                             microsecond=other.microsecond or self.microsecond)

    def __sub__(self, other):
        if not isinstance(other, relativedelta):
            raise TypeError("unsupported type for sub operation")
        return relativedelta(years=other.years-self.years,
                             months=other.months-self.months,
                             days=other.days-self.days,
                             hours=other.hours-self.hours,
                             minutes=other.minutes-self.minutes,
                             seconds=other.seconds-self.seconds,
                             microseconds=other.microseconds-self.microseconds,
                             leapdays=other.leapdays or self.leapdays,
                             year=other.year or self.year,
                             month=other.month or self.month,
                             day=other.day or self.day,
                             weekday=other.weekday or self.weekday,
                             hour=other.hour or self.hour,
                             minute=other.minute or self.minute,
                             second=other.second or self.second,
                             microsecond=other.microsecond or self.microsecond)

    def __neg__(self):
        return relativedelta(years=-self.years,
                             months=-self.months,
                             days=-self.days,
                             hours=-self.hours,
                             minutes=-self.minutes,
                             seconds=-self.seconds,
                             microseconds=-self.microseconds,
                             leapdays=self.leapdays,
                             year=self.year,
                             month=self.month,
                             day=self.day,
                             weekday=self.weekday,
                             hour=self.hour,
                             minute=self.minute,
                             second=self.second,
                             microsecond=self.microsecond)

    def __nonzero__(self):
        return not (not self.years and
                    not self.months and
                    not self.days and
                    not self.hours and
                    not self.minutes and
                    not self.seconds and
                    not self.microseconds and
                    not self.leapdays and
                    self.year is None and
                    self.month is None and
                    self.day is None and
                    self.weekday is None and
                    self.hour is None and
                    self.minute is None and
                    self.second is None and
                    self.microsecond is None)
    __bool__ = __nonzero__  # Python 3 name for the truth-value hook

    def __mul__(self, other):
        f = float(other)
        return relativedelta(years=int(round(self.years*f)),
                             months=int(round(self.months*f)),
                             days=int(round(self.days*f)),
                             hours=int(round(self.hours*f)),
                             minutes=int(round(self.minutes*f)),
                             seconds=int(round(self.seconds*f)),
                             microseconds=int(round(self.microseconds*f)),
                             leapdays=self.leapdays,
                             year=self.year,
                             month=self.month,
                             day=self.day,
                             weekday=self.weekday,
                             hour=self.hour,
                             minute=self.minute,
                             second=self.second,
                             microsecond=self.microsecond)

    def __eq__(self, other):
        if not isinstance(other, relativedelta):
            return False
        if self.weekday or other.weekday:
            if not self.weekday or not other.weekday:
                return False
            if self.weekday.weekday != other.weekday.weekday:
                return False
            n1, n2 = self.weekday.n, other.weekday.n
            if n1 != n2 and not ((not n1 or n1 == 1) and (not n2 or n2 == 1)):
                return False
        return (self.years == other.years and
                self.months == other.months and
                self.days == other.days and
                self.hours == other.hours and
                self.minutes == other.minutes and
                self.seconds == other.seconds and
                self.microseconds == other.microseconds and
                self.leapdays == other.leapdays and
                self.year == other.year and
                self.month == other.month and
                self.day == other.day and
                self.hour == other.hour and
                self.minute == other.minute and
                self.second == other.second and
                self.microsecond == other.microsecond)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __div__(self, other):
        return self.__mul__(1/float(other))
    __truediv__ = __div__  # Python 3 division hook

    def __repr__(self):
        l = []
        for attr in ["years", "months", "days", "leapdays",
                     "hours", "minutes", "seconds", "microseconds"]:
            value = getattr(self, attr)
            if value:
                l.append("%s=%+d" % (attr, value))
        for attr in ["year", "month", "day", "weekday",
                     "hour", "minute", "second", "microsecond"]:
            value = getattr(self, attr)
            if value is not None:
                l.append("%s=%s" % (attr, repr(value)))
        return "%s(%s)" % (self.__class__.__name__, ", ".join(l))

# vim:ts=4:sw=4:et
1108  lib/dateutil/rrule.py  Normal file (file diff suppressed because it is too large)
958  lib/dateutil/tz.py  Normal file
@@ -0,0 +1,958 @@
"""
|
||||||
|
Copyright (c) 2003-2007 Gustavo Niemeyer <gustavo@niemeyer.net>
|
||||||
|
|
||||||
|
This module offers extensions to the standard python 2.3+
|
||||||
|
datetime module.
|
||||||
|
"""
|
||||||
|
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
|
||||||
|
__license__ = "PSF License"
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import struct
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
relativedelta = None
|
||||||
|
parser = None
|
||||||
|
rrule = None
|
||||||
|
|
||||||
|
__all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange",
|
||||||
|
"tzstr", "tzical", "tzwin", "tzwinlocal", "gettz"]
|
||||||
|
|
||||||
|
try:
|
||||||
|
from dateutil.tzwin import tzwin, tzwinlocal
|
||||||
|
except (ImportError, OSError):
|
||||||
|
tzwin, tzwinlocal = None, None
|
||||||
|
|
||||||
|
ZERO = datetime.timedelta(0)
|
||||||
|
EPOCHORDINAL = datetime.datetime.utcfromtimestamp(0).toordinal()
|
||||||
|
|
||||||
|
class tzutc(datetime.tzinfo):
|
||||||
|
|
||||||
|
def utcoffset(self, dt):
|
||||||
|
return ZERO
|
||||||
|
|
||||||
|
def dst(self, dt):
|
||||||
|
return ZERO
|
||||||
|
|
||||||
|
def tzname(self, dt):
|
||||||
|
return "UTC"
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
return (isinstance(other, tzutc) or
|
||||||
|
(isinstance(other, tzoffset) and other._offset == ZERO))
|
||||||
|
|
||||||
|
def __ne__(self, other):
|
||||||
|
return not self.__eq__(other)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return "%s()" % self.__class__.__name__
|
||||||
|
|
||||||
|
__reduce__ = object.__reduce__
|
||||||
|
|
||||||
|
class tzoffset(datetime.tzinfo):
|
||||||
|
|
||||||
|
def __init__(self, name, offset):
|
||||||
|
self._name = name
|
||||||
|
self._offset = datetime.timedelta(seconds=offset)
|
||||||
|
|
||||||
|
def utcoffset(self, dt):
|
||||||
|
return self._offset
|
||||||
|
|
||||||
|
def dst(self, dt):
|
||||||
|
return ZERO
|
||||||
|
|
||||||
|
def tzname(self, dt):
|
||||||
|
return self._name
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
return (isinstance(other, tzoffset) and
|
||||||
|
self._offset == other._offset)
|
||||||
|
|
||||||
|
def __ne__(self, other):
|
||||||
|
return not self.__eq__(other)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return "%s(%s, %s)" % (self.__class__.__name__,
|
||||||
|
`self._name`,
|
||||||
|
self._offset.days*86400+self._offset.seconds)
|
||||||
|
|
||||||
|
__reduce__ = object.__reduce__
|
||||||
|
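# A minimal usage sketch of the two fixed-offset types above (not part of
# the original file; expected values follow directly from the class
# definitions):
#
#   >>> import datetime
#   >>> utc = tzutc()
#   >>> est = tzoffset("EST", -18000)
#   >>> datetime.datetime(2012, 1, 1, 12, 0, tzinfo=utc).astimezone(est)
#   datetime.datetime(2012, 1, 1, 7, 0, tzinfo=tzoffset('EST', -18000))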

class tzlocal(datetime.tzinfo):

    _std_offset = datetime.timedelta(seconds=-time.timezone)
    if time.daylight:
        _dst_offset = datetime.timedelta(seconds=-time.altzone)
    else:
        _dst_offset = _std_offset

    def utcoffset(self, dt):
        if self._isdst(dt):
            return self._dst_offset
        else:
            return self._std_offset

    def dst(self, dt):
        if self._isdst(dt):
            return self._dst_offset-self._std_offset
        else:
            return ZERO

    def tzname(self, dt):
        return time.tzname[self._isdst(dt)]

    def _isdst(self, dt):
        # We can't use mktime here. It is unstable when deciding if
        # the hour near to a change is DST or not.
        #
        # timestamp = time.mktime((dt.year, dt.month, dt.day, dt.hour,
        #                          dt.minute, dt.second, dt.weekday(), 0, -1))
        # return time.localtime(timestamp).tm_isdst
        #
        # The code above yields the following result:
        #
        # >>> import tz, datetime
        # >>> t = tz.tzlocal()
        # >>> datetime.datetime(2003, 2, 15, 23, tzinfo=t).tzname()
        # 'BRDT'
        # >>> datetime.datetime(2003, 2, 16, 0, tzinfo=t).tzname()
        # 'BRST'
        # >>> datetime.datetime(2003, 2, 15, 23, tzinfo=t).tzname()
        # 'BRST'
        # >>> datetime.datetime(2003, 2, 15, 22, tzinfo=t).tzname()
        # 'BRDT'
        # >>> datetime.datetime(2003, 2, 15, 23, tzinfo=t).tzname()
        # 'BRDT'
        #
        # Here is a more stable implementation:
        #
        timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400
                     + dt.hour * 3600
                     + dt.minute * 60
                     + dt.second)
        return time.localtime(timestamp+time.timezone).tm_isdst

    def __eq__(self, other):
        if not isinstance(other, tzlocal):
            return False
        return (self._std_offset == other._std_offset and
                self._dst_offset == other._dst_offset)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "%s()" % self.__class__.__name__

    __reduce__ = object.__reduce__
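# tzlocal reads its offsets from the time module at import time, so its
# output depends on the machine's configured zone. A sketch (zone names
# shown are only examples):
#
#   >>> import datetime
#   >>> datetime.datetime.now(tzlocal()).tzname()   # e.g. 'CET' or 'EST'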

class _ttinfo(object):
    __slots__ = ["offset", "delta", "isdst", "abbr", "isstd", "isgmt"]

    def __init__(self):
        for attr in self.__slots__:
            setattr(self, attr, None)

    def __repr__(self):
        l = []
        for attr in self.__slots__:
            value = getattr(self, attr)
            if value is not None:
                l.append("%s=%s" % (attr, repr(value)))
        return "%s(%s)" % (self.__class__.__name__, ", ".join(l))

    def __eq__(self, other):
        if not isinstance(other, _ttinfo):
            return False
        return (self.offset == other.offset and
                self.delta == other.delta and
                self.isdst == other.isdst and
                self.abbr == other.abbr and
                self.isstd == other.isstd and
                self.isgmt == other.isgmt)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __getstate__(self):
        state = {}
        for name in self.__slots__:
            state[name] = getattr(self, name, None)
        return state

    def __setstate__(self, state):
        for name in self.__slots__:
            if name in state:
                setattr(self, name, state[name])

class tzfile(datetime.tzinfo):

    # http://www.twinsun.com/tz/tz-link.htm
    # ftp://elsie.nci.nih.gov/pub/tz*.tar.gz

    def __init__(self, fileobj):
        if isinstance(fileobj, basestring):
            self._filename = fileobj
            fileobj = open(fileobj)
        elif hasattr(fileobj, "name"):
            self._filename = fileobj.name
        else:
            self._filename = repr(fileobj)

        # From tzfile(5):
        #
        # The time zone information files used by tzset(3)
        # begin with the magic characters "TZif" to identify
        # them as time zone information files, followed by
        # sixteen bytes reserved for future use, followed by
        # six four-byte values of type long, written in a
        # ``standard'' byte order (the high-order byte
        # of the value is written first).

        if fileobj.read(4) != "TZif":
            raise ValueError("magic not found")

        fileobj.read(16)

        (
         # The number of UTC/local indicators stored in the file.
         ttisgmtcnt,

         # The number of standard/wall indicators stored in the file.
         ttisstdcnt,

         # The number of leap seconds for which data is
         # stored in the file.
         leapcnt,

         # The number of "transition times" for which data
         # is stored in the file.
         timecnt,

         # The number of "local time types" for which data
         # is stored in the file (must not be zero).
         typecnt,

         # The number of characters of "time zone
         # abbreviation strings" stored in the file.
         charcnt,

        ) = struct.unpack(">6l", fileobj.read(24))

        # The above header is followed by tzh_timecnt four-byte
        # values of type long, sorted in ascending order.
        # These values are written in ``standard'' byte order.
        # Each is used as a transition time (as returned by
        # time(2)) at which the rules for computing local time
        # change.

        if timecnt:
            self._trans_list = struct.unpack(">%dl" % timecnt,
                                             fileobj.read(timecnt*4))
        else:
            self._trans_list = []

        # Next come tzh_timecnt one-byte values of type unsigned
        # char; each one tells which of the different types of
        # ``local time'' types described in the file is associated
        # with the same-indexed transition time. These values
        # serve as indices into an array of ttinfo structures that
        # appears next in the file.

        if timecnt:
            self._trans_idx = struct.unpack(">%dB" % timecnt,
                                            fileobj.read(timecnt))
        else:
            self._trans_idx = []

        # Each ttinfo structure is written as a four-byte value
        # for tt_gmtoff of type long, in a standard byte
        # order, followed by a one-byte value for tt_isdst
        # and a one-byte value for tt_abbrind. In each
        # structure, tt_gmtoff gives the number of
        # seconds to be added to UTC, tt_isdst tells whether
        # tm_isdst should be set by localtime(3), and
        # tt_abbrind serves as an index into the array of
        # time zone abbreviation characters that follow the
        # ttinfo structure(s) in the file.

        ttinfo = []

        for i in range(typecnt):
            ttinfo.append(struct.unpack(">lbb", fileobj.read(6)))

        abbr = fileobj.read(charcnt)

        # Then there are tzh_leapcnt pairs of four-byte
        # values, written in standard byte order; the
        # first value of each pair gives the time (as
        # returned by time(2)) at which a leap second
        # occurs; the second gives the total number of
        # leap seconds to be applied after the given time.
        # The pairs of values are sorted in ascending order
        # by time.

        # Not used, for now
        if leapcnt:
            leap = struct.unpack(">%dl" % (leapcnt*2),
                                 fileobj.read(leapcnt*8))

        # Then there are tzh_ttisstdcnt standard/wall
        # indicators, each stored as a one-byte value;
        # they tell whether the transition times associated
        # with local time types were specified as standard
        # time or wall clock time, and are used when
        # a time zone file is used in handling POSIX-style
        # time zone environment variables.

        if ttisstdcnt:
            isstd = struct.unpack(">%db" % ttisstdcnt,
                                  fileobj.read(ttisstdcnt))

        # Finally, there are tzh_ttisgmtcnt UTC/local
        # indicators, each stored as a one-byte value;
        # they tell whether the transition times associated
        # with local time types were specified as UTC or
        # local time, and are used when a time zone file
        # is used in handling POSIX-style time zone envi-
        # ronment variables.

        if ttisgmtcnt:
            isgmt = struct.unpack(">%db" % ttisgmtcnt,
                                  fileobj.read(ttisgmtcnt))

        # ** Everything has been read **

        # Build ttinfo list
        self._ttinfo_list = []
        for i in range(typecnt):
            gmtoff, isdst, abbrind = ttinfo[i]
            # Round to full-minutes if that's not the case. Python's
            # datetime doesn't accept sub-minute timezones. Check
            # http://python.org/sf/1447945 for some information.
            gmtoff = (gmtoff+30)//60*60
            tti = _ttinfo()
            tti.offset = gmtoff
            tti.delta = datetime.timedelta(seconds=gmtoff)
            tti.isdst = isdst
            tti.abbr = abbr[abbrind:abbr.find('\x00', abbrind)]
            tti.isstd = (ttisstdcnt > i and isstd[i] != 0)
            tti.isgmt = (ttisgmtcnt > i and isgmt[i] != 0)
            self._ttinfo_list.append(tti)

        # Replace ttinfo indexes for ttinfo objects.
        trans_idx = []
        for idx in self._trans_idx:
            trans_idx.append(self._ttinfo_list[idx])
        self._trans_idx = tuple(trans_idx)

        # Set standard, dst, and before ttinfos. before will be
        # used when a given time is before any transitions,
        # and will be set to the first non-dst ttinfo, or to
        # the first dst, if all of them are dst.
        self._ttinfo_std = None
        self._ttinfo_dst = None
        self._ttinfo_before = None
        if self._ttinfo_list:
            if not self._trans_list:
                self._ttinfo_std = self._ttinfo_first = self._ttinfo_list[0]
            else:
                for i in range(timecnt-1, -1, -1):
                    tti = self._trans_idx[i]
                    if not self._ttinfo_std and not tti.isdst:
                        self._ttinfo_std = tti
                    elif not self._ttinfo_dst and tti.isdst:
                        self._ttinfo_dst = tti
                    if self._ttinfo_std and self._ttinfo_dst:
                        break
                else:
                    if self._ttinfo_dst and not self._ttinfo_std:
                        self._ttinfo_std = self._ttinfo_dst

            for tti in self._ttinfo_list:
                if not tti.isdst:
                    self._ttinfo_before = tti
                    break
            else:
                self._ttinfo_before = self._ttinfo_list[0]

        # Now fix transition times to become relative to wall time.
        #
        # I'm not sure about this. In my tests, the tz source file
        # is set up to wall time, and in the binary file isstd and
        # isgmt are off, so it should be in wall time. OTOH, it's
        # always in gmt time. Let me know if you have comments
        # about this.
        laststdoffset = 0
        self._trans_list = list(self._trans_list)
        for i in range(len(self._trans_list)):
            tti = self._trans_idx[i]
            if not tti.isdst:
                # This is std time.
                self._trans_list[i] += tti.offset
                laststdoffset = tti.offset
            else:
                # This is dst time. Convert to std.
                self._trans_list[i] += laststdoffset
        self._trans_list = tuple(self._trans_list)

    def _find_ttinfo(self, dt, laststd=0):
        timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400
                     + dt.hour * 3600
                     + dt.minute * 60
                     + dt.second)
        idx = 0
        for trans in self._trans_list:
            if timestamp < trans:
                break
            idx += 1
        else:
            return self._ttinfo_std
        if idx == 0:
            return self._ttinfo_before
        if laststd:
            while idx > 0:
                tti = self._trans_idx[idx-1]
                if not tti.isdst:
                    return tti
                idx -= 1
            else:
                return self._ttinfo_std
        else:
            return self._trans_idx[idx-1]

    def utcoffset(self, dt):
        if not self._ttinfo_std:
            return ZERO
        return self._find_ttinfo(dt).delta

    def dst(self, dt):
        if not self._ttinfo_dst:
            return ZERO
        tti = self._find_ttinfo(dt)
        if not tti.isdst:
            return ZERO

        # The documentation says that utcoffset()-dst() must
        # be constant for every dt.
        return tti.delta-self._find_ttinfo(dt, laststd=1).delta

        # An alternative for that would be:
        #
        # return self._ttinfo_dst.offset-self._ttinfo_std.offset
        #
        # However, this class stores historical changes in the
        # dst offset, so I believe that this wouldn't be the right
        # way to implement this.

    def tzname(self, dt):
        if not self._ttinfo_std:
            return None
        return self._find_ttinfo(dt).abbr

    def __eq__(self, other):
        if not isinstance(other, tzfile):
            return False
        return (self._trans_list == other._trans_list and
                self._trans_idx == other._trans_idx and
                self._ttinfo_list == other._ttinfo_list)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self._filename))

    def __reduce__(self):
        if not os.path.isfile(self._filename):
            raise ValueError("Unpicklable %s class" % self.__class__.__name__)
        return (self.__class__, (self._filename,))
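# A usage sketch for tzfile, assuming a POSIX system with the zoneinfo
# database installed at the usual path (not part of the original file):
#
#   >>> tz = tzfile("/usr/share/zoneinfo/America/New_York")
#   >>> datetime.datetime(2003, 7, 1, tzinfo=tz).tzname()
#   'EDT'
#   >>> datetime.datetime(2003, 1, 1, tzinfo=tz).tzname()
#   'EST'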

class tzrange(datetime.tzinfo):

    def __init__(self, stdabbr, stdoffset=None,
                 dstabbr=None, dstoffset=None,
                 start=None, end=None):
        global relativedelta
        if not relativedelta:
            from dateutil import relativedelta
        self._std_abbr = stdabbr
        self._dst_abbr = dstabbr
        if stdoffset is not None:
            self._std_offset = datetime.timedelta(seconds=stdoffset)
        else:
            self._std_offset = ZERO
        if dstoffset is not None:
            self._dst_offset = datetime.timedelta(seconds=dstoffset)
        elif dstabbr and stdoffset is not None:
            self._dst_offset = self._std_offset+datetime.timedelta(hours=+1)
        else:
            self._dst_offset = ZERO
        if dstabbr and start is None:
            self._start_delta = relativedelta.relativedelta(
                hours=+2, month=4, day=1, weekday=relativedelta.SU(+1))
        else:
            self._start_delta = start
        if dstabbr and end is None:
            self._end_delta = relativedelta.relativedelta(
                hours=+1, month=10, day=31, weekday=relativedelta.SU(-1))
        else:
            self._end_delta = end

    def utcoffset(self, dt):
        if self._isdst(dt):
            return self._dst_offset
        else:
            return self._std_offset

    def dst(self, dt):
        if self._isdst(dt):
            return self._dst_offset-self._std_offset
        else:
            return ZERO

    def tzname(self, dt):
        if self._isdst(dt):
            return self._dst_abbr
        else:
            return self._std_abbr

    def _isdst(self, dt):
        if not self._start_delta:
            return False
        year = datetime.datetime(dt.year, 1, 1)
        start = year+self._start_delta
        end = year+self._end_delta
        dt = dt.replace(tzinfo=None)
        if start < end:
            return dt >= start and dt < end
        else:
            return dt >= start or dt < end

    def __eq__(self, other):
        if not isinstance(other, tzrange):
            return False
        return (self._std_abbr == other._std_abbr and
                self._dst_abbr == other._dst_abbr and
                self._std_offset == other._std_offset and
                self._dst_offset == other._dst_offset and
                self._start_delta == other._start_delta and
                self._end_delta == other._end_delta)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "%s(...)" % self.__class__.__name__

    __reduce__ = object.__reduce__
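# The tzrange defaults above (DST from the first Sunday of April at 2AM to
# the last Sunday of October) match the classic US rules, so a sketch with
# a 2003 date (not part of the original file):
#
#   >>> tz = tzrange("EST", -18000, "EDT")
#   >>> datetime.datetime(2003, 7, 1, tzinfo=tz).tzname()
#   'EDT'
#   >>> datetime.datetime(2003, 1, 1, tzinfo=tz).tzname()
#   'EST'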

class tzstr(tzrange):

    def __init__(self, s):
        global parser
        if not parser:
            from dateutil import parser
        self._s = s

        res = parser._parsetz(s)
        if res is None:
            raise ValueError("unknown string format")

        # Here we break the compatibility with the TZ variable handling.
        # GMT-3 actually *means* the timezone -3.
        if res.stdabbr in ("GMT", "UTC"):
            res.stdoffset *= -1

        # We must initialize it first, since _delta() needs
        # _std_offset and _dst_offset set. Use False in start/end
        # to avoid building it two times.
        tzrange.__init__(self, res.stdabbr, res.stdoffset,
                         res.dstabbr, res.dstoffset,
                         start=False, end=False)

        if not res.dstabbr:
            self._start_delta = None
            self._end_delta = None
        else:
            self._start_delta = self._delta(res.start)
            if self._start_delta:
                self._end_delta = self._delta(res.end, isend=1)

    def _delta(self, x, isend=0):
        kwargs = {}
        if x.month is not None:
            kwargs["month"] = x.month
            if x.weekday is not None:
                kwargs["weekday"] = relativedelta.weekday(x.weekday, x.week)
                if x.week > 0:
                    kwargs["day"] = 1
                else:
                    kwargs["day"] = 31
            elif x.day:
                kwargs["day"] = x.day
        elif x.yday is not None:
            kwargs["yearday"] = x.yday
        elif x.jyday is not None:
            kwargs["nlyearday"] = x.jyday
        if not kwargs:
            # Default is to start on first sunday of april, and end
            # on last sunday of october.
            if not isend:
                kwargs["month"] = 4
                kwargs["day"] = 1
                kwargs["weekday"] = relativedelta.SU(+1)
            else:
                kwargs["month"] = 10
                kwargs["day"] = 31
                kwargs["weekday"] = relativedelta.SU(-1)
        if x.time is not None:
            kwargs["seconds"] = x.time
        else:
            # Default is 2AM.
            kwargs["seconds"] = 7200
        if isend:
            # Convert to standard time, to follow the documented way
            # of working with the extra hour. See the documentation
            # of the tzinfo class.
            delta = self._dst_offset-self._std_offset
            kwargs["seconds"] -= delta.seconds+delta.days*86400
        return relativedelta.relativedelta(**kwargs)

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self._s))
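# tzstr builds the same kind of object from a POSIX TZ-style string; a
# minimal sketch (not part of the original file):
#
#   >>> tz = tzstr("EST5EDT")
#   >>> datetime.datetime(2003, 7, 1, tzinfo=tz).tzname()
#   'EDT'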

class _tzicalvtzcomp:
    def __init__(self, tzoffsetfrom, tzoffsetto, isdst,
                 tzname=None, rrule=None):
        self.tzoffsetfrom = datetime.timedelta(seconds=tzoffsetfrom)
        self.tzoffsetto = datetime.timedelta(seconds=tzoffsetto)
        self.tzoffsetdiff = self.tzoffsetto-self.tzoffsetfrom
        self.isdst = isdst
        self.tzname = tzname
        self.rrule = rrule

class _tzicalvtz(datetime.tzinfo):
    def __init__(self, tzid, comps=[]):
        self._tzid = tzid
        self._comps = comps
        self._cachedate = []
        self._cachecomp = []

    def _find_comp(self, dt):
        if len(self._comps) == 1:
            return self._comps[0]
        dt = dt.replace(tzinfo=None)
        try:
            return self._cachecomp[self._cachedate.index(dt)]
        except ValueError:
            pass
        lastcomp = None
        lastcompdt = None
        for comp in self._comps:
            if not comp.isdst:
                # Handle the extra hour in DST -> STD
                compdt = comp.rrule.before(dt-comp.tzoffsetdiff, inc=True)
            else:
                compdt = comp.rrule.before(dt, inc=True)
            if compdt and (not lastcompdt or lastcompdt < compdt):
                lastcompdt = compdt
                lastcomp = comp
        if not lastcomp:
            # RFC says nothing about what to do when a given
            # time is before the first onset date. We'll look for the
            # first standard component, or the first component, if
            # none is found.
            for comp in self._comps:
                if not comp.isdst:
                    lastcomp = comp
                    break
            else:
                lastcomp = self._comps[0]
        self._cachedate.insert(0, dt)
        self._cachecomp.insert(0, lastcomp)
        if len(self._cachedate) > 10:
            self._cachedate.pop()
            self._cachecomp.pop()
        return lastcomp

    def utcoffset(self, dt):
        return self._find_comp(dt).tzoffsetto

    def dst(self, dt):
        comp = self._find_comp(dt)
        if comp.isdst:
            return comp.tzoffsetdiff
        else:
            return ZERO

    def tzname(self, dt):
        return self._find_comp(dt).tzname

    def __repr__(self):
        return "<tzicalvtz %s>" % repr(self._tzid)

    __reduce__ = object.__reduce__

class tzical:
    def __init__(self, fileobj):
        global rrule
        if not rrule:
            from dateutil import rrule

        if isinstance(fileobj, basestring):
            self._s = fileobj
            fileobj = open(fileobj)
        elif hasattr(fileobj, "name"):
            self._s = fileobj.name
        else:
            self._s = repr(fileobj)

        self._vtz = {}

        self._parse_rfc(fileobj.read())

    def keys(self):
        return list(self._vtz.keys())

    def get(self, tzid=None):
        if tzid is None:
            keys = list(self._vtz.keys())
            if len(keys) == 0:
                raise ValueError("no timezones defined")
            elif len(keys) > 1:
                raise ValueError("more than one timezone available")
            tzid = keys[0]
        return self._vtz.get(tzid)

    def _parse_offset(self, s):
        s = s.strip()
        if not s:
            raise ValueError("empty offset")
        if s[0] in ('+', '-'):
            signal = (-1, +1)[s[0] == '+']
            s = s[1:]
        else:
            signal = +1
        if len(s) == 4:
            return (int(s[:2])*3600+int(s[2:])*60)*signal
        elif len(s) == 6:
            return (int(s[:2])*3600+int(s[2:4])*60+int(s[4:]))*signal
        else:
            raise ValueError("invalid offset: "+s)

    def _parse_rfc(self, s):
        lines = s.splitlines()
        if not lines:
            raise ValueError("empty string")

        # Unfold
        i = 0
        while i < len(lines):
            line = lines[i].rstrip()
            if not line:
                del lines[i]
            elif i > 0 and line[0] in (" ", "\t"):
                lines[i-1] += line[1:]
                del lines[i]
            else:
                i += 1

        tzid = None
        comps = []
        invtz = False
        comptype = None
        for line in lines:
            if not line:
                continue
            name, value = line.split(':', 1)
            parms = name.split(';')
            if not parms:
                raise ValueError("empty property name")
            name = parms[0].upper()
            parms = parms[1:]
            if invtz:
                if name == "BEGIN":
                    if value in ("STANDARD", "DAYLIGHT"):
                        # Process component
                        pass
                    else:
                        raise ValueError("unknown component: "+value)
                    comptype = value
                    founddtstart = False
                    tzoffsetfrom = None
                    tzoffsetto = None
                    rrulelines = []
                    tzname = None
                elif name == "END":
                    if value == "VTIMEZONE":
                        if comptype:
                            raise ValueError("component not closed: "+comptype)
                        if not tzid:
                            raise ValueError("mandatory TZID not found")
                        if not comps:
                            raise ValueError("at least one component is needed")
                        # Process vtimezone
                        self._vtz[tzid] = _tzicalvtz(tzid, comps)
                        invtz = False
                    elif value == comptype:
                        if not founddtstart:
                            raise ValueError("mandatory DTSTART not found")
                        if tzoffsetfrom is None:
                            raise ValueError("mandatory TZOFFSETFROM not found")
                        if tzoffsetto is None:
                            raise ValueError("mandatory TZOFFSETTO not found")
                        # Process component
                        rr = None
                        if rrulelines:
                            rr = rrule.rrulestr("\n".join(rrulelines),
                                                compatible=True,
                                                ignoretz=True,
                                                cache=True)
                        comp = _tzicalvtzcomp(tzoffsetfrom, tzoffsetto,
                                              (comptype == "DAYLIGHT"),
                                              tzname, rr)
                        comps.append(comp)
                        comptype = None
                    else:
                        raise ValueError("invalid component end: "+value)
                elif comptype:
                    if name == "DTSTART":
                        rrulelines.append(line)
                        founddtstart = True
                    elif name in ("RRULE", "RDATE", "EXRULE", "EXDATE"):
                        rrulelines.append(line)
                    elif name == "TZOFFSETFROM":
                        if parms:
                            raise ValueError("unsupported %s parm: %s" % (name, parms[0]))
                        tzoffsetfrom = self._parse_offset(value)
                    elif name == "TZOFFSETTO":
                        if parms:
                            raise ValueError("unsupported TZOFFSETTO parm: "+parms[0])
                        tzoffsetto = self._parse_offset(value)
                    elif name == "TZNAME":
                        if parms:
                            raise ValueError("unsupported TZNAME parm: "+parms[0])
                        tzname = value
                    elif name == "COMMENT":
                        pass
                    elif name.upper().startswith('X-'):
                        # Ignore experimental properties.
                        pass
                    else:
                        raise ValueError("unsupported property: "+name)
                else:
                    if name == "TZID":
                        for p in parms:
                            if not p.upper().startswith('X-'):
                                raise ValueError("unsupported TZID parm: "+p)
                        tzid = value
                    elif name in ("TZURL", "LAST-MODIFIED", "COMMENT"):
                        pass
                    elif name.upper().startswith('X-'):
                        # Ignore experimental properties.
                        pass
                    else:
                        raise ValueError("unsupported property: "+name)
            elif name == "BEGIN" and value == "VTIMEZONE":
                tzid = None
                comps = []
                invtz = True

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self._s))
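# A sketch of reading timezones from an iCalendar file containing a
# VTIMEZONE block (the file name and the TZID below are hypothetical):
#
#   >>> ical = tzical("schedule.ics")
#   >>> ical.keys()
#   ['US-Eastern']
#   >>> tz = ical.get("US-Eastern")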

if sys.platform != "win32":
    TZFILES = ["/etc/localtime", "localtime"]
    TZPATHS = ["/usr/share/zoneinfo", "/usr/lib/zoneinfo", "/etc/zoneinfo"]
else:
    TZFILES = []
    TZPATHS = []

def gettz(name=None):
    tz = None
    if not name:
        try:
            name = os.environ["TZ"]
        except KeyError:
            pass
    if name is None or name == ":":
        for filepath in TZFILES:
            if not os.path.isabs(filepath):
                filename = filepath
                for path in TZPATHS:
                    filepath = os.path.join(path, filename)
                    if os.path.isfile(filepath):
                        break
                else:
                    continue
            if os.path.isfile(filepath):
                try:
                    tz = tzfile(filepath)
                    break
                except (IOError, OSError, ValueError):
                    pass
        else:
            tz = tzlocal()
    else:
        if name.startswith(":"):
            name = name[1:]
        if os.path.isabs(name):
            if os.path.isfile(name):
                tz = tzfile(name)
            else:
                tz = None
        else:
            for path in TZPATHS:
                filepath = os.path.join(path, name)
                if not os.path.isfile(filepath):
                    filepath = filepath.replace(' ', '_')
                    if not os.path.isfile(filepath):
                        continue
                try:
                    tz = tzfile(filepath)
                    break
                except (IOError, OSError, ValueError):
                    pass
            else:
                tz = None
                if tzwin:
                    try:
                        tz = tzwin(name)
                    except OSError:
                        pass
                if not tz:
                    from dateutil.zoneinfo import gettz
                    tz = gettz(name)
                if not tz:
                    for c in name:
                        # name must have at least one offset to be a tzstr
                        if c in "0123456789":
                            try:
                                tz = tzstr(name)
                            except ValueError:
                                pass
                            break
                    else:
                        if name in ("GMT", "UTC"):
                            tz = tzutc()
                        elif name in time.tzname:
                            tz = tzlocal()
    return tz

# vim:ts=4:sw=4:et
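# gettz resolves a name through the chain sketched above: the TZ environment
# variable, the system zoneinfo paths, tzwin on Windows, the bundled
# dateutil.zoneinfo tarball, and finally tzstr parsing for names containing
# digits. For example, on a typical Linux install:
#
#   >>> gettz("America/New_York")
#   tzfile('/usr/share/zoneinfo/America/New_York')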
180  lib/dateutil/tzwin.py  Normal file
@@ -0,0 +1,180 @@
# This code was originally contributed by Jeffrey Harris.
import datetime
import struct
import _winreg

__author__ = "Jeffrey Harris & Gustavo Niemeyer <gustavo@niemeyer.net>"

__all__ = ["tzwin", "tzwinlocal"]

ONEWEEK = datetime.timedelta(7)

TZKEYNAMENT = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones"
TZKEYNAME9X = r"SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones"
TZLOCALKEYNAME = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation"

def _settzkeyname():
    global TZKEYNAME
    handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
    try:
        _winreg.OpenKey(handle, TZKEYNAMENT).Close()
        TZKEYNAME = TZKEYNAMENT
    except WindowsError:
        TZKEYNAME = TZKEYNAME9X
    handle.Close()

_settzkeyname()

class tzwinbase(datetime.tzinfo):
    """tzinfo class based on win32's timezones available in the registry."""

    def utcoffset(self, dt):
        if self._isdst(dt):
            return datetime.timedelta(minutes=self._dstoffset)
        else:
            return datetime.timedelta(minutes=self._stdoffset)

    def dst(self, dt):
        if self._isdst(dt):
            minutes = self._dstoffset - self._stdoffset
            return datetime.timedelta(minutes=minutes)
        else:
            return datetime.timedelta(0)

    def tzname(self, dt):
        if self._isdst(dt):
            return self._dstname
        else:
            return self._stdname

    def list():
        """Return a list of all time zones known to the system."""
        handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
        tzkey = _winreg.OpenKey(handle, TZKEYNAME)
        result = [_winreg.EnumKey(tzkey, i)
                  for i in range(_winreg.QueryInfoKey(tzkey)[0])]
        tzkey.Close()
        handle.Close()
        return result
    list = staticmethod(list)

    def display(self):
        return self._display

    def _isdst(self, dt):
        dston = picknthweekday(dt.year, self._dstmonth, self._dstdayofweek,
                               self._dsthour, self._dstminute,
                               self._dstweeknumber)
        dstoff = picknthweekday(dt.year, self._stdmonth, self._stddayofweek,
                                self._stdhour, self._stdminute,
                                self._stdweeknumber)
        if dston < dstoff:
            return dston <= dt.replace(tzinfo=None) < dstoff
        else:
            return not dstoff <= dt.replace(tzinfo=None) < dston


class tzwin(tzwinbase):

    def __init__(self, name):
        self._name = name

        handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
        tzkey = _winreg.OpenKey(handle, "%s\\%s" % (TZKEYNAME, name))
        keydict = valuestodict(tzkey)
        tzkey.Close()
        handle.Close()

        self._stdname = keydict["Std"].encode("iso-8859-1")
        self._dstname = keydict["Dlt"].encode("iso-8859-1")

        self._display = keydict["Display"]

        # See http://ww_winreg.jsiinc.com/SUBA/tip0300/rh0398.htm
        tup = struct.unpack("=3l16h", keydict["TZI"])
        self._stdoffset = -tup[0]-tup[1]          # Bias + StandardBias * -1
        self._dstoffset = self._stdoffset-tup[2]  # + DaylightBias * -1

        (self._stdmonth,
         self._stddayofweek,   # Sunday = 0
         self._stdweeknumber,  # Last = 5
         self._stdhour,
         self._stdminute) = tup[4:9]

        (self._dstmonth,
         self._dstdayofweek,   # Sunday = 0
         self._dstweeknumber,  # Last = 5
         self._dsthour,
         self._dstminute) = tup[12:17]

    def __repr__(self):
        return "tzwin(%s)" % repr(self._name)

    def __reduce__(self):
        return (self.__class__, (self._name,))


class tzwinlocal(tzwinbase):

    def __init__(self):

        handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)

        tzlocalkey = _winreg.OpenKey(handle, TZLOCALKEYNAME)
        keydict = valuestodict(tzlocalkey)
        tzlocalkey.Close()

        self._stdname = keydict["StandardName"].encode("iso-8859-1")
        self._dstname = keydict["DaylightName"].encode("iso-8859-1")

        try:
            tzkey = _winreg.OpenKey(handle, "%s\\%s" % (TZKEYNAME, self._stdname))
            _keydict = valuestodict(tzkey)
            self._display = _keydict["Display"]
            tzkey.Close()
        except OSError:
            self._display = None

        handle.Close()

        self._stdoffset = -keydict["Bias"]-keydict["StandardBias"]
        self._dstoffset = self._stdoffset-keydict["DaylightBias"]

        # See http://ww_winreg.jsiinc.com/SUBA/tip0300/rh0398.htm
        tup = struct.unpack("=8h", keydict["StandardStart"])

        (self._stdmonth,
         self._stddayofweek,   # Sunday = 0
         self._stdweeknumber,  # Last = 5
         self._stdhour,
         self._stdminute) = tup[1:6]

        tup = struct.unpack("=8h", keydict["DaylightStart"])

        (self._dstmonth,
         self._dstdayofweek,   # Sunday = 0
         self._dstweeknumber,  # Last = 5
         self._dsthour,
         self._dstminute) = tup[1:6]

    def __reduce__(self):
        return (self.__class__, ())


def picknthweekday(year, month, dayofweek, hour, minute, whichweek):
    """dayofweek == 0 means Sunday, whichweek 5 means last instance"""
    first = datetime.datetime(year, month, 1, hour, minute)
    # First occurrence of the target weekday in the month.
    weekdayone = first.replace(day=((dayofweek-first.isoweekday()) % 7+1))
    dt = weekdayone+(whichweek-1)*ONEWEEK
    if dt.month != month:
        # whichweek of 5 means "last instance"; step back into the month.
        dt -= ONEWEEK
    return dt


def valuestodict(key):
    """Convert a registry key's values to a dictionary."""
    result = {}
    size = _winreg.QueryInfoKey(key)[1]
    for i in range(size):
        data = _winreg.EnumValue(key, i)
        result[data[0]] = data[1]
    return result
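

# A sketch of the helper above (expected values worked out by hand; both
# dates below are Sundays):
#
#   >>> picknthweekday(2003, 4, 0, 2, 0, 1)     # first Sunday of April, 2AM
#   datetime.datetime(2003, 4, 6, 2, 0)
#   >>> picknthweekday(2003, 10, 0, 2, 0, 5)    # "last" Sunday of October
#   datetime.datetime(2003, 10, 26, 2, 0)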
85  lib/dateutil/zoneinfo/__init__.py  Normal file
@@ -0,0 +1,85 @@
"""
|
||||||
|
Copyright (c) 2003-2005 Gustavo Niemeyer <gustavo@niemeyer.net>
|
||||||
|
|
||||||
|
This module offers extensions to the standard python 2.3+
|
||||||
|
datetime module.
|
||||||
|
"""
|
||||||
|
from dateutil.tz import tzfile
|
||||||
|
from tarfile import TarFile
|
||||||
|
import os
|
||||||
|
|
||||||
|
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
|
||||||
|
__license__ = "PSF License"
|
||||||
|
|
||||||
|
__all__ = ["setcachesize", "gettz", "rebuild"]
|
||||||
|
|
||||||
|
CACHE = {}
|
||||||
|
|
||||||
|
class tzfile(tzfile):
|
||||||
|
def __reduce__(self):
|
||||||
|
return (gettz, (self._filename,))
|
||||||
|
|
||||||
|
def getzoneinfofile():
|
||||||
|
filenames = os.listdir(os.path.join(os.path.dirname(__file__)))
|
||||||
|
filenames.sort()
|
||||||
|
filenames.reverse()
|
||||||
|
for entry in filenames:
|
||||||
|
if entry.startswith("zoneinfo") and ".tar." in entry:
|
||||||
|
return os.path.join(os.path.dirname(__file__), entry)
|
||||||
|
return None
|
||||||
|
|
||||||
|
def buildcache():
|
||||||
|
global CACHE
|
||||||
|
zoneinfofile = getzoneinfofile()
|
||||||
|
if zoneinfofile:
|
||||||
|
tf = TarFile.open(zoneinfofile)
|
||||||
|
try:
|
||||||
|
for tarinfo in tf.getmembers():
|
||||||
|
if tarinfo.islnk() or tarinfo.isfile():
|
||||||
|
zonefile = tf.extractfile(tarinfo)
|
||||||
|
CACHE[tarinfo.name] = tzfile(zonefile)
|
||||||
|
finally:
|
||||||
|
tf.close()
|
||||||
|
|
||||||
|
buildcache()
|
||||||
|
|
||||||
|
del getzoneinfofile
|
||||||
|
del buildcache
|
||||||
|
|
||||||
|
def setcachesize(_):
|
||||||
|
# Since the cache now eagerly initialized at
|
||||||
|
# import time, there's no point in controlling
|
||||||
|
# its size.
|
||||||
|
pass
|
||||||
|
|
||||||
|
def gettz(name):
|
||||||
|
return CACHE.get(name)
|
||||||
|
|
||||||
|
def rebuild(filename, tag=None, format="gz"):
|
||||||
|
import tempfile, shutil
|
||||||
|
tmpdir = tempfile.mkdtemp()
|
||||||
|
zonedir = os.path.join(tmpdir, "zoneinfo")
|
||||||
|
moduledir = os.path.dirname(__file__)
|
||||||
|
if tag: tag = "-"+tag
|
||||||
|
targetname = "zoneinfo%s.tar.%s" % (tag, format)
|
||||||
|
try:
|
||||||
|
tf = TarFile.open(filename)
|
||||||
|
for name in tf.getnames():
|
||||||
|
if not (name.endswith(".sh") or
|
||||||
|
name.endswith(".tab") or
|
||||||
|
name == "leapseconds"):
|
||||||
|
tf.extract(name, tmpdir)
|
||||||
|
filepath = os.path.join(tmpdir, name)
|
||||||
|
os.system("zic -d %s %s" % (zonedir, filepath))
|
||||||
|
tf.close()
|
||||||
|
target = os.path.join(moduledir, targetname)
|
||||||
|
for entry in os.listdir(moduledir):
|
||||||
|
if entry.startswith("zoneinfo") and ".tar." in entry:
|
||||||
|
os.unlink(os.path.join(moduledir, entry))
|
||||||
|
tf = TarFile.open(target, "w:%s" % format)
|
||||||
|
for entry in os.listdir(zonedir):
|
||||||
|
entrypath = os.path.join(zonedir, entry)
|
||||||
|
tf.add(entrypath, entry)
|
||||||
|
tf.close()
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(tmpdir)
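
# The module-level CACHE above is filled from the bundled zoneinfo tarball
# at import time, so lookups are plain dict reads; a sketch (zone names
# come from the shipped archive):
#
#   >>> from dateutil.zoneinfo import gettz
#   >>> tz = gettz("America/New_York")    # served from CACHE, or None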
BIN  lib/dateutil/zoneinfo/zoneinfo-2012c.tar.gz  Normal file (binary file not shown)
14  lib/guessit/__init__.py  Normal file
@@ -0,0 +1,14 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Extracts as much information as possible from a video file.
"""
from . import monkeypatch as _monkeypatch

from .api import guessit, GuessItApi
from .options import ConfigurationException
from .rules.common.quantity import Size

from .__version__ import __version__

_monkeypatch.monkeypatch_rebulk()
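
# A minimal sketch of the public API re-exported above (the release name is
# made up, and the dict-like result is shown as a plain dict; these are
# typical guessit 3 property names, but the exact key set depends on the
# input):
#
#   >>> from guessit import guessit
#   >>> guessit('Some.Show.S02E05.720p.HDTV.x264-GRP.mkv')
#   {'title': 'Some Show', 'season': 2, 'episode': 5,
#    'screen_size': '720p', 'source': 'HDTV', 'video_codec': 'H.264',
#    'release_group': 'GRP', 'container': 'mkv', 'type': 'episode'}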
180  lib/guessit/__main__.py  Normal file
@@ -0,0 +1,180 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Entry point module
"""
# pragma: no cover
from __future__ import print_function

import json
import logging
import os
import sys

import six
from rebulk.__version__ import __version__ as __rebulk_version__

from guessit import api
from guessit.__version__ import __version__
from guessit.jsonutils import GuessitEncoder
from guessit.options import argument_parser, parse_options, load_config, merge_options


try:
    from collections import OrderedDict
except ImportError:  # pragma: no-cover
    from ordereddict import OrderedDict  # pylint:disable=import-error


def guess_filename(filename, options):
    """
    Guess a single filename using given options
    :param filename: filename to parse
    :type filename: str
    :param options:
    :type options: dict
    :return:
    :rtype:
    """
    if not options.get('yaml') and not options.get('json') and not options.get('show_property'):
        print('For:', filename)

    guess = api.guessit(filename, options)

    if options.get('show_property'):
        print(guess.get(options.get('show_property'), ''))
        return

    if options.get('json'):
        print(json.dumps(guess, cls=GuessitEncoder, ensure_ascii=False))
    elif options.get('yaml'):
        import yaml
        from guessit import yamlutils

        ystr = yaml.dump({filename: OrderedDict(guess)}, Dumper=yamlutils.CustomDumper, default_flow_style=False,
                         allow_unicode=True)
        for i, yline in enumerate(ystr.splitlines()):
            if i == 0:
                print("? " + yline[:-1])
            elif i == 1:
                print(":" + yline[1:])
            else:
                print(yline)
    else:
        print('GuessIt found:', json.dumps(guess, cls=GuessitEncoder, indent=4, ensure_ascii=False))
|
||||||
|
|
||||||
|
def display_properties(options):
|
||||||
|
"""
|
||||||
|
Display properties
|
||||||
|
"""
|
||||||
|
properties = api.properties(options)
|
||||||
|
|
||||||
|
if options.get('json'):
|
||||||
|
if options.get('values'):
|
||||||
|
print(json.dumps(properties, cls=GuessitEncoder, ensure_ascii=False))
|
||||||
|
else:
|
||||||
|
print(json.dumps(list(properties.keys()), cls=GuessitEncoder, ensure_ascii=False))
|
||||||
|
elif options.get('yaml'):
|
||||||
|
import yaml
|
||||||
|
from guessit import yamlutils
|
||||||
|
if options.get('values'):
|
||||||
|
print(yaml.dump(properties, Dumper=yamlutils.CustomDumper, default_flow_style=False, allow_unicode=True))
|
||||||
|
else:
|
||||||
|
print(yaml.dump(list(properties.keys()), Dumper=yamlutils.CustomDumper, default_flow_style=False,
|
||||||
|
allow_unicode=True))
|
||||||
|
else:
|
||||||
|
print('GuessIt properties:')
|
||||||
|
|
||||||
|
properties_list = list(sorted(properties.keys()))
|
||||||
|
for property_name in properties_list:
|
||||||
|
property_values = properties.get(property_name)
|
||||||
|
print(2 * ' ' + '[+] %s' % (property_name,))
|
||||||
|
if property_values and options.get('values'):
|
||||||
|
for property_value in property_values:
|
||||||
|
print(4 * ' ' + '[!] %s' % (property_value,))
|
||||||
|
|
||||||
|
|
||||||
|
def fix_argv_encoding():
|
||||||
|
"""
|
||||||
|
Fix encoding of sys.argv on windows Python 2
|
||||||
|
"""
|
||||||
|
if six.PY2 and os.name == 'nt': # pragma: no cover
|
||||||
|
# see http://bugs.python.org/issue2128
|
||||||
|
import locale
|
||||||
|
|
||||||
|
for i, j in enumerate(sys.argv):
|
||||||
|
sys.argv[i] = j.decode(locale.getpreferredencoding())
|
||||||
|
|
||||||
|
|
||||||
|
def main(args=None): # pylint:disable=too-many-branches
|
||||||
|
"""
|
||||||
|
Main function for entry point
|
||||||
|
"""
|
||||||
|
fix_argv_encoding()
|
||||||
|
|
||||||
|
if args is None: # pragma: no cover
|
||||||
|
options = parse_options()
|
||||||
|
else:
|
||||||
|
options = parse_options(args)
|
||||||
|
|
||||||
|
config = load_config(options)
|
||||||
|
options = merge_options(config, options)
|
||||||
|
|
||||||
|
if options.get('verbose'):
|
||||||
|
logging.basicConfig(stream=sys.stdout, format='%(message)s')
|
||||||
|
logging.getLogger().setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
help_required = True
|
||||||
|
|
||||||
|
if options.get('version'):
|
||||||
|
print('+-------------------------------------------------------+')
|
||||||
|
print('+ GuessIt ' + __version__ + (28 - len(__version__)) * ' ' + '+')
|
||||||
|
print('+-------------------------------------------------------+')
|
||||||
|
print('+ Rebulk ' + __rebulk_version__ + (29 - len(__rebulk_version__)) * ' ' + '+')
|
||||||
|
print('+-------------------------------------------------------+')
|
||||||
|
print('| Please report any bug or feature request at |')
|
||||||
|
print('| https://github.com/guessit-io/guessit/issues. |')
|
||||||
|
print('+-------------------------------------------------------+')
|
||||||
|
help_required = False
|
||||||
|
|
||||||
|
if options.get('yaml'):
|
||||||
|
try:
|
||||||
|
import yaml # pylint:disable=unused-variable,unused-import
|
||||||
|
except ImportError: # pragma: no cover
|
||||||
|
del options['yaml']
|
||||||
|
print('PyYAML is not installed. \'--yaml\' option will be ignored ...', file=sys.stderr)
|
||||||
|
|
||||||
|
if options.get('properties') or options.get('values'):
|
||||||
|
display_properties(options)
|
||||||
|
help_required = False
|
||||||
|
|
||||||
|
filenames = []
|
||||||
|
if options.get('filename'):
|
||||||
|
for filename in options.get('filename'):
|
||||||
|
filenames.append(filename)
|
||||||
|
if options.get('input_file'):
|
||||||
|
if six.PY2:
|
||||||
|
input_file = open(options.get('input_file'), 'r')
|
||||||
|
else:
|
||||||
|
input_file = open(options.get('input_file'), 'r', encoding='utf-8')
|
||||||
|
try:
|
||||||
|
filenames.extend([line.strip() for line in input_file.readlines()])
|
||||||
|
finally:
|
||||||
|
input_file.close()
|
||||||
|
|
||||||
|
filenames = list(filter(lambda f: f, filenames))
|
||||||
|
|
||||||
|
if filenames:
|
||||||
|
for filename in filenames:
|
||||||
|
help_required = False
|
||||||
|
guess_filename(filename, options)
|
||||||
|
|
||||||
|
if help_required: # pragma: no cover
|
||||||
|
argument_parser.print_help()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__': # pragma: no cover
|
||||||
|
main()
|
||||||
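The entry point can also be driven programmatically; a small sketch (vendored import path assumed):

    from lib.guessit.__main__ import main  # assumed vendored path

    # equivalent to: python -m guessit --json "Movie.2019.1080p.WEB-DL.x264"
    main(["Movie.2019.1080p.WEB-DL.x264", "--json"])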
7
lib/guessit/__version__.py
Normal file
@@ -0,0 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Version module
"""
# pragma: no cover
__version__ = '3.1.2.dev0'
263
lib/guessit/api.py
Normal file
@@ -0,0 +1,263 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
API functions that can be used by external software
"""

try:
    from collections import OrderedDict
except ImportError:  # pragma: no-cover
    from ordereddict import OrderedDict  # pylint:disable=import-error

import os
import traceback

import six
from rebulk.introspector import introspect

from .__version__ import __version__
from .options import parse_options, load_config, merge_options
from .rules import rebulk_builder


class GuessitException(Exception):
    """
    Exception raised when guessit fails to perform a guess because of an internal error.
    """

    def __init__(self, string, options):
        super(GuessitException, self).__init__("An internal error has occurred in guessit.\n"
                                               "===================== Guessit Exception Report =====================\n"
                                               "version=%s\n"
                                               "string=%s\n"
                                               "options=%s\n"
                                               "--------------------------------------------------------------------\n"
                                               "%s"
                                               "--------------------------------------------------------------------\n"
                                               "Please report at "
                                               "https://github.com/guessit-io/guessit/issues.\n"
                                               "====================================================================" %
                                               (__version__, str(string), str(options), traceback.format_exc()))

        self.string = string
        self.options = options


def configure(options=None, rules_builder=rebulk_builder, force=False):
    """
    Load configuration files and initialize rebulk rules if required.

    :param options:
    :type options: dict
    :param rules_builder:
    :type rules_builder:
    :param force:
    :type force: bool
    :return:
    """
    default_api.configure(options, rules_builder=rules_builder, force=force)


def guessit(string, options=None):
    """
    Retrieves all matches from string as a dict
    :param string: the filename or release name
    :type string: str
    :param options:
    :type options: str|dict
    :return:
    :rtype:
    """
    return default_api.guessit(string, options)


def properties(options=None):
    """
    Retrieves all properties with possible values that can be guessed
    :param options:
    :type options: str|dict
    :return:
    :rtype:
    """
    return default_api.properties(options)


def suggested_expected(titles, options=None):
    """
    Return a list of suggested titles to be used as `expected_title` based on the list of titles
    :param titles: the filename or release name
    :type titles: list|set|dict
    :param options:
    :type options: str|dict
    :return:
    :rtype: list of str
    """
    return default_api.suggested_expected(titles, options)


class GuessItApi(object):
    """
    An api class that can be configured with custom Rebulk configuration.
    """

    def __init__(self):
        """Default constructor."""
        self.rebulk = None
        self.config = None
        self.load_config_options = None
        self.advanced_config = None

    @classmethod
    def _fix_encoding(cls, value):
        if isinstance(value, list):
            return [cls._fix_encoding(item) for item in value]
        if isinstance(value, dict):
            return {cls._fix_encoding(k): cls._fix_encoding(v) for k, v in value.items()}
        if six.PY2 and isinstance(value, six.text_type):
            return value.encode('utf-8')
        if six.PY3 and isinstance(value, six.binary_type):
            return value.decode('ascii')
        return value

    @classmethod
    def _has_same_properties(cls, dic1, dic2, values):
        for value in values:
            if dic1.get(value) != dic2.get(value):
                return False
        return True

    def configure(self, options=None, rules_builder=rebulk_builder, force=False, sanitize_options=True):
        """
        Load configuration files and initialize rebulk rules if required.

        :param options:
        :type options: str|dict
        :param rules_builder:
        :type rules_builder:
        :param force:
        :type force: bool
        :return:
        :rtype: dict
        """
        if sanitize_options:
            options = parse_options(options, True)
            options = self._fix_encoding(options)

        if self.config is None or self.load_config_options is None or force or \
                not self._has_same_properties(self.load_config_options,
                                              options,
                                              ['config', 'no_user_config', 'no_default_config']):
            config = load_config(options)
            config = self._fix_encoding(config)
            self.load_config_options = options
        else:
            config = self.config

        advanced_config = merge_options(config.get('advanced_config'), options.get('advanced_config'))

        should_build_rebulk = force or not self.rebulk or not self.advanced_config or \
                              self.advanced_config != advanced_config

        if should_build_rebulk:
            self.advanced_config = advanced_config
            self.rebulk = rules_builder(advanced_config)

        self.config = config
        return self.config

    def guessit(self, string, options=None):  # pylint: disable=too-many-branches
        """
        Retrieves all matches from string as a dict
        :param string: the filename or release name
        :type string: str|Path
        :param options:
        :type options: str|dict
        :return:
        :rtype:
        """
        try:
            from pathlib import Path
            if isinstance(string, Path):
                try:
                    # Handle path-like object
                    string = os.fspath(string)
                except AttributeError:
                    string = str(string)
        except ImportError:
            pass

        try:
            options = parse_options(options, True)
            options = self._fix_encoding(options)
            config = self.configure(options, sanitize_options=False)
            options = merge_options(config, options)
            result_decode = False
            result_encode = False

            if six.PY2:
                if isinstance(string, six.text_type):
                    string = string.encode("utf-8")
                    result_decode = True
                elif isinstance(string, six.binary_type):
                    string = six.binary_type(string)
            if six.PY3:
                if isinstance(string, six.binary_type):
                    string = string.decode('ascii')
                    result_encode = True
                elif isinstance(string, six.text_type):
                    string = six.text_type(string)

            matches = self.rebulk.matches(string, options)
            if result_decode:
                for match in matches:
                    if isinstance(match.value, six.binary_type):
                        match.value = match.value.decode("utf-8")
            if result_encode:
                for match in matches:
                    if isinstance(match.value, six.text_type):
                        match.value = match.value.encode("ascii")
            return matches.to_dict(options.get('advanced', False), options.get('single_value', False),
                                   options.get('enforce_list', False))
        except:
            raise GuessitException(string, options)

    def properties(self, options=None):
        """
        Grab properties and values that can be generated.
        :param options:
        :type options:
        :return:
        :rtype:
        """
        options = parse_options(options, True)
        options = self._fix_encoding(options)
        config = self.configure(options, sanitize_options=False)
        options = merge_options(config, options)
        unordered = introspect(self.rebulk, options).properties
        ordered = OrderedDict()
        for k in sorted(unordered.keys(), key=six.text_type):
            ordered[k] = list(sorted(unordered[k], key=six.text_type))
        if hasattr(self.rebulk, 'customize_properties'):
            ordered = self.rebulk.customize_properties(ordered)
        return ordered

    def suggested_expected(self, titles, options=None):
        """
        Return a list of suggested titles to be used as `expected_title` based on the list of titles
        :param titles: the filename or release name
        :type titles: list|set|dict
        :param options:
        :type options: str|dict
        :return:
        :rtype: list of str
        """
        suggested = []
        for title in titles:
            guess = self.guessit(title, options)
            if len(guess) != 2 or 'title' not in guess:
                suggested.append(title)

        return suggested


default_api = GuessItApi()
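A sketch of using GuessItApi directly instead of the module-level helpers (vendored path assumed; configure() with no arguments falls back to the bundled options.json):

    from lib.guessit.api import GuessItApi  # assumed vendored path

    api = GuessItApi()
    api.configure()  # loads default config and builds the rebulk rules once
    print(api.guessit("Film.2016.720p.HDTV.x264"))
    print(list(api.properties())[:5])  # first few guessable property names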
27
lib/guessit/backports.py
Normal file
@@ -0,0 +1,27 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Backports
"""
# pragma: no-cover
# pylint: skip-file

def cmp_to_key(mycmp):
    """functools.cmp_to_key backport"""
    class KeyClass(object):
        """Key class"""
        def __init__(self, obj, *args):  # pylint: disable=unused-argument
            self.obj = obj
        def __lt__(self, other):
            return mycmp(self.obj, other.obj) < 0
        def __gt__(self, other):
            return mycmp(self.obj, other.obj) > 0
        def __eq__(self, other):
            return mycmp(self.obj, other.obj) == 0
        def __le__(self, other):
            return mycmp(self.obj, other.obj) <= 0
        def __ge__(self, other):
            return mycmp(self.obj, other.obj) >= 0
        def __ne__(self, other):
            return mycmp(self.obj, other.obj) != 0
    return KeyClass
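The backport behaves like functools.cmp_to_key; a minimal sketch (vendored path assumed):

    from lib.guessit.backports import cmp_to_key  # assumed vendored path

    def by_length(a, b):
        # classic cmp-style comparator: negative, zero or positive
        return len(a) - len(b)

    print(sorted(["ccc", "a", "bb"], key=cmp_to_key(by_length)))
    # -> ['a', 'bb', 'ccc']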
586
lib/guessit/config/options.json
Normal file
@@ -0,0 +1,586 @@
{
  "expected_title": ["OSS 117", "This is Us"],
  "allowed_countries": ["au", "gb", "us"],
  "allowed_languages": ["ca", "cs", "de", "en", "es", "fr", "he", "hi", "hu", "it", "ja", "ko", "mul",
                        "nl", "no", "pl", "pt", "ro", "ru", "sv", "te", "uk", "und"],
  "advanced_config": {
    "common_words": ["ca", "cat", "de", "he", "it", "no", "por", "rum", "se", "st", "sub"],
    "groups": {"starting": "([{", "ending": ")]}"},
    "audio_codec": {
      "audio_channels": {
        "1.0": ["1ch", "mono"],
        "2.0": ["2ch", "stereo", "re:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)"],
        "5.1": ["5ch", "6ch", "re:(5[\\W_][01](?:ch)?)(?=[^\\d]|$)", "re:(6[\\W_]0(?:ch)?)(?=[^\\d]|$)"],
        "7.1": ["7ch", "8ch", "re:(7[\\W_][01](?:ch)?)(?=[^\\d]|$)"]
      }
    },
    "container": {
      "subtitles": ["srt", "idx", "sub", "ssa", "ass"],
      "info": ["nfo"],
      "videos": ["3g2", "3gp", "3gp2", "asf", "avi", "divx", "flv", "iso", "m4v", "mk2", "mk3d", "mka",
                 "mkv", "mov", "mp4", "mp4a", "mpeg", "mpg", "ogg", "ogm", "ogv", "qt", "ra", "ram",
                 "rm", "ts", "vob", "wav", "webm", "wma", "wmv"],
      "torrent": ["torrent"],
      "nzb": ["nzb"]
    },
    "country": {
      "synonyms": {
        "ES": ["españa"],
        "GB": ["UK"],
        "BR": ["brazilian", "bra"],
        "CA": ["québec", "quebec", "qc"],
        "MX": ["Latinoamérica", "latin america"]
      }
    },
    "episodes": {
      "season_max_range": 100,
      "episode_max_range": 100,
      "max_range_gap": 1,
      "season_markers": ["s"],
      "season_ep_markers": ["x"],
      "disc_markers": ["d"],
      "episode_markers": ["xe", "ex", "ep", "e", "x"],
      "range_separators": ["-", "~", "to", "a"],
      "discrete_separators": ["+", "&", "and", "et"],
      "season_words": ["season", "saison", "seizoen", "seasons", "saisons", "tem", "temp",
                       "temporada", "temporadas", "stagione"],
      "episode_words": ["episode", "episodes", "eps", "ep", "episodio", "episodios",
                        "capitulo", "capitulos"],
      "of_words": ["of", "sur"],
      "all_words": ["All"]
    },
    "language": {
      "synonyms": {
        "ell": ["gr", "greek"],
        "spa": ["esp", "español", "espanol"],
        "fra": ["français", "vf", "vff", "vfi", "vfq"],
        "swe": ["se"],
        "por_BR": ["po", "pb", "pob", "ptbr", "br", "brazilian"],
        "deu_CH": ["swissgerman", "swiss german"],
        "nld_BE": ["flemish"],
        "cat": ["català", "castellano", "espanol castellano", "español castellano"],
        "ces": ["cz"],
        "ukr": ["ua"],
        "zho": ["cn"],
        "jpn": ["jp"],
        "hrv": ["scr"],
        "mul": ["multi", "dl"]
      },
      "subtitle_affixes": ["sub", "subs", "esub", "esubs", "subbed", "custom subbed", "custom subs",
                           "custom sub", "customsubbed", "customsubs", "customsub",
                           "soft subtitles", "soft subs"],
      "subtitle_prefixes": ["st", "vost", "subforced", "fansub", "hardsub", "legenda", "legendas",
                            "legendado", "subtitulado", "soft", "subtitles"],
      "subtitle_suffixes": ["subforced", "fansub", "hardsub"],
      "language_affixes": ["dublado", "dubbed", "dub"],
      "language_prefixes": ["true"],
      "language_suffixes": ["audio"],
      "weak_affixes": ["v", "audio", "true"]
    },
    "part": {
      "prefixes": ["pt", "part"]
    },
    "release_group": {
      "forbidden_names": ["bonus", "by", "for", "par", "pour", "rip"],
      "ignored_seps": "[]{}()"
    },
    "screen_size": {
      "frame_rates": ["23.976", "24", "25", "29.970", "30", "48", "50", "60", "120"],
      "min_ar": 1.333,
      "max_ar": 1.898,
      "interlaced": ["360", "480", "576", "900", "1080"],
      "progressive": ["360", "480", "540", "576", "900", "1080", "368", "720", "1440", "2160", "4320"]
    },
    "website": {
      "safe_tlds": ["com", "net", "org"],
      "safe_subdomains": ["www"],
      "safe_prefixes": ["co", "com", "net", "org"],
      "prefixes": ["from"]
    },
    "streaming_service": {
      "A&E": ["AE", "A&E"],
      "ABC": "AMBC",
      "ABC Australia": "AUBC",
      "Al Jazeera English": "AJAZ",
      "AMC": "AMC",
      "Amazon Prime": ["AMZN", "Amazon", "re:Amazon-?Prime"],
      "Adult Swim": ["AS", "re:Adult-?Swim"],
      "America's Test Kitchen": "ATK",
      "Animal Planet": "ANPL",
      "AnimeLab": "ANLB",
      "AOL": "AOL",
      "ARD": "ARD",
      "BBC iPlayer": ["iP", "re:BBC-?iPlayer"],
      "BravoTV": "BRAV",
      "Canal+": "CNLP",
      "Cartoon Network": "CN",
      "CBC": "CBC",
      "CBS": "CBS",
      "CNBC": "CNBC",
      "Comedy Central": ["CC", "re:Comedy-?Central"],
      "Channel 4": "4OD",
      "CHRGD": "CHGD",
      "Cinemax": "CMAX",
      "Country Music Television": "CMT",
      "Comedians in Cars Getting Coffee": "CCGC",
      "Crunchy Roll": ["CR", "re:Crunchy-?Roll"],
      "Crackle": "CRKL",
      "CSpan": "CSPN",
      "CTV": "CTV",
      "CuriosityStream": "CUR",
      "CWSeed": "CWS",
      "Daisuki": "DSKI",
      "DC Universe": "DCU",
      "Deadhouse Films": "DHF",
      "DramaFever": ["DF", "DramaFever"],
      "Digiturk Diledigin Yerde": "DDY",
      "Discovery": ["DISC", "Discovery"],
      "Disney": ["DSNY", "Disney"],
      "DIY Network": "DIY",
      "Doc Club": "DOCC",
      "DPlay": "DPLY",
      "E!": "ETV",
      "ePix": "EPIX",
      "El Trece": "ETTV",
      "ESPN": "ESPN",
      "Esquire": "ESQ",
      "Family": "FAM",
      "Family Jr": "FJR",
      "Food Network": "FOOD",
      "Fox": "FOX",
      "Freeform": "FREE",
      "FYI Network": "FYI",
      "Global": "GLBL",
      "GloboSat Play": "GLOB",
      "Hallmark": "HLMK",
      "HBO Go": ["HBO", "re:HBO-?Go"],
      "HGTV": "HGTV",
      "History": ["HIST", "History"],
      "Hulu": "HULU",
      "Investigation Discovery": "ID",
      "IFC": "IFC",
      "iTunes": "iTunes",
      "ITV": "ITV",
      "Knowledge Network": "KNOW",
      "Lifetime": "LIFE",
      "Motor Trend OnDemand": "MTOD",
      "MBC": ["MBC", "MBCVOD"],
      "MSNBC": "MNBC",
      "MTV": "MTV",
      "National Geographic": ["NATG", "re:National-?Geographic"],
      "NBA TV": ["NBA", "re:NBA-?TV"],
      "NBC": "NBC",
      "Netflix": ["NF", "Netflix"],
      "NFL": "NFL",
      "NFL Now": "NFLN",
      "NHL GameCenter": "GC",
      "Nickelodeon": ["NICK", "Nickelodeon"],
      "Norsk Rikskringkasting": "NRK",
      "OnDemandKorea": ["ODK", "OnDemandKorea"],
      "PBS": "PBS",
      "PBS Kids": "PBSK",
      "Playstation Network": "PSN",
      "Pluzz": "PLUZ",
      "RTE One": "RTE",
      "SBS (AU)": "SBS",
      "SeeSo": ["SESO", "SeeSo"],
      "Shomi": "SHMI",
      "Spike": "SPIK",
      "Spike TV": ["SPKE", "re:Spike-?TV"],
      "Sportsnet": "SNET",
      "Sprout": "SPRT",
      "Stan": "STAN",
      "Starz": "STZ",
      "Sveriges Television": "SVT",
      "SwearNet": "SWER",
      "Syfy": "SYFY",
      "TBS": "TBS",
      "TFou": "TFOU",
      "The CW": ["CW", "re:The-?CW"],
      "TLC": "TLC",
      "TubiTV": "TUBI",
      "TV3 Ireland": "TV3",
      "TV4 Sweeden": "TV4",
      "TVING": "TVING",
      "TV Land": ["TVL", "re:TV-?Land"],
      "UFC": "UFC",
      "UKTV": "UKTV",
      "Univision": "UNIV",
      "USA Network": "USAN",
      "Velocity": "VLCT",
      "VH1": "VH1",
      "Viceland": "VICE",
      "Viki": "VIKI",
      "Vimeo": "VMEO",
      "VRV": "VRV",
      "W Network": "WNET",
      "WatchMe": "WME",
      "WWE Network": "WWEN",
      "Xbox Video": "XBOX",
      "Yahoo": "YHOO",
      "YouTube Red": "RED",
      "ZDF": "ZDF"
    }
  }
}
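The defaults above can be overridden per user; a hypothetical ~/.guessit/options.json (picked up and merged by load_config(), shown later in lib/guessit/options.py) could look like:

    {
        "expected_title": ["My Local Show"],
        "advanced_config": {
            "episodes": {"max_range_gap": 2}
        }
    }

Here "My Local Show" and the max_range_gap value are illustrative only; nested dicts are merged key by key, lists are unioned.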
22
lib/guessit/jsonutils.py
Normal file
@@ -0,0 +1,22 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
JSON Utils
"""
import json

from six import text_type
from rebulk.match import Match

class GuessitEncoder(json.JSONEncoder):
    """
    JSON Encoder for guessit response
    """

    def default(self, o):  # pylint:disable=method-hidden
        if isinstance(o, Match):
            return o.advanced
        if hasattr(o, 'name'):  # Babelfish languages/countries long name
            return text_type(o.name)
        # pragma: no cover
        return text_type(o)
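A sketch of serializing a guess with the encoder above (vendored paths assumed):

    import json
    from lib.guessit import guessit
    from lib.guessit.jsonutils import GuessitEncoder  # assumed vendored path

    guess = guessit("Show.S01E01.mkv")
    print(json.dumps(guess, cls=GuessitEncoder, ensure_ascii=False))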
34
lib/guessit/monkeypatch.py
Normal file
@@ -0,0 +1,34 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Monkeypatch initialisation functions
"""

try:
    from collections import OrderedDict
except ImportError:  # pragma: no-cover
    from ordereddict import OrderedDict  # pylint:disable=import-error

from rebulk.match import Match


def monkeypatch_rebulk():
    """Monkeypatch rebulk classes"""

    @property
    def match_advanced(self):
        """
        Build advanced dict from match
        :param self:
        :return:
        """

        ret = OrderedDict()
        ret['value'] = self.value
        if self.raw:
            ret['raw'] = self.raw
        ret['start'] = self.start
        ret['end'] = self.end
        return ret

    Match.advanced = match_advanced
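After monkeypatch_rebulk() has run (it is called from lib/guessit/__init__.py above), every rebulk Match exposes the advanced property; a sketch (vendored paths assumed):

    import json
    from lib.guessit import guessit  # importing the package applies the patch
    from lib.guessit.jsonutils import GuessitEncoder

    advanced = guessit("Show.S01E01.720p.mkv", {"advanced": True})
    # values are rebulk Match objects; the encoder renders each one through the
    # patched 'advanced' property (an OrderedDict with value/raw/start/end)
    print(json.dumps(advanced, cls=GuessitEncoder, ensure_ascii=False))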
295
lib/guessit/options.py
Normal file
@@ -0,0 +1,295 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Options
"""
import copy
import json
import os
import pkgutil
import shlex

from argparse import ArgumentParser

import six


def build_argument_parser():
    """
    Builds the argument parser
    :return: the argument parser
    :rtype: ArgumentParser
    """
    opts = ArgumentParser()
    opts.add_argument(dest='filename', help='Filename or release name to guess', nargs='*')

    naming_opts = opts.add_argument_group("Naming")
    naming_opts.add_argument('-t', '--type', dest='type', default=None,
                             help='The suggested file type: movie, episode. If undefined, type will be guessed.')
    naming_opts.add_argument('-n', '--name-only', dest='name_only', action='store_true', default=None,
                             help='Parse files as name only, considering "/" and "\\" like other separators.')
    naming_opts.add_argument('-Y', '--date-year-first', action='store_true', dest='date_year_first', default=None,
                             help='If short date is found, consider the first digits as the year.')
    naming_opts.add_argument('-D', '--date-day-first', action='store_true', dest='date_day_first', default=None,
                             help='If short date is found, consider the second digits as the day.')
    naming_opts.add_argument('-L', '--allowed-languages', action='append', dest='allowed_languages', default=None,
                             help='Allowed language (can be used multiple times)')
    naming_opts.add_argument('-C', '--allowed-countries', action='append', dest='allowed_countries', default=None,
                             help='Allowed country (can be used multiple times)')
    naming_opts.add_argument('-E', '--episode-prefer-number', action='store_true', dest='episode_prefer_number',
                             default=None,
                             help='Guess "serie.213.avi" as the episode 213. Without this option, '
                                  'it will be guessed as season 2, episode 13')
    naming_opts.add_argument('-T', '--expected-title', action='append', dest='expected_title', default=None,
                             help='Expected title to parse (can be used multiple times)')
    naming_opts.add_argument('-G', '--expected-group', action='append', dest='expected_group', default=None,
                             help='Expected release group (can be used multiple times)')
    naming_opts.add_argument('--includes', action='append', dest='includes', default=None,
                             help='List of properties to be detected')
    naming_opts.add_argument('--excludes', action='append', dest='excludes', default=None,
                             help='List of properties to be ignored')

    input_opts = opts.add_argument_group("Input")
    input_opts.add_argument('-f', '--input-file', dest='input_file', default=None,
                            help='Read filenames from an input text file. File should use UTF-8 charset.')

    output_opts = opts.add_argument_group("Output")
    output_opts.add_argument('-v', '--verbose', action='store_true', dest='verbose', default=None,
                             help='Display debug output')
    output_opts.add_argument('-P', '--show-property', dest='show_property', default=None,
                             help='Display the value of a single property (title, series, video_codec, year, ...)')
    output_opts.add_argument('-a', '--advanced', dest='advanced', action='store_true', default=None,
                             help='Display advanced information for filename guesses, as json output')
    output_opts.add_argument('-s', '--single-value', dest='single_value', action='store_true', default=None,
                             help='Keep only first value found for each property')
    output_opts.add_argument('-l', '--enforce-list', dest='enforce_list', action='store_true', default=None,
                             help='Wrap each found value in a list even when property has a single value')
    output_opts.add_argument('-j', '--json', dest='json', action='store_true', default=None,
                             help='Display information for filename guesses as json output')
    output_opts.add_argument('-y', '--yaml', dest='yaml', action='store_true', default=None,
                             help='Display information for filename guesses as yaml output')

    conf_opts = opts.add_argument_group("Configuration")
    conf_opts.add_argument('-c', '--config', dest='config', action='append', default=None,
                           help='Filepath to configuration file. Configuration file contains the same '
                                'options as those from command line options, but option names have "-" characters '
                                'replaced with "_". This configuration will be merged with default and user '
                                'configuration files.')
    conf_opts.add_argument('--no-user-config', dest='no_user_config', action='store_true',
                           default=None,
                           help='Disable user configuration. If not defined, guessit tries to read configuration files '
                                'at ~/.guessit/options.(json|yml|yaml) and ~/.config/guessit/options.(json|yml|yaml)')
    conf_opts.add_argument('--no-default-config', dest='no_default_config', action='store_true',
                           default=None,
                           help='Disable default configuration. This should be done only if you are providing a full '
                                'configuration through user configuration or --config option. If no "advanced_config" '
                                'is provided by another configuration file, it will still be loaded from default '
                                'configuration.')

    information_opts = opts.add_argument_group("Information")
    information_opts.add_argument('-p', '--properties', dest='properties', action='store_true', default=None,
                                  help='Display properties that can be guessed.')
    information_opts.add_argument('-V', '--values', dest='values', action='store_true', default=None,
                                  help='Display property values that can be guessed.')
    information_opts.add_argument('--version', dest='version', action='store_true', default=None,
                                  help='Display the guessit version.')

    return opts


def parse_options(options=None, api=False):
    """
    Parse given option string

    :param options:
    :type options:
    :param api:
    :type api: boolean
    :return:
    :rtype:
    """
    if isinstance(options, six.string_types):
        args = shlex.split(options)
        options = vars(argument_parser.parse_args(args))
    elif options is None:
        if api:
            options = {}
        else:
            options = vars(argument_parser.parse_args())
    elif not isinstance(options, dict):
        options = vars(argument_parser.parse_args(options))
    return options


argument_parser = build_argument_parser()


class ConfigurationException(Exception):
    """
    Exception related to configuration file.
    """
    pass  # pylint:disable=unnecessary-pass


def load_config(options):
    """
    Load options from configuration files, if defined and present.
    :param options:
    :type options:
    :return:
    :rtype:
    """
    configurations = []

    if not options.get('no_default_config'):
        default_options_data = pkgutil.get_data('guessit', 'config/options.json').decode('utf-8')
        default_options = json.loads(default_options_data)
        configurations.append(default_options)

    config_files = []

    if not options.get('no_user_config'):
        home_directory = os.path.expanduser("~")
        cwd = os.getcwd()
        yaml_supported = False
        try:
            import yaml  # pylint:disable=unused-variable,unused-import
            yaml_supported = True
        except ImportError:
            pass

        config_file_locations = get_options_file_locations(home_directory, cwd, yaml_supported)
        config_files = [f for f in config_file_locations if os.path.exists(f)]

    custom_config_files = options.get('config')
    if custom_config_files:
        config_files = config_files + custom_config_files

    for config_file in config_files:
        config_file_options = load_config_file(config_file)
        if config_file_options:
            configurations.append(config_file_options)

    config = {}
    if configurations:
        config = merge_options(*configurations)

    if 'advanced_config' not in config:
        # Guessit doesn't work without advanced_config, so we use default if no configuration file provides it.
        default_options_data = pkgutil.get_data('guessit', 'config/options.json').decode('utf-8')
        default_options = json.loads(default_options_data)
        config['advanced_config'] = default_options['advanced_config']

    return config


def merge_options(*options):
    """
    Merge options into a single options dict.
    :param options:
    :type options:
    :return:
    :rtype:
    """

    merged = {}
    if options:
        if options[0]:
            merged.update(copy.deepcopy(options[0]))

        for options in options[1:]:
            if options:
                pristine = options.get('pristine')

                if pristine is True:
                    merged = {}
                elif pristine:
                    for to_reset in pristine:
                        if to_reset in merged:
                            del merged[to_reset]

                for (option, value) in options.items():
                    merge_option_value(option, value, merged)

    return merged


def merge_option_value(option, value, merged):
    """
    Merge option value
    :param option:
    :param value:
    :param merged:
    :return:
    """
    if value is not None and option != 'pristine':
        if option in merged.keys() and isinstance(merged[option], list):
            for val in value:
                if val not in merged[option]:
                    merged[option].append(val)
        elif option in merged.keys() and isinstance(merged[option], dict):
            merged[option] = merge_options(merged[option], value)
        elif isinstance(value, list):
            merged[option] = list(value)
        else:
            merged[option] = value


def load_config_file(filepath):
    """
    Load a configuration as an options dict.

    Format of the file is given with filepath extension.
    :param filepath:
    :type filepath:
    :return:
    :rtype:
    """
    if filepath.endswith('.json'):
        with open(filepath) as config_file_data:
            return json.load(config_file_data)
    if filepath.endswith('.yaml') or filepath.endswith('.yml'):
        try:
            import yaml
            with open(filepath) as config_file_data:
                return yaml.load(config_file_data, yaml.SafeLoader)
        except ImportError:  # pragma: no cover
            raise ConfigurationException('Configuration file extension is not supported. '
                                         'PyYAML should be installed to support "%s" file' % (
                                             filepath,))

    try:
        # Try to load input as JSON
        return json.loads(filepath)
    except:  # pylint: disable=bare-except
        pass

    raise ConfigurationException('Configuration file extension is not supported for "%s" file.' % (filepath,))


def get_options_file_locations(homedir, cwd, yaml_supported=False):
    """
    Get all possible locations for options file.
    :param homedir: user home directory
    :type homedir: basestring
    :param cwd: current working directory
    :type cwd: basestring
    :return:
    :rtype: list
    """
    locations = []

    configdirs = [(os.path.join(homedir, '.guessit'), 'options'),
                  (os.path.join(homedir, '.config', 'guessit'), 'options'),
                  (cwd, 'guessit.options')]
    configexts = ['json']

    if yaml_supported:
        configexts.append('yaml')
        configexts.append('yml')

    for configdir in configdirs:
        for configext in configexts:
            locations.append(os.path.join(configdir[0], configdir[1] + '.' + configext))

    return locations
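How the pieces above fit together, as a short sketch (vendored path assumed):

    from lib.guessit.options import parse_options, load_config, merge_options

    cli = parse_options("-t episode --json")  # shlex-split, then argparse
    config = load_config(cli)                 # default + user configuration files
    options = merge_options(config, cli)      # later dicts win; lists are unioned
    print(options["type"], options["json"])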
35
lib/guessit/reutils.py
Normal file
@@ -0,0 +1,35 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Utils for re module
"""

from rebulk.remodule import re


def build_or_pattern(patterns, name=None, escape=False):
    """
    Build an or-pattern string from a list of possible patterns

    :param patterns:
    :type patterns:
    :param name:
    :type name:
    :param escape:
    :type escape:
    :return:
    :rtype:
    """
    or_pattern = []
    for pattern in patterns:
        if not or_pattern:
            or_pattern.append('(?')
            if name:
                or_pattern.append('P<' + name + '>')
            else:
                or_pattern.append(':')
        else:
            or_pattern.append('|')
        or_pattern.append('(?:%s)' % re.escape(pattern) if escape else pattern)
    or_pattern.append(')')
    return ''.join(or_pattern)
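A sketch of the resulting patterns (vendored path assumed):

    from lib.guessit.reutils import build_or_pattern  # assumed vendored path

    print(build_or_pattern(["720p", "1080p"]))
    # -> (?:720p|1080p)
    print(build_or_pattern(["a.b", "c+d"], name="res", escape=True))
    # -> (?P<res>(?:a\.b)|(?:c\+d))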
99
lib/guessit/rules/__init__.py
Normal file
@@ -0,0 +1,99 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Rebulk object default builder
"""
from rebulk import Rebulk

from .markers.path import path
from .markers.groups import groups

from .properties.episodes import episodes
from .properties.container import container
from .properties.source import source
from .properties.video_codec import video_codec
from .properties.audio_codec import audio_codec
from .properties.screen_size import screen_size
from .properties.website import website
from .properties.date import date
from .properties.title import title
from .properties.episode_title import episode_title
from .properties.language import language
from .properties.country import country
from .properties.release_group import release_group
from .properties.streaming_service import streaming_service
from .properties.other import other
from .properties.size import size
from .properties.bit_rate import bit_rate
from .properties.edition import edition
from .properties.cds import cds
from .properties.bonus import bonus
from .properties.film import film
from .properties.part import part
from .properties.crc import crc
from .properties.mimetype import mimetype
from .properties.type import type_

from .processors import processors


def rebulk_builder(config):
    """
    Default builder for main Rebulk object used by api.
    :return: Main Rebulk object
    :rtype: Rebulk
    """
    def _config(name):
        return config.get(name, {})

    rebulk = Rebulk()

    common_words = frozenset(_config('common_words'))

    rebulk.rebulk(path(_config('path')))
    rebulk.rebulk(groups(_config('groups')))

    rebulk.rebulk(episodes(_config('episodes')))
    rebulk.rebulk(container(_config('container')))
    rebulk.rebulk(source(_config('source')))
    rebulk.rebulk(video_codec(_config('video_codec')))
    rebulk.rebulk(audio_codec(_config('audio_codec')))
    rebulk.rebulk(screen_size(_config('screen_size')))
    rebulk.rebulk(website(_config('website')))
    rebulk.rebulk(date(_config('date')))
    rebulk.rebulk(title(_config('title')))
    rebulk.rebulk(episode_title(_config('episode_title')))
    rebulk.rebulk(language(_config('language'), common_words))
    rebulk.rebulk(country(_config('country'), common_words))
    rebulk.rebulk(release_group(_config('release_group')))
    rebulk.rebulk(streaming_service(_config('streaming_service')))
    rebulk.rebulk(other(_config('other')))
    rebulk.rebulk(size(_config('size')))
    rebulk.rebulk(bit_rate(_config('bit_rate')))
    rebulk.rebulk(edition(_config('edition')))
    rebulk.rebulk(cds(_config('cds')))
    rebulk.rebulk(bonus(_config('bonus')))
    rebulk.rebulk(film(_config('film')))
    rebulk.rebulk(part(_config('part')))
    rebulk.rebulk(crc(_config('crc')))

    rebulk.rebulk(processors(_config('processors')))

    rebulk.rebulk(mimetype(_config('mimetype')))
    rebulk.rebulk(type_(_config('type')))

    def customize_properties(properties):
        """
        Customize default rebulk properties
        """
        count = properties['count']
        del properties['count']

        properties['season_count'] = count
        properties['episode_count'] = count

        return properties

    rebulk.customize_properties = customize_properties

    return rebulk
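A sketch of building the rule set manually with the bundled advanced configuration, instead of going through GuessItApi (vendored paths assumed):

    from lib.guessit.options import load_config
    from lib.guessit.rules import rebulk_builder  # assumed vendored path

    advanced = load_config({}).get('advanced_config', {})
    rebulk = rebulk_builder(advanced)
    matches = rebulk.matches("Show.S01E01.720p.HDTV.x264-GRP", {})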
15
lib/guessit/rules/common/__init__.py
Normal file
@@ -0,0 +1,15 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Common module
"""
import re

seps = r' [](){}+*|=-_~#/\\.,;:'  # list of tags/words separators
seps_no_groups = seps.replace('[](){}', '')
seps_no_fs = seps.replace('/', '').replace('\\', '')

title_seps = r'-+/\|'  # separators for title

dash = (r'-', r'['+re.escape(seps_no_fs)+']')  # abbreviation used by many rebulk objects.
alt_dash = (r'@', r'['+re.escape(seps_no_fs)+']')  # abbreviation used by many rebulk objects.
75
lib/guessit/rules/common/comparators.py
Normal file
@@ -0,0 +1,75 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Comparators
"""
try:
    from functools import cmp_to_key
except ImportError:
    from ...backports import cmp_to_key


def marker_comparator_predicate(match):
    """
    Match predicate used in comparator
    """
    return (
        not match.private
        and match.name not in ('proper_count', 'title')
        and not (match.name == 'container' and 'extension' in match.tags)
        and not (match.name == 'other' and match.value == 'Rip')
    )


def marker_weight(matches, marker, predicate):
    """
    Compute the comparator weight of a marker
    :param matches:
    :param marker:
    :param predicate:
    :return:
    """
    return len(set(match.name for match in matches.range(*marker.span, predicate=predicate)))


def marker_comparator(matches, markers, predicate):
    """
    Builds a comparator that returns markers sorted from the most valuable to the least.

    Take the parts where matches count is higher, then when length is higher, then when position is at left.

    :param matches:
    :type matches:
    :param markers:
    :param predicate:
    :return:
    :rtype:
    """

    def comparator(marker1, marker2):
        """
        The actual comparator function.
        """
        matches_count = marker_weight(matches, marker2, predicate) - marker_weight(matches, marker1, predicate)
        if matches_count:
            return matches_count

        # give preference to rightmost path
        return markers.index(marker2) - markers.index(marker1)

    return comparator


def marker_sorted(markers, matches, predicate=marker_comparator_predicate):
    """
    Sort markers from matches, from the most valuable to the least.

    :param markers:
    :type markers:
    :param matches:
    :type matches:
    :param predicate:
    :return:
    :rtype:
    """
    return sorted(markers, key=cmp_to_key(marker_comparator(matches, markers, predicate=predicate)))
125
lib/guessit/rules/common/date.py
Normal file
@@ -0,0 +1,125 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Date
"""
from dateutil import parser

from rebulk.remodule import re

_dsep = r'[-/ \.]'
_dsep_bis = r'[-/ \.x]'

date_regexps = [
    re.compile(r'%s((\d{8}))%s' % (_dsep, _dsep), re.IGNORECASE),
    re.compile(r'%s((\d{6}))%s' % (_dsep, _dsep), re.IGNORECASE),
    re.compile(r'(?:^|[^\d])((\d{2})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE),
    re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE),
    re.compile(r'(?:^|[^\d])((\d{4})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep_bis, _dsep), re.IGNORECASE),
    re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{4}))(?:$|[^\d])' % (_dsep, _dsep_bis), re.IGNORECASE),
    re.compile(r'(?:^|[^\d])((\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4}))(?:$|[^\d])' % (_dsep, _dsep),
               re.IGNORECASE)]


def valid_year(year):
    """Check if number is a valid year"""
    return 1920 <= year < 2030


def _is_int(string):
    """
    Check if the input string is an integer

    :param string:
    :type string:
    :return:
    :rtype:
    """
    try:
        int(string)
        return True
    except ValueError:
        return False


def _guess_day_first_parameter(groups):  # pylint:disable=inconsistent-return-statements
    """
    If day_first is not defined, use some heuristics to fix it.
    It helps to solve issues with python dateutil 2.5.3 parser changes.

    :param groups: match groups found for the date
    :type groups: list of match objects
    :return: day_first option guessed value
    :rtype: bool
    """

    # If the match starts with a long year, day_first is forced to False.
    if _is_int(groups[0]) and valid_year(int(groups[0][:4])):
        return False
    # If the match ends with a long year, day_first is forced to True.
    if _is_int(groups[-1]) and valid_year(int(groups[-1][-4:])):
        return True
    # If the match starts with a short year, day_first is forced to False.
    if _is_int(groups[0]) and int(groups[0][:2]) > 31:
        return False
    # If the match ends with a short year, day_first is forced to True.
    if _is_int(groups[-1]) and int(groups[-1][-2:]) > 31:
        return True


def search_date(string, year_first=None, day_first=None):  # pylint:disable=inconsistent-return-statements
    """Looks for date patterns, and if found, returns the date and group span.

    Assumes there are sentinels at the beginning and end of the string that
    always allow matching a non-digit delimiting the date.

    The year can be given with two digits only. In that case, the nearest
    possible date from today is returned.

    >>> search_date(' This happened on 2002-04-22. ')
    (18, 28, datetime.date(2002, 4, 22))

    >>> search_date(' And this on 17-06-1998. ')
    (13, 23, datetime.date(1998, 6, 17))

    >>> search_date(' no date in here ')
    """
    for date_re in date_regexps:
        search_match = date_re.search(string)
        if not search_match:
            continue

        start, end = search_match.start(1), search_match.end(1)
        groups = search_match.groups()[1:]
        match = '-'.join(groups)

        if match is None:
            continue

        if year_first and day_first is None:
            day_first = False

        if day_first is None:
            day_first = _guess_day_first_parameter(groups)

        # If day_first/year_first is undefined, the parse is made using both possible values.
        yearfirst_opts = [False, True]
        if year_first is not None:
            yearfirst_opts = [year_first]

        dayfirst_opts = [True, False]
        if day_first is not None:
            dayfirst_opts = [day_first]

        kwargs_list = ({'dayfirst': d, 'yearfirst': y}
                       for d in dayfirst_opts for y in yearfirst_opts)
        for kwargs in kwargs_list:
            try:
                date = parser.parse(match, **kwargs)
            except (ValueError, TypeError):  # pragma: no cover
                # see https://bugs.launchpad.net/dateutil/+bug/1247643
                date = None

            # check date plausibility
            if date and valid_year(date.year):  # pylint:disable=no-member
                return start, end, date.date()  # pylint:disable=no-member
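A quick sanity check of the patterns and the day-first heuristic above (a sketch; the filenames are made up):

    >>> search_date('Show.2016.03.10.Name.mkv')
    (5, 15, datetime.date(2016, 3, 10))
    >>> search_date('Show.10.03.2016.Name.mkv')
    (5, 15, datetime.date(2016, 3, 10))

In the first call the leading four-digit year forces day_first to False; in the second the trailing four-digit year forces it to True, so both resolve to the same date.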
53
lib/guessit/rules/common/expected.py
Normal file
@@ -0,0 +1,53 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Expected property factory
"""
import re

from rebulk import Rebulk
from rebulk.utils import find_all

from . import dash, seps


def build_expected_function(context_key):
    """
    Creates an expected property function

    :param context_key:
    :type context_key:
    :param cleanup:
    :type cleanup:
    :return:
    :rtype:
    """

    def expected(input_string, context):
        """
        Expected property functional pattern.

        :param input_string:
        :type input_string:
        :param context:
        :type context:
        :return:
        :rtype:
        """
        ret = []
        for search in context.get(context_key):
            if search.startswith('re:'):
                search = search[3:]
                search = search.replace(' ', '-')
                matches = Rebulk().regex(search, abbreviations=[dash], flags=re.IGNORECASE) \
                    .matches(input_string, context)
                for match in matches:
                    ret.append(match.span)
            else:
                value = search
                for sep in seps:
                    input_string = input_string.replace(sep, ' ')
                    search = search.replace(sep, ' ')
                for start in find_all(input_string, search, ignore_case=True):
                    ret.append({'start': start, 'end': start + len(search), 'value': value})
        return ret

    return expected
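A hedged usage sketch of the two branches (the titles are invented; exact rebulk spans can vary by version):

    expected_title = build_expected_function('expected_title')
    expected_title('The.Show.2000', {'expected_title': ['The Show']})
    # -> [{'start': 0, 'end': 8, 'value': 'The Show'}]
    expected_title('The.Mysterious.Film.2000', {'expected_title': ['re:mysterious']})
    # -> [(4, 14)], the span of 'Mysterious', matched case-insensitively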
136
lib/guessit/rules/common/formatters.py
Normal file
@@ -0,0 +1,136 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Formatters
"""
from rebulk.formatters import formatters
from rebulk.remodule import re
from . import seps

_excluded_clean_chars = ',:;-/\\'
clean_chars = ""
for sep in seps:
    if sep not in _excluded_clean_chars:
        clean_chars += sep


def _potential_before(i, input_string):
    """
    Check if the character at position i can be a potential single char separator considering what's before it.

    :param i:
    :type i: int
    :param input_string:
    :type input_string: str
    :return:
    :rtype: bool
    """
    return i - 1 >= 0 and input_string[i] in seps and input_string[i - 2] in seps and input_string[i - 1] not in seps


def _potential_after(i, input_string):
    """
    Check if the character at position i can be a potential single char separator considering what's after it.

    :param i:
    :type i: int
    :param input_string:
    :type input_string: str
    :return:
    :rtype: bool
    """
    return i + 2 >= len(input_string) or \
        input_string[i + 2] == input_string[i] and input_string[i + 1] not in seps


def cleanup(input_string):
    """
    Removes and strips separators from input_string (but keeps ',;' characters)

    It also keeps separators for single characters (Marvel's Agents of S.H.I.E.L.D.)

    :param input_string:
    :type input_string: str
    :return:
    :rtype:
    """
    clean_string = input_string
    for char in clean_chars:
        clean_string = clean_string.replace(char, ' ')

    # Restore input separators if they separate single characters.
    # Useful for Marvel's Agents of S.H.I.E.L.D.
    # https://github.com/guessit-io/guessit/issues/278

    indices = [i for i, letter in enumerate(clean_string) if letter in seps]

    dots = set()
    if indices:
        clean_list = list(clean_string)

        potential_indices = []

        for i in indices:
            if _potential_before(i, input_string) and _potential_after(i, input_string):
                potential_indices.append(i)

        replace_indices = []

        for potential_index in potential_indices:
            if potential_index - 2 in potential_indices or potential_index + 2 in potential_indices:
                replace_indices.append(potential_index)

        if replace_indices:
            for replace_index in replace_indices:
                dots.add(input_string[replace_index])
                clean_list[replace_index] = input_string[replace_index]
            clean_string = ''.join(clean_list)

    clean_string = strip(clean_string, ''.join([c for c in seps if c not in dots]))

    clean_string = re.sub(' +', ' ', clean_string)
    return clean_string


def strip(input_string, chars=seps):
    """
    Strip separators from input_string

    :param input_string:
    :type input_string:
    :param chars:
    :return:
    :rtype:
    """
    return input_string.strip(chars)


def raw_cleanup(raw):
    """
    Cleanup a raw value to perform raw comparison

    :param raw:
    :type raw:
    :return:
    :rtype:
    """
    return formatters(cleanup, strip)(raw.lower())


def reorder_title(title, articles=('the',), separators=(',', ', ')):
    """
    Reorder the title

    :param title:
    :type title:
    :param articles:
    :type articles:
    :param separators:
    :type separators:
    :return:
    :rtype:
    """
    ltitle = title.lower()
    for article in articles:
        for separator in separators:
            suffix = separator + article
            if ltitle[-len(suffix):] == suffix:
                return title[-len(suffix) + len(separator):] + ' ' + title[:-len(suffix)]
    return title
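A few illustrative calls for the formatters above (a sketch; inputs are made up):

    >>> cleanup('The.Movie.Name_2010')
    'The Movie Name 2010'
    >>> cleanup('Agents.of.S.H.I.E.L.D.')
    'Agents of S.H.I.E.L.D.'
    >>> reorder_title('Simpsons, The')
    'The Simpsons'

The second call shows the single-character restore path: the dots between one-letter "words" are put back instead of being collapsed to spaces.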
165
lib/guessit/rules/common/numeral.py
Normal file
@@ -0,0 +1,165 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
parse numeral from various formats
"""
from rebulk.remodule import re

digital_numeral = r'\d{1,4}'

roman_numeral = r'(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})'

english_word_numeral_list = [
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
    'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
]

french_word_numeral_list = [
    'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
]

french_alt_word_numeral_list = [
    'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
]


def __build_word_numeral(*args):
    """
    Build word numeral regexp from list.

    :param args:
    :type args:
    :param kwargs:
    :type kwargs:
    :return:
    :rtype:
    """
    re_ = None
    for word_list in args:
        for word in word_list:
            if not re_:
                re_ = r'(?:(?=\w+)'
            else:
                re_ += '|'
            re_ += word
    re_ += ')'
    return re_


word_numeral = __build_word_numeral(english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list)

numeral = '(?:' + digital_numeral + '|' + roman_numeral + '|' + word_numeral + ')'

__romanNumeralMap = (
    ('M', 1000),
    ('CM', 900),
    ('D', 500),
    ('CD', 400),
    ('C', 100),
    ('XC', 90),
    ('L', 50),
    ('XL', 40),
    ('X', 10),
    ('IX', 9),
    ('V', 5),
    ('IV', 4),
    ('I', 1)
)

__romanNumeralPattern = re.compile('^' + roman_numeral + '$')


def __parse_roman(value):
    """
    convert Roman numeral to integer

    :param value: Value to parse
    :type value: string
    :return:
    :rtype:
    """
    if not __romanNumeralPattern.search(value):
        raise ValueError('Invalid Roman numeral: %s' % value)

    result = 0
    index = 0
    for num, integer in __romanNumeralMap:
        while value[index:index + len(num)] == num:
            result += integer
            index += len(num)
    return result


def __parse_word(value):
    """
    Convert Word numeral to integer

    :param value: Value to parse
    :type value: string
    :return:
    :rtype:
    """
    for word_list in [english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list]:
        try:
            return word_list.index(value.lower())
        except ValueError:
            pass
    raise ValueError  # pragma: no cover


_clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')


def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
    """
    Parse a numeric value into integer.

    :param value: Value to parse. Can be an integer, roman numeral or word.
    :type value: string
    :param int_enabled:
    :type int_enabled:
    :param roman_enabled:
    :type roman_enabled:
    :param word_enabled:
    :type word_enabled:
    :param clean:
    :type clean:
    :return: Numeric value, or None if value can't be parsed
    :rtype: int
    """
    # pylint: disable=too-many-branches
    if int_enabled:
        try:
            if clean:
                match = _clean_re.match(value)
                if match:
                    clean_value = match.group(1)
                    return int(clean_value)
            return int(value)
        except ValueError:
            pass
    if roman_enabled:
        try:
            if clean:
                for word in value.split():
                    try:
                        return __parse_roman(word.upper())
                    except ValueError:
                        pass
            return __parse_roman(value)
        except ValueError:
            pass
    if word_enabled:
        try:
            if clean:
                for word in value.split():
                    try:
                        return __parse_word(word)
                    except ValueError:  # pragma: no cover
                        pass
            return __parse_word(value)  # pragma: no cover
        except ValueError:  # pragma: no cover
            pass
    raise ValueError('Invalid numeral: ' + value)  # pragma: no cover
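The dispatch order above (integer first, then Roman numeral, then word) can be exercised directly:

    >>> parse_numeral('12')
    12
    >>> parse_numeral('XII')
    12
    >>> parse_numeral('twelve')
    12
    >>> parse_numeral('dix-sept')
    17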
27
lib/guessit/rules/common/pattern.py
Normal file
@@ -0,0 +1,27 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Pattern utility functions
"""


def is_disabled(context, name):
    """Whether a specific pattern is disabled.

    The context object might define an inclusion list (includes) or an exclusion list (excludes).
    A pattern is considered disabled if it appears in the exclusion list, or
    if a non-empty inclusion list is defined and the pattern is not in it.

    :param context:
    :param name:
    :return:
    """
    if not context:
        return False

    excludes = context.get('excludes')
    if excludes and name in excludes:
        return True

    includes = context.get('includes')
    return includes and name not in includes
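For instance, with the function above:

    >>> is_disabled(None, 'year')
    False
    >>> is_disabled({'excludes': ['country']}, 'country')
    True
    >>> is_disabled({'includes': ['title']}, 'year')
    True
    >>> is_disabled({'includes': ['title']}, 'title')
    False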
106
lib/guessit/rules/common/quantity.py
Normal file
@@ -0,0 +1,106 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Quantities: Size
"""
import re
from abc import abstractmethod

import six

from ..common import seps


class Quantity(object):
    """
    Represent a quantity object with magnitude and units.
    """

    parser_re = re.compile(r'(?P<magnitude>\d+(?:[.]\d+)?)(?P<units>[^\d]+)?')

    def __init__(self, magnitude, units):
        self.magnitude = magnitude
        self.units = units

    @classmethod
    @abstractmethod
    def parse_units(cls, value):
        """
        Parse a string to a proper unit notation.
        """
        raise NotImplementedError

    @classmethod
    def fromstring(cls, string):
        """
        Parse the string into a quantity object.

        :param string:
        :return:
        """
        values = cls.parser_re.match(string).groupdict()
        try:
            magnitude = int(values['magnitude'])
        except ValueError:
            magnitude = float(values['magnitude'])
        units = cls.parse_units(values['units'])

        return cls(magnitude, units)

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        if isinstance(other, six.string_types):
            return str(self) == other
        if not isinstance(other, self.__class__):
            return NotImplemented
        return self.magnitude == other.magnitude and self.units == other.units

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return '<{0} [{1}]>'.format(self.__class__.__name__, self)

    def __str__(self):
        return '{0}{1}'.format(self.magnitude, self.units)


class Size(Quantity):
    """
    Represent size.

    e.g.: 1.1GB, 300MB
    """

    @classmethod
    def parse_units(cls, value):
        return value.strip(seps).upper()


class BitRate(Quantity):
    """
    Represent bit rate.

    e.g.: 320Kbps, 1.5Mbps
    """

    @classmethod
    def parse_units(cls, value):
        value = value.strip(seps).capitalize()
        for token in ('bits', 'bit'):
            value = value.replace(token, 'bps')

        return value


class FrameRate(Quantity):
    """
    Represent frame rate.

    e.g.: 24fps, 60fps
    """

    @classmethod
    def parse_units(cls, value):
        return 'fps'
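The fromstring/parse_units round trip for the three subclasses above:

    >>> Size.fromstring('1.1gb')
    <Size [1.1GB]>
    >>> BitRate.fromstring('320kbps')
    <BitRate [320Kbps]>
    >>> FrameRate.fromstring('24fps')
    <FrameRate [24fps]>
    >>> Size.fromstring('300MB') == '300MB'   # string comparison via __eq__
    True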
74
lib/guessit/rules/common/validators.py
Normal file
@@ -0,0 +1,74 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Validators
"""
from functools import partial

from rebulk.validators import chars_before, chars_after, chars_surround
from . import seps

seps_before = partial(chars_before, seps)
seps_after = partial(chars_after, seps)
seps_surround = partial(chars_surround, seps)


def int_coercable(string):
    """
    Check if string can be coerced to int
    :param string:
    :type string:
    :return:
    :rtype:
    """
    try:
        int(string)
        return True
    except ValueError:
        return False


def and_(*validators):
    """
    Compose validator functions; the result passes only if all validators pass.
    :param validators:
    :type validators:
    :return:
    :rtype:
    """
    def composed(string):
        """
        Composed validators function
        :param string:
        :type string:
        :return:
        :rtype:
        """
        for validator in validators:
            if not validator(string):
                return False
        return True
    return composed


def or_(*validators):
    """
    Compose validator functions; the result passes if any validator passes.
    :param validators:
    :type validators:
    :return:
    :rtype:
    """
    def composed(string):
        """
        Composed validators function
        :param string:
        :type string:
        :return:
        :rtype:
        """
        for validator in validators:
            if validator(string):
                return True
        return False
    return composed
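A small composition sketch (non_empty is a made-up predicate for illustration; both predicates take the same single argument):

    non_empty = lambda s: bool(s)
    is_int_like = and_(non_empty, int_coercable)
    is_int_like('42')   # True
    is_int_like('4x2')  # False: int('4x2') raises ValueError
    is_int_like('')     # False: fails the non_empty validator first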
34
lib/guessit/rules/common/words.py
Normal file
@@ -0,0 +1,34 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Words utils
"""
from collections import namedtuple

from . import seps

_Word = namedtuple('_Word', ['span', 'value'])


def iter_words(string):
    """
    Iterate on all words in a string

    :param string:
    :type string:
    :return:
    :rtype: iterable[_Word]
    """
    i = 0
    last_sep_index = -1
    inside_word = False
    for char in string:
        if ord(char) < 128 and char in seps:  # Make sure we don't exclude unicode characters.
            if inside_word:
                yield _Word(span=(last_sep_index+1, i), value=string[last_sep_index+1:i])
            inside_word = False
            last_sep_index = i
        else:
            inside_word = True
        i += 1
    if inside_word:
        yield _Word(span=(last_sep_index+1, i), value=string[last_sep_index+1:i])
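For example, with the standard seps:

    >>> list(iter_words('The.Big_Bang-Theory'))
    [_Word(span=(0, 3), value='The'), _Word(span=(4, 7), value='Big'), _Word(span=(8, 12), value='Bang'), _Word(span=(13, 19), value='Theory')]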
5
lib/guessit/rules/markers/__init__.py
Normal file
@@ -0,0 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Markers
"""
52
lib/guessit/rules/markers/groups.py
Normal file
@@ -0,0 +1,52 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Groups markers (...), [...] and {...}
"""
from rebulk import Rebulk


def groups(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk.defaults(name="group", marker=True)

    starting = config['starting']
    ending = config['ending']

    def mark_groups(input_string):
        """
        Functional pattern to mark groups (...), [...] and {...}.

        :param input_string:
        :return:
        """
        openings = ([], [], [])
        i = 0

        ret = []
        for char in input_string:
            start_type = starting.find(char)
            if start_type > -1:
                openings[start_type].append(i)

            i += 1

            end_type = ending.find(char)
            if end_type > -1:
                try:
                    start_index = openings[end_type].pop()
                    ret.append((start_index, i))
                except IndexError:
                    pass
        return ret

    rebulk.functional(mark_groups)
    return rebulk
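Assuming the default marker configuration (starting='([{' and ending=')]}'), the inner mark_groups closure yields spans that include the brackets:

    mark_groups('[Group] Title (2007)')
    # -> [(0, 7), (14, 20)], covering '[Group]' and '(2007)'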
47
lib/guessit/rules/markers/path.py
Normal file
@@ -0,0 +1,47 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Path markers
"""
from rebulk import Rebulk

from rebulk.utils import find_all


def path(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk.defaults(name="path", marker=True)

    def mark_path(input_string, context):
        """
        Functional pattern to mark path elements.

        :param input_string:
        :param context:
        :return:
        """
        ret = []
        if context.get('name_only', False):
            ret.append((0, len(input_string)))
        else:
            indices = list(find_all(input_string, '/'))
            indices += list(find_all(input_string, '\\'))
            indices += [-1, len(input_string)]

            indices.sort()

            for i in range(0, len(indices) - 1):
                ret.append((indices[i] + 1, indices[i + 1]))

        return ret

    rebulk.functional(mark_path)
    return rebulk
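The inner mark_path logic splits on both slash styles and adds sentinels, e.g.:

    mark_path('shows/Show/episode.mkv', {})
    # '/' is found at indices [5, 10]; with the [-1, 22] sentinels this yields
    # [(0, 5), (6, 10), (11, 22)], one marker per path component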
20
lib/guessit/rules/match_processors.py
Normal file
@@ -0,0 +1,20 @@
"""
Match processors
"""
from .common import seps


def strip(match, chars=seps):
    """
    Strip given characters from match.

    :param chars:
    :param match:
    :return:
    """
    while match.input_string[match.start] in chars:
        match.start += 1
    while match.input_string[match.end - 1] in chars:
        match.end -= 1
    if not match:
        return False
259
lib/guessit/rules/processors.py
Normal file
@@ -0,0 +1,259 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Processors
"""
from collections import defaultdict
import copy

import six

from rebulk import Rebulk, Rule, CustomRule, POST_PROCESS, PRE_PROCESS, AppendMatch, RemoveMatch

from .common import seps_no_groups
from .common.formatters import cleanup
from .common.comparators import marker_sorted
from .common.date import valid_year
from .common.words import iter_words


class EnlargeGroupMatches(CustomRule):
    """
    Enlarge matches that are starting and/or ending a group to include the brackets in their span.
    """
    priority = PRE_PROCESS

    def when(self, matches, context):
        starting = []
        ending = []

        for group in matches.markers.named('group'):
            for match in matches.starting(group.start + 1):
                starting.append(match)

            for match in matches.ending(group.end - 1):
                ending.append(match)

        if starting or ending:
            return starting, ending
        return False

    def then(self, matches, when_response, context):
        starting, ending = when_response
        for match in starting:
            matches.remove(match)
            match.start -= 1
            match.raw_start += 1
            matches.append(match)

        for match in ending:
            matches.remove(match)
            match.end += 1
            match.raw_end -= 1
            matches.append(match)


class EquivalentHoles(Rule):
    """
    Creates equivalent matches for holes that have the same values as existing matches (case insensitive).
    """
    priority = POST_PROCESS
    consequence = AppendMatch

    def when(self, matches, context):
        new_matches = []

        for filepath in marker_sorted(matches.markers.named('path'), matches):
            holes = matches.holes(start=filepath.start, end=filepath.end, formatter=cleanup)
            for name in matches.names:
                for hole in list(holes):
                    for current_match in matches.named(name):
                        if isinstance(current_match.value, six.string_types) and \
                                hole.value.lower() == current_match.value.lower():
                            if 'equivalent-ignore' in current_match.tags:
                                continue
                            new_value = _preferred_string(hole.value, current_match.value)
                            if hole.value != new_value:
                                hole.value = new_value
                            if current_match.value != new_value:
                                current_match.value = new_value
                            hole.name = name
                            hole.tags = ['equivalent']
                            new_matches.append(hole)
                            if hole in holes:
                                holes.remove(hole)

        return new_matches


class RemoveAmbiguous(Rule):
    """
    If multiple matches are found with the same name and different values, keep the one in the most valuable filepart.
    Also keep other matches with the same name and values as the kept ones.
    """

    priority = POST_PROCESS
    consequence = RemoveMatch

    def __init__(self, sort_function=marker_sorted, predicate=None):
        super(RemoveAmbiguous, self).__init__()
        self.sort_function = sort_function
        self.predicate = predicate

    def when(self, matches, context):
        fileparts = self.sort_function(matches.markers.named('path'), matches)

        previous_fileparts_names = set()
        values = defaultdict(list)

        to_remove = []
        for filepart in fileparts:
            filepart_matches = matches.range(filepart.start, filepart.end, predicate=self.predicate)

            filepart_names = set()
            for match in filepart_matches:
                filepart_names.add(match.name)
                if match.name in previous_fileparts_names:
                    if match.value not in values[match.name]:
                        to_remove.append(match)
                else:
                    if match.value not in values[match.name]:
                        values[match.name].append(match.value)

            previous_fileparts_names.update(filepart_names)

        return to_remove


class RemoveLessSpecificSeasonEpisode(RemoveAmbiguous):
    """
    If multiple season/episode matches are found with different values,
    keep the one tagged as 'SxxExx' or in the rightmost filepart.
    """
    def __init__(self, name):
        super(RemoveLessSpecificSeasonEpisode, self).__init__(
            sort_function=(lambda markers, matches:
                           marker_sorted(list(reversed(markers)), matches,
                                         lambda match: match.name == name and 'SxxExx' in match.tags)),
            predicate=lambda match: match.name == name)


def _preferred_string(value1, value2):  # pylint:disable=too-many-return-statements
    """
    Retrieves the preferred title from both values.
    :param value1:
    :type value1: str
    :param value2:
    :type value2: str
    :return: The preferred title
    :rtype: str
    """
    if value1 == value2:
        return value1
    if value1.istitle() and not value2.istitle():
        return value1
    if not value1.isupper() and value2.isupper():
        return value1
    if not value1.isupper() and value1[0].isupper() and not value2[0].isupper():
        return value1
    if _count_title_words(value1) > _count_title_words(value2):
        return value1
    return value2


def _count_title_words(value):
    """
    Count how many words in value are title-cased.
    :param value:
    :type value:
    :return:
    :rtype:
    """
    ret = 0
    for word in iter_words(value):
        if word.value.istitle():
            ret += 1
    return ret


class SeasonYear(Rule):
    """
    If a season is a valid year and no year was found, create a year match.
    """
    priority = POST_PROCESS
    consequence = AppendMatch

    def when(self, matches, context):
        ret = []
        if not matches.named('year'):
            for season in matches.named('season'):
                if valid_year(season.value):
                    year = copy.copy(season)
                    year.name = 'year'
                    ret.append(year)
        return ret


class YearSeason(Rule):
    """
    If a year is found, no season is found, and an episode is found, create a season match.
    """
    priority = POST_PROCESS
    consequence = AppendMatch

    def when(self, matches, context):
        ret = []
        if not matches.named('season') and matches.named('episode'):
            for year in matches.named('year'):
                season = copy.copy(year)
                season.name = 'season'
                ret.append(season)
        return ret


class Processors(CustomRule):
    """
    Empty rule for ordering post_processing properly.
    """
    priority = POST_PROCESS

    def when(self, matches, context):
        pass

    def then(self, matches, when_response, context):  # pragma: no cover
        pass


class StripSeparators(CustomRule):
    """
    Strip separators from matches. Keep separators if they are from acronyms, like in ".S.H.I.E.L.D."
    """
    priority = POST_PROCESS

    def when(self, matches, context):
        return matches

    def then(self, matches, when_response, context):  # pragma: no cover
        for match in matches:
            for _ in range(0, len(match.span)):
                if match.raw[0] in seps_no_groups and (len(match.raw) < 3 or match.raw[2] not in seps_no_groups):
                    match.raw_start += 1

            for _ in reversed(range(0, len(match.span))):
                if match.raw[-1] in seps_no_groups and (len(match.raw) < 3 or match.raw[-3] not in seps_no_groups):
                    match.raw_end -= 1


def processors(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    return Rebulk().rules(EnlargeGroupMatches, EquivalentHoles,
                          RemoveLessSpecificSeasonEpisode('season'),
                          RemoveLessSpecificSeasonEpisode('episode'),
                          RemoveAmbiguous, SeasonYear, YearSeason, Processors, StripSeparators)
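The title-preference heuristic used by EquivalentHoles can be checked in isolation:

    >>> _preferred_string('the matrix', 'The Matrix')
    'The Matrix'
    >>> _preferred_string('The Matrix', 'THE MATRIX')
    'The Matrix'

Title-cased values win over lowercase and all-caps variants, and ties fall back to counting title-cased words.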
5
lib/guessit/rules/properties/__init__.py
Normal file
@@ -0,0 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Properties
"""
235
lib/guessit/rules/properties/audio_codec.py
Normal file
@@ -0,0 +1,235 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
audio_codec, audio_profile and audio_channels properties
"""
from rebulk import Rebulk, Rule, RemoveMatch
from rebulk.remodule import re

from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after

audio_properties = ['audio_codec', 'audio_profile', 'audio_channels']


def audio_codec(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()\
        .regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])\
        .string_defaults(ignore_case=True)

    def audio_codec_priority(match1, match2):
        """
        Gives priority to audio_codec
        :param match1:
        :type match1:
        :param match2:
        :type match2:
        :return:
        :rtype:
        """
        if match1.name == 'audio_codec' and match2.name in ['audio_profile', 'audio_channels']:
            return match2
        if match1.name in ['audio_profile', 'audio_channels'] and match2.name == 'audio_codec':
            return match1
        return '__default__'

    rebulk.defaults(name='audio_codec',
                    conflict_solver=audio_codec_priority,
                    disabled=lambda context: is_disabled(context, 'audio_codec'))

    rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
    rebulk.string("MP2", value="MP2")
    rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='Dolby Digital')
    rebulk.regex('Dolby-?Atmos', 'Atmos', value='Dolby Atmos')
    rebulk.string("AAC", value="AAC")
    rebulk.string('EAC3', 'DDP', 'DD+', value='Dolby Digital Plus')
    rebulk.string("Flac", value="FLAC")
    rebulk.string("DTS", value="DTS")
    rebulk.regex('DTS-?HD', 'DTS(?=-?MA)', value='DTS-HD',
                 conflict_solver=lambda match, other: other if other.name == 'audio_codec' else '__default__')
    rebulk.regex('True-?HD', value='Dolby TrueHD')
    rebulk.string('Opus', value='Opus')
    rebulk.string('Vorbis', value='Vorbis')
    rebulk.string('PCM', value='PCM')
    rebulk.string('LPCM', value='LPCM')

    rebulk.defaults(clear=True,
                    name='audio_profile',
                    disabled=lambda context: is_disabled(context, 'audio_profile'))
    rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD'])
    rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD'])
    rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS'])
    rebulk.string('HE', value='High Efficiency', tags=['audio_profile.rule', 'AAC'])
    rebulk.string('LC', value='Low Complexity', tags=['audio_profile.rule', 'AAC'])
    rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital'])
    rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital'])

    rebulk.defaults(clear=True,
                    name="audio_channels",
                    disabled=lambda context: is_disabled(context, 'audio_channels'))
    rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')

    for value, items in config.get('audio_channels').items():
        for item in items:
            if item.startswith('re:'):
                rebulk.regex(item[3:], value=value, children=True)
            else:
                rebulk.string(item, value=value)

    rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
                 AudioChannelsValidatorRule)

    return rebulk


class AudioValidatorRule(Rule):
    """
    Remove audio properties that are not surrounded by separators and not next to each other.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        ret = []

        audio_list = matches.range(predicate=lambda match: match.name in audio_properties)
        for audio in audio_list:
            if not seps_before(audio):
                valid_before = matches.range(audio.start - 1, audio.start,
                                             lambda match: match.name in audio_properties)
                if not valid_before:
                    ret.append(audio)
                    continue
            if not seps_after(audio):
                valid_after = matches.range(audio.end, audio.end + 1,
                                            lambda match: match.name in audio_properties)
                if not valid_after:
                    ret.append(audio)
                    continue

        return ret


class AudioProfileRule(Rule):
    """
    Abstract rule to validate audio profiles
    """
    priority = 64
    dependency = AudioValidatorRule
    consequence = RemoveMatch

    def __init__(self, codec):
        super(AudioProfileRule, self).__init__()
        self.codec = codec

    def enabled(self, context):
        return not is_disabled(context, 'audio_profile')

    def when(self, matches, context):
        profile_list = matches.named('audio_profile',
                                     lambda match: 'audio_profile.rule' in match.tags and
                                     self.codec in match.tags)
        ret = []
        for profile in profile_list:
            codec = matches.at_span(profile.span,
                                    lambda match: match.name == 'audio_codec' and
                                    match.value == self.codec, 0)
            if not codec:
                codec = matches.previous(profile,
                                         lambda match: match.name == 'audio_codec' and
                                         match.value == self.codec)
            if not codec:
                codec = matches.next(profile,
                                     lambda match: match.name == 'audio_codec' and
                                     match.value == self.codec)
            if not codec:
                ret.append(profile)
            if codec:
                ret.extend(matches.conflicting(profile))
        return ret


class DtsHDRule(AudioProfileRule):
    """
    Rule to validate DTS-HD profile
    """

    def __init__(self):
        super(DtsHDRule, self).__init__('DTS-HD')


class DtsRule(AudioProfileRule):
    """
    Rule to validate DTS profile
    """

    def __init__(self):
        super(DtsRule, self).__init__('DTS')


class AacRule(AudioProfileRule):
    """
    Rule to validate AAC profile
    """

    def __init__(self):
        super(AacRule, self).__init__('AAC')


class DolbyDigitalRule(AudioProfileRule):
    """
    Rule to validate Dolby Digital profile
    """

    def __init__(self):
        super(DolbyDigitalRule, self).__init__('Dolby Digital')


class HqConflictRule(Rule):
    """
    Solve conflict between HQ from other property and from audio_profile.
    """

    dependency = [DtsHDRule, DtsRule, AacRule, DolbyDigitalRule]
    consequence = RemoveMatch

    def enabled(self, context):
        return not is_disabled(context, 'audio_profile')

    def when(self, matches, context):
        hq_audio = matches.named('audio_profile', lambda m: m.value == 'High Quality')
        hq_audio_spans = [match.span for match in hq_audio]
        return matches.named('other', lambda m: m.span in hq_audio_spans)


class AudioChannelsValidatorRule(Rule):
    """
    Remove audio_channels matches if there is no audio codec as the previous match.
    """
    priority = 128
    consequence = RemoveMatch

    def enabled(self, context):
        return not is_disabled(context, 'audio_channels')

    def when(self, matches, context):
        ret = []

        for audio_channel in matches.tagged('weak-audio_channels'):
            valid_before = matches.range(audio_channel.start - 1, audio_channel.start,
                                         lambda match: match.name == 'audio_codec')
            if not valid_before:
                ret.append(audio_channel)

        return ret
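End to end, these rules drive guesses such as the following (a hedged sketch; exact values can vary between guessit versions and rule configurations):

    from lib.guessit import guessit
    info = guessit('Show.S01E03.1080p.BluRay.DTS-HD.MA.7.1.x264-GRP.mkv')
    # expected: info['audio_codec'] == 'DTS-HD'
    #           info['audio_profile'] == 'Master Audio'
    #           info['audio_channels'] == '7.1'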
74
lib/guessit/rules/properties/bit_rate.py
Normal file
@@ -0,0 +1,74 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
video_bit_rate and audio_bit_rate properties
"""
import re

from rebulk import Rebulk
from rebulk.rules import Rule, RemoveMatch, RenameMatch

from ..common import dash, seps
from ..common.pattern import is_disabled
from ..common.quantity import BitRate
from ..common.validators import seps_surround


def bit_rate(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: (is_disabled(context, 'audio_bit_rate')
                                              and is_disabled(context, 'video_bit_rate')))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='audio_bit_rate', validator=seps_surround)
    rebulk.regex(r'\d+-?[kmg]b(ps|its?)', r'\d+\.\d+-?[kmg]b(ps|its?)',
                 conflict_solver=(
                     lambda match, other: match
                     if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
                     else other
                 ),
                 formatter=BitRate.fromstring, tags=['release-group-prefix'])

    rebulk.rules(BitRateTypeRule)

    return rebulk


class BitRateTypeRule(Rule):
    """
    Convert audio bit rate guess into video bit rate.
    """
    consequence = [RenameMatch('video_bit_rate'), RemoveMatch]

    def when(self, matches, context):
        to_rename = []
        to_remove = []

        if is_disabled(context, 'audio_bit_rate'):
            to_remove.extend(matches.named('audio_bit_rate'))
        else:
            video_bit_rate_disabled = is_disabled(context, 'video_bit_rate')
            for match in matches.named('audio_bit_rate'):
                previous = matches.previous(match, index=0,
                                            predicate=lambda m: m.name in ('source', 'screen_size', 'video_codec'))
                if previous and not matches.holes(previous.end, match.start, predicate=lambda m: m.value.strip(seps)):
                    after = matches.next(match, index=0, predicate=lambda m: m.name == 'audio_codec')
                    if after and not matches.holes(match.end, after.start, predicate=lambda m: m.value.strip(seps)):
                        bitrate = match.value
                        if bitrate.units == 'Kbps' or (bitrate.units == 'Mbps' and bitrate.magnitude < 10):
                            continue

                    if video_bit_rate_disabled:
                        to_remove.append(match)
                    else:
                        to_rename.append(match)

        if to_rename or to_remove:
            return to_rename, to_remove
        return False
56
lib/guessit/rules/properties/bonus.py
Normal file
@@ -0,0 +1,56 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
bonus property
"""
from rebulk.remodule import re

from rebulk import Rebulk, AppendMatch, Rule

from .title import TitleFromPosition
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def bonus(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'bonus'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)

    rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
                 validator={'__parent__': seps_surround},
                 validate_all=True,
                 conflict_solver=lambda match, conflicting: match
                 if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags
                 else '__default__')

    rebulk.rules(BonusTitleRule)

    return rebulk


class BonusTitleRule(Rule):
    """
    Find bonus title after bonus.
    """
    dependency = TitleFromPosition
    consequence = AppendMatch

    properties = {'bonus_title': [None]}

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        bonus_number = matches.named('bonus', lambda match: not match.private, index=0)
        if bonus_number:
            filepath = matches.markers.at_match(bonus_number, lambda marker: marker.name == 'path', 0)
            hole = matches.holes(bonus_number.end, filepath.end + 1, formatter=cleanup, index=0)
            if hole and hole.value:
                hole.name = 'bonus_title'
                return hole
41
lib/guessit/rules/properties/cds.py
Normal file
@@ -0,0 +1,41 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
cd and cd_count properties
"""
from rebulk.remodule import re

from rebulk import Rebulk

from ..common import dash
from ..common.pattern import is_disabled


def cds(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'cd'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])

    rebulk.regex(r'cd-?(?P<cd>\d+)(?:-?of-?(?P<cd_count>\d+))?',
                 validator={'cd': lambda match: 0 < match.value < 100,
                            'cd_count': lambda match: 0 < match.value < 100},
                 formatter={'cd': int, 'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties={'cd': [None], 'cd_count': [None]})
    rebulk.regex(r'(?P<cd_count>\d+)-?cds?',
                 validator={'cd': lambda match: 0 < match.value < 100,
                            'cd_count': lambda match: 0 < match.value < 100},
                 formatter={'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties={'cd': [None], 'cd_count': [None]})

    return rebulk
61
lib/guessit/rules/properties/container.py
Normal file
@@ -0,0 +1,61 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
container property
"""
from rebulk.remodule import re

from rebulk import Rebulk

from ..common import seps
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
from ...reutils import build_or_pattern


def container(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'container'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name='container',
                    formatter=lambda value: value.strip(seps),
                    tags=['extension'],
                    conflict_solver=lambda match, other: other
                    if other.name in ('source', 'video_codec') or
                    other.name == 'container' and 'extension' not in other.tags
                    else '__default__')

    subtitles = config['subtitles']
    info = config['info']
    videos = config['videos']
    torrent = config['torrent']
    nzb = config['nzb']

    rebulk.regex(r'\.'+build_or_pattern(subtitles)+'$', exts=subtitles, tags=['extension', 'subtitle'])
    rebulk.regex(r'\.'+build_or_pattern(info)+'$', exts=info, tags=['extension', 'info'])
    rebulk.regex(r'\.'+build_or_pattern(videos)+'$', exts=videos, tags=['extension', 'video'])
    rebulk.regex(r'\.'+build_or_pattern(torrent)+'$', exts=torrent, tags=['extension', 'torrent'])
    rebulk.regex(r'\.'+build_or_pattern(nzb)+'$', exts=nzb, tags=['extension', 'nzb'])

    rebulk.defaults(clear=True,
                    name='container',
                    validator=seps_surround,
                    formatter=lambda s: s.lower(),
                    conflict_solver=lambda match, other: match
                    if other.name in ('source',
                                      'video_codec') or other.name == 'container' and 'extension' in other.tags
                    else '__default__')

    rebulk.string(*[sub for sub in subtitles if sub not in ('sub', 'ass')], tags=['subtitle'])
    rebulk.string(*videos, tags=['video'])
    rebulk.string(*torrent, tags=['torrent'])
    rebulk.string(*nzb, tags=['nzb'])

    return rebulk
114
lib/guessit/rules/properties/country.py
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
country property
"""
# pylint: disable=no-member
import babelfish

from rebulk import Rebulk
from ..common.pattern import is_disabled
from ..common.words import iter_words


def country(config, common_words):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'country'))
    rebulk = rebulk.defaults(name='country')

    def find_countries(string, context=None):
        """
        Find countries in given string.
        """
        allowed_countries = context.get('allowed_countries') if context else None
        return CountryFinder(allowed_countries, common_words).find(string)

    rebulk.functional(find_countries,
                      # Prefer language and any other property over country if not US or GB.
                      conflict_solver=lambda match, other: match
                      if other.name != 'language' or match.value not in (babelfish.Country('US'),
                                                                         babelfish.Country('GB'))
                      else other,
                      properties={'country': [None]},
                      disabled=lambda context: not context.get('allowed_countries'))

    babelfish.country_converters['guessit'] = GuessitCountryConverter(config['synonyms'])

    return rebulk


class GuessitCountryConverter(babelfish.CountryReverseConverter):  # pylint: disable=missing-docstring
    def __init__(self, synonyms):
        self.guessit_exceptions = {}

        for alpha2, synlist in synonyms.items():
            for syn in synlist:
                self.guessit_exceptions[syn.lower()] = alpha2

    @property
    def codes(self):  # pylint: disable=missing-docstring
        return (babelfish.country_converters['name'].codes |
                frozenset(babelfish.COUNTRIES.values()) |
                frozenset(self.guessit_exceptions.keys()))

    def convert(self, alpha2):
        if alpha2 == 'GB':
            return 'UK'
        return str(babelfish.Country(alpha2))

    def reverse(self, name):  # pylint:disable=arguments-differ
        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        try:
            return self.guessit_exceptions[name.lower()]
        except KeyError:
            pass

        try:
            return babelfish.Country(name.upper()).alpha2
        except ValueError:
            pass

        for conv in [babelfish.Country.fromname]:
            try:
                return conv(name).alpha2
            except babelfish.CountryReverseError:
                pass

        raise babelfish.CountryReverseError(name)


class CountryFinder(object):
    """Helper class to search and return country matches."""

    def __init__(self, allowed_countries, common_words):
        self.allowed_countries = {l.lower() for l in allowed_countries or []}
        self.common_words = common_words

    def find(self, string):
        """Return all matches for country."""
        for word_match in iter_words(string.strip().lower()):
            word = word_match.value
            if word.lower() in self.common_words:
                continue

            try:
                country_object = babelfish.Country.fromguessit(word)
                if (country_object.name.lower() in self.allowed_countries or
                        country_object.alpha2.lower() in self.allowed_countries):
                    yield self._to_rebulk_match(word_match, country_object)
            except babelfish.Error:
                continue

    @classmethod
    def _to_rebulk_match(cls, word, value):
        return word.span[0], word.span[1], {'value': value}
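The converter registered above can be exercised in isolation (a sketch: the synonyms mapping is hand-stubbed here, while the builder normally reads it from config['synonyms']):

    import babelfish

    conv = GuessitCountryConverter({'GB': ['UK']})
    print(conv.reverse('uk'))      # 'GB' - the synonym exception table wins first
    print(conv.reverse('France'))  # 'FR' - falls through to babelfish.Country.fromname
    print(conv.convert('GB'))      # 'UK' - alpha2 to display name, with GB special-cased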
90
lib/guessit/rules/properties/crc.py
Normal file
@@ -0,0 +1,90 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
crc and uuid properties
"""
from rebulk.remodule import re

from rebulk import Rebulk
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def crc(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'crc32'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)

    rebulk.regex('(?:[a-fA-F]|[0-9]){8}', name='crc32',
                 conflict_solver=lambda match, other: other
                 if other.name in ['episode', 'season']
                 else '__default__')

    rebulk.functional(guess_idnumber, name='uuid',
                      conflict_solver=lambda match, other: match
                      if other.name in ['episode', 'season']
                      else '__default__')
    return rebulk


_DIGIT = 0
_LETTER = 1
_OTHER = 2

_idnum = re.compile(r'(?P<uuid>[a-zA-Z0-9-]{20,})')  # 1.0, (0, 0))


def guess_idnumber(string):
    """
    Guess id number function
    :param string:
    :type string:
    :return:
    :rtype:
    """
    # pylint:disable=invalid-name
    ret = []

    matches = list(_idnum.finditer(string))
    for match in matches:
        result = match.groupdict()
        switch_count = 0
        switch_letter_count = 0
        letter_count = 0
        last_letter = None

        last = _LETTER
        for c in result['uuid']:
            if c in '0123456789':
                ci = _DIGIT
            elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
                ci = _LETTER
                if c != last_letter:
                    switch_letter_count += 1
                last_letter = c
                letter_count += 1
            else:
                ci = _OTHER

            if ci != last:
                switch_count += 1

            last = ci

        # only return the result as probable if we alternate often between
        # char type (more likely for hash values than for common words)
        switch_ratio = float(switch_count) / len(result['uuid'])
        letters_ratio = (float(switch_letter_count) / letter_count) if letter_count > 0 else 1

        if switch_ratio > 0.4 and letters_ratio > 0.4:
            ret.append(match.span())

    return ret
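The uuid heuristic above can be verified by hand on two invented inputs (a sketch using the module-level guess_idnumber defined above):

    # '1A2B3C4D5E6F7G8H9I0J' alternates digit/letter on every one of its 20
    # characters: switch_ratio = 20/20 and letters_ratio = 10/10, both > 0.4.
    print(guess_idnumber('1A2B3C4D5E6F7G8H9I0J'))        # [(0, 20)]

    # A 20+ character run of letters never switches character class, so
    # switch_ratio = 0.0 and the candidate is rejected.
    print(guess_idnumber('abcdefghijklmnopqrstuvwxyz'))  # []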
84
lib/guessit/rules/properties/date.py
Normal file
@@ -0,0 +1,84 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
date and year properties
"""
from rebulk import Rebulk, RemoveMatch, Rule

from ..common.date import search_date, valid_year
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def date(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().defaults(validator=seps_surround)

    rebulk.regex(r"\d{4}", name="year", formatter=int,
                 disabled=lambda context: is_disabled(context, 'year'),
                 conflict_solver=lambda match, other: other
                 if other.name in ('episode', 'season') and len(other.raw) < len(match.raw)
                 else '__default__',
                 validator=lambda match: seps_surround(match) and valid_year(match.value))

    def date_functional(string, context):  # pylint:disable=inconsistent-return-statements
        """
        Search for a date in the string and retrieve the match.

        :param string:
        :return:
        """

        ret = search_date(string, context.get('date_year_first'), context.get('date_day_first'))
        if ret:
            return ret[0], ret[1], {'value': ret[2]}

    rebulk.functional(date_functional, name="date", properties={'date': [None]},
                      disabled=lambda context: is_disabled(context, 'date'),
                      conflict_solver=lambda match, other: other
                      if other.name in ('episode', 'season', 'crc32')
                      else '__default__')

    rebulk.rules(KeepMarkedYearInFilepart)

    return rebulk


class KeepMarkedYearInFilepart(Rule):
    """
    Keep first years marked with [](){} in filepart, or if no year is marked, ensure it won't override titles.
    """
    priority = 64
    consequence = RemoveMatch

    def enabled(self, context):
        return not is_disabled(context, 'year')

    def when(self, matches, context):
        ret = []
        if len(matches.named('year')) > 1:
            for filepart in matches.markers.named('path'):
                years = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year')
                if len(years) > 1:
                    group_years = []
                    ungroup_years = []
                    for year in years:
                        if matches.markers.at_match(year, lambda marker: marker.name == 'group'):
                            group_years.append(year)
                        else:
                            ungroup_years.append(year)
                    if group_years and ungroup_years:
                        ret.extend(ungroup_years)
                        ret.extend(group_years[1:])  # Keep the first year in marker.
                    elif not group_years:
                        ret.append(ungroup_years[0])  # Keep first year for title.
                        if len(ungroup_years) > 2:
                            ret.extend(ungroup_years[2:])
        return ret
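A sketch of the year rule in action (assuming the vendored package behaves like upstream guessit; the title is invented):

    from lib.guessit import guessit

    info = guessit('Some.Movie.2014.1080p.mkv')
    # The 4-digit run passes valid_year() and seps_surround(), and the
    # conflict solver above keeps it over shorter episode/season candidates.
    print(info['year'])   # 2014
    print(info['title'])  # 'Some Movie'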
52
lib/guessit/rules/properties/edition.py
Normal file
@@ -0,0 +1,52 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
edition property
"""
from rebulk.remodule import re

from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def edition(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'edition'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    rebulk.regex('collector', "collector'?s?-edition", 'edition-collector', value='Collector')
    rebulk.regex('special-edition', 'edition-special', value='Special',
                 conflict_solver=lambda match, other: other
                 if other.name == 'episode_details' and other.value == 'Special'
                 else '__default__')
    rebulk.string('se', value='Special', tags='has-neighbor')
    rebulk.string('ddc', value="Director's Definitive Cut")
    rebulk.regex('criterion-edition', 'edition-criterion', 'CC', value='Criterion')
    rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe')
    rebulk.regex('limited', 'limited-edition', value='Limited', tags=['has-neighbor', 'release-group-prefix'])
    rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical')
    rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
                 value="Director's Cut")
    rebulk.regex('extended', 'extended-?cut', 'extended-?version',
                 value='Extended', tags=['has-neighbor', 'release-group-prefix'])
    rebulk.regex('alternat(e|ive)(?:-?Cut)?', value='Alternative Cut', tags=['has-neighbor', 'release-group-prefix'])
    for value in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
        rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
    rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])
    rebulk.regex('imax', 'imax-edition', value='IMAX')
    rebulk.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan')
    rebulk.regex('ultimate-edition', value='Ultimate')
    rebulk.regex("ultimate-collector'?s?-edition", value=['Ultimate', 'Collector'])
    rebulk.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection', value=['Ultimate', 'Fan'])

    return rebulk
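A sketch of the edition patterns above (same assumptions as the earlier sketches; the release name is invented):

    from lib.guessit import guessit

    info = guessit("Some.Movie.1999.Director's.Cut.1080p.mkv")
    # matched by the director'?s?-cut regex, with '-' standing for any separator
    print(info['edition'])  # "Director's Cut"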
300
lib/guessit/rules/properties/episode_title.py
Normal file
@@ -0,0 +1,300 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Episode title
"""
from collections import defaultdict

from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PROCESS

from ..common import seps, title_seps
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import or_
from ..properties.title import TitleFromPosition, TitleBaseRule
from ..properties.type import TypeProcessor


def episode_title(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    previous_names = ('episode', 'episode_count',
                      'season', 'season_count', 'date', 'title', 'year')

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'episode_title'))
    rebulk = rebulk.rules(RemoveConflictsWithEpisodeTitle(previous_names),
                          EpisodeTitleFromPosition(previous_names),
                          AlternativeTitleReplace(previous_names),
                          TitleToEpisodeTitle,
                          Filepart3EpisodeTitle,
                          Filepart2EpisodeTitle,
                          RenameEpisodeTitleWhenMovieType)
    return rebulk


class RemoveConflictsWithEpisodeTitle(Rule):
    """
    Remove conflicting matches that might lead to wrong episode_title parsing.
    """

    priority = 64
    consequence = RemoveMatch

    def __init__(self, previous_names):
        super(RemoveConflictsWithEpisodeTitle, self).__init__()
        self.previous_names = previous_names
        self.next_names = ('streaming_service', 'screen_size', 'source',
                           'video_codec', 'audio_codec', 'other', 'container')
        self.affected_if_holes_after = ('part', )
        self.affected_names = ('part', 'year')

    def when(self, matches, context):
        to_remove = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end,
                                       predicate=lambda m: m.name in self.affected_names):
                before = matches.range(filepart.start, match.start, predicate=lambda m: not m.private, index=-1)
                if not before or before.name not in self.previous_names:
                    continue

                after = matches.range(match.end, filepart.end, predicate=lambda m: not m.private, index=0)
                if not after or after.name not in self.next_names:
                    continue

                group = matches.markers.at_match(match, predicate=lambda m: m.name == 'group', index=0)

                def has_value_in_same_group(current_match, current_group=group):
                    """Return true if current match has value and belongs to the current group."""
                    return current_match.value.strip(seps) and (
                        current_group == matches.markers.at_match(current_match,
                                                                  predicate=lambda mm: mm.name == 'group', index=0)
                    )

                holes_before = matches.holes(before.end, match.start, predicate=has_value_in_same_group)
                holes_after = matches.holes(match.end, after.start, predicate=has_value_in_same_group)

                if not holes_before and not holes_after:
                    continue

                if match.name in self.affected_if_holes_after and not holes_after:
                    continue

                to_remove.append(match)
                if match.parent:
                    to_remove.append(match.parent)

        return to_remove


class TitleToEpisodeTitle(Rule):
    """
    If multiple different titles are found, convert the one following the episode number to episode_title.
    """
    dependency = TitleFromPosition

    def when(self, matches, context):
        titles = matches.named('title')
        title_groups = defaultdict(list)
        for title in titles:
            title_groups[title.value].append(title)

        episode_titles = []
        if len(title_groups) < 2:
            return episode_titles

        for title in titles:
            if matches.previous(title, lambda match: match.name == 'episode'):
                episode_titles.append(title)

        return episode_titles

    def then(self, matches, when_response, context):
        for title in when_response:
            matches.remove(title)
            title.name = 'episode_title'
            matches.append(title)


class EpisodeTitleFromPosition(TitleBaseRule):
    """
    Add episode title match in existing matches.
    Must run after TitleFromPosition rule.
    """
    dependency = TitleToEpisodeTitle

    def __init__(self, previous_names):
        super(EpisodeTitleFromPosition, self).__init__('episode_title', ['title'])
        self.previous_names = previous_names

    def hole_filter(self, hole, matches):
        episode = matches.previous(hole,
                                   lambda previous: previous.named(*self.previous_names),
                                   0)

        crc32 = matches.named('crc32')

        return episode or crc32

    def filepart_filter(self, filepart, matches):
        # Filepart where title was found.
        if matches.range(filepart.start, filepart.end, lambda match: match.name == 'title'):
            return True
        return False

    def should_remove(self, match, matches, filepart, hole, context):
        if match.name == 'episode_details':
            return False
        return super(EpisodeTitleFromPosition, self).should_remove(match, matches, filepart, hole, context)

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.named('episode_title'):
            return
        return super(EpisodeTitleFromPosition, self).when(matches, context)


class AlternativeTitleReplace(Rule):
    """
    If an alternative_title was found and the title is next to episode, season or date, replace it with episode_title.
    """
    dependency = EpisodeTitleFromPosition
    consequence = RenameMatch

    def __init__(self, previous_names):
        super(AlternativeTitleReplace, self).__init__()
        self.previous_names = previous_names

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.named('episode_title'):
            return

        alternative_title = matches.range(predicate=lambda match: match.name == 'alternative_title', index=0)
        if alternative_title:
            main_title = matches.chain_before(alternative_title.start, seps=seps,
                                              predicate=lambda match: 'title' in match.tags, index=0)
            if main_title:
                episode = matches.previous(main_title,
                                           lambda previous: previous.named(*self.previous_names),
                                           0)

                crc32 = matches.named('crc32')

                if episode or crc32:
                    return alternative_title

    def then(self, matches, when_response, context):
        matches.remove(when_response)
        when_response.name = 'episode_title'
        when_response.tags.append('alternative-replaced')
        matches.append(when_response)


class RenameEpisodeTitleWhenMovieType(Rule):
    """
    Rename episode_title to alternative_title when type is movie.
    """
    priority = POST_PROCESS

    dependency = TypeProcessor
    consequence = RenameMatch

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.named('episode_title', lambda m: 'alternative-replaced' not in m.tags) \
                and not matches.named('type', lambda m: m.value == 'episode'):
            return matches.named('episode_title')

    def then(self, matches, when_response, context):
        for match in when_response:
            matches.remove(match)
            match.name = 'alternative_title'
            matches.append(match)


class Filepart3EpisodeTitle(Rule):
    """
    If we have at least 3 fileparts structured like this:

    Serie name/S01/E01-episode_title.mkv
    AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC

    Serie name/S01/episode_title-E01.mkv
    AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC

    and CCCC contains an episode and BBB contains a season number,
    then the title is to be found in AAAA.
    """
    consequence = AppendMatch('title')

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.tagged('filepart-title'):
            return

        fileparts = matches.markers.named('path')
        if len(fileparts) < 3:
            return

        filename = fileparts[-1]
        directory = fileparts[-2]
        subdirectory = fileparts[-3]

        episode_number = matches.range(filename.start, filename.end, lambda match: match.name == 'episode', 0)
        if episode_number:
            season = matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0)

            if season:
                hole = matches.holes(subdirectory.start, subdirectory.end,
                                     ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
                                     formatter=cleanup, seps=title_seps, predicate=lambda match: match.value,
                                     index=0)
                if hole:
                    return hole


class Filepart2EpisodeTitle(Rule):
    """
    If we have at least 2 fileparts structured like this:

    Serie name S01/E01-episode_title.mkv
    AAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBB

    and BBBB contains an episode and AAA contains a hole followed by a season number,
    then the title is to be found in AAAA.

    or

    Serie name/S01E01-episode_title.mkv
    AAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBB

    and BBBB contains a season and an episode and AAA contains a hole,
    then the title is to be found in AAAA.
    """
    consequence = AppendMatch('title')

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.tagged('filepart-title'):
            return

        fileparts = matches.markers.named('path')
        if len(fileparts) < 2:
            return

        filename = fileparts[-1]
        directory = fileparts[-2]

        episode_number = matches.range(filename.start, filename.end, lambda match: match.name == 'episode', 0)
        if episode_number:
            season = (matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0) or
                      matches.range(filename.start, filename.end, lambda match: match.name == 'season', 0))
            if season:
                hole = matches.holes(directory.start, directory.end,
                                     ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
                                     formatter=cleanup, seps=title_seps,
                                     predicate=lambda match: match.value, index=0)
                if hole:
                    hole.tags.append('filepart-title')
                    return hole
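Together with the episode patterns added in episodes.py just below, the rules above split a properly placed hole into title and episode_title (a sketch; invented filename, upstream-like behavior assumed):

    from lib.guessit import guessit

    info = guessit('Show Name - 2x05 - The Title of the Episode.mkv')
    print(info['season'], info['episode'])  # 2 5
    print(info['title'])                    # 'Show Name'
    print(info['episode_title'])            # 'The Title of the Episode'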
912
lib/guessit/rules/properties/episodes.py
Normal file
@@ -0,0 +1,912 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
episode, season, disc, episode_count, season_count and episode_details properties
"""
import copy
from collections import defaultdict

from rebulk import Rebulk, RemoveMatch, Rule, AppendMatch, RenameMatch
from rebulk.match import Match
from rebulk.remodule import re
from rebulk.utils import is_iterable

# relative imports so the vendored package also resolves when addressed as lib.guessit
from .. import match_processors
from ..common.numeral import parse_numeral, numeral
from .title import TitleFromPosition
from ..common import dash, alt_dash, seps, seps_no_fs
from ..common.formatters import strip
from ..common.pattern import is_disabled
from ..common.validators import seps_surround, int_coercable, and_
from ...reutils import build_or_pattern


def episodes(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """

    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    def is_season_episode_disabled(context):
        """Whether season and episode rules should be enabled."""
        return is_disabled(context, 'episode') or is_disabled(context, 'season')

    def episodes_season_chain_breaker(matches):
        """
        Break chains if the offset between two neighbor values exceeds
        episode_max_range / season_max_range.
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        eps = matches.named('episode')
        if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > episode_max_range:
            return True

        seasons = matches.named('season')
        if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > season_max_range:
            return True
        return False

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns

        :param match:
        :param other:
        :return:
        """
        if match.name != other.name:
            if match.name == 'episode' and other.name == 'year':
                return match
            if match.name in ('season', 'episode'):
                if other.name in ('video_codec', 'audio_codec', 'container', 'date'):
                    return match
                if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
                        and not match.initiator.children.named(match.name + 'Marker')) or (
                            other.name == 'screen_size' and not int_coercable(other.raw)):
                    return match
                if other.name in ('season', 'episode') and match.initiator != other.initiator:
                    if (match.initiator.name in ('weak_episode', 'weak_duplicate')
                            and other.initiator.name in ('weak_episode', 'weak_duplicate')):
                        return '__default__'
                    for current in (match, other):
                        if 'weak-episode' in current.tags or 'x' in current.initiator.raw.lower():
                            return current
        return '__default__'

    def ordering_validator(match):
        """
        Validator for season list. They should be in natural order to be validated.

        episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
        or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)
        """
        values = match.children.to_dict()
        if 'season' in values and is_iterable(values['season']):
            # Season numbers must be in natural order to be validated.
            if not list(sorted(values['season'])) == values['season']:
                return False
        if 'episode' in values and is_iterable(values['episode']):
            # Episode numbers must be in natural order to be validated.
            if not list(sorted(values['episode'])) == values['episode']:
                return False

        def is_consecutive(property_name):
            """
            Check if the property season or episode has valid consecutive values.
            :param property_name:
            :type property_name:
            :return:
            :rtype:
            """
            previous_match = None
            valid = True
            for current_match in match.children.named(property_name):
                if previous_match:
                    match.children.previous(current_match,
                                            lambda m: m.name == property_name + 'Separator')
                    separator = match.children.previous(current_match,
                                                        lambda m: m.name == property_name + 'Separator', 0)
                    if separator:
                        if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                            if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
                                valid = False
                        if separator.raw in strong_discrete_separators:
                            valid = True
                            break
                previous_match = current_match
            return valid

        return is_consecutive('episode') and is_consecutive('season')

    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators
        :param match:
        :type match:
        :return:
        :rtype:
        """
        if int_coercable(match.raw):
            return True
        return seps_surround(match)

    season_words = config['season_words']
    episode_words = config['episode_words']
    of_words = config['of_words']
    all_words = config['all_words']
    season_markers = config['season_markers']
    season_ep_markers = config['season_ep_markers']
    disc_markers = config['disc_markers']
    episode_markers = config['episode_markers']
    range_separators = config['range_separators']
    weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
    strong_discrete_separators = config['discrete_separators']
    discrete_separators = strong_discrete_separators + weak_discrete_separators
    episode_max_range = config['episode_max_range']
    season_max_range = config['season_max_range']
    max_range_gap = config['max_range_gap']

    rebulk = Rebulk() \
        .regex_defaults(flags=re.IGNORECASE) \
        .string_defaults(ignore_case=True) \
        .chain_defaults(chain_breaker=episodes_season_chain_breaker) \
        .defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
                  formatter={'season': int, 'episode': int, 'version': int, 'count': int},
                  children=True,
                  private_parent=True,
                  conflict_solver=season_episode_conflict_solver,
                  abbreviations=[alt_dash])

    # S01E02, 01x02, S01S02S03
    rebulk.chain(
        tags=['SxxExx'],
        validate_all=True,
        validator={'__parent__': and_(seps_surround, ordering_validator)},
        disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
               build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)')\
        .repeater('+') \
        .regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*')

    rebulk.chain(tags=['SxxExx'],
                 validate_all=True,
                 validator={'__parent__': and_(seps_surround, ordering_validator)},
                 disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)').repeater('+')

    rebulk.chain(tags=['SxxExx'],
                 validate_all=True,
                 validator={'__parent__': and_(seps_surround, ordering_validator)},
                 disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)') \
        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*')

    rebulk.chain(tags=['SxxExx'],
                 validate_all=True,
                 validator={'__parent__': and_(seps_surround, ordering_validator)},
                 disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)') \
        .regex('(?P<other>Extras)', name='other', value='Extras', tags=['no-release-group-prefix']).repeater('?') \
        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
                                name='seasonSeparator',
                                escape=True) +
               r'(?P<season>\d+)').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
        rebulk.string(episode_detail,
                      private_parent=False,
                      children=False,
                      value=episode_detail,
                      name='episode_details',
                      disabled=lambda context: is_disabled(context, 'episode_details'))

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
                    validate_all=True,
                    validator={'__parent__': and_(seps_surround, ordering_validator)},
                    children=True,
                    private_parent=True,
                    conflict_solver=season_episode_conflict_solver)

    rebulk.chain(validate_all=True,
                 conflict_solver=season_episode_conflict_solver,
                 formatter={'season': parse_numeral, 'count': parse_numeral},
                 validator={'__parent__': and_(seps_surround, ordering_validator),
                            'season': validate_roman,
                            'count': validate_roman},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
        .defaults(formatter={'season': parse_numeral, 'count': parse_numeral},
                  validator={'season': validate_roman, 'count': validate_roman},
                  conflict_solver=season_episode_conflict_solver) \
        .regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
                                        name='seasonSeparator', escape=True) +
               r'@?(?P<season>\d+)').repeater('*')

    rebulk.defaults(abbreviations=[dash])

    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))

    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 validator={'episode': validate_roman},
                 formatter={'episode': parse_numeral},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))

    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 formatter={'other': lambda match: 'Complete'},
                 disabled=lambda context: is_disabled(context, 'season'))

    # 12, 13
    rebulk.chain(tags=['weak-episode'],
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None, tags=['weak-episode']) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})', abbreviations=None).repeater('*')

    # 012, 013
    rebulk.chain(tags=['weak-episode'],
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None, tags=['weak-episode']) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})', abbreviations=None).repeater('*')

    # 112, 113
    rebulk.chain(tags=['weak-episode'],
                 name='weak_episode',
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None, tags=['weak-episode'], name='weak_episode') \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})', abbreviations=None).repeater('*')

    # 1, 2, 3
    rebulk.chain(tags=['weak-episode'],
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
        .defaults(validator=None, tags=['weak-episode']) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})', abbreviations=None).repeater('*')

    # e112, e113, 1e18, 3e19
    rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')

    # ep112, ep113
    rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')

    # cap 112, cap 112_114
    rebulk.chain(tags=['see-pattern'],
                 disabled=is_season_episode_disabled) \
        .defaults(validator=None, tags=['see-pattern']) \
        .regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')

    # 102, 0102
    rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
                 name='weak_duplicate',
                 conflict_solver=season_episode_conflict_solver,
                 disabled=lambda context: (context.get('episode_prefer_number', False) or
                                           context.get('type') == 'movie') or is_season_episode_disabled(context)) \
        .defaults(tags=['weak-episode', 'weak-duplicate'],
                  name='weak_duplicate',
                  validator=None,
                  conflict_solver=season_episode_conflict_solver) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})', abbreviations=None).repeater('*')

    rebulk.regex(r'v(?P<version>\d+)',
                 formatter=int,
                 disabled=lambda context: is_disabled(context, 'version'))

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 formatter=int,
                 pre_match_processor=match_processors.strip,
                 disabled=lambda context: is_disabled(context, 'episode'))

    rebulk.regex(r'Minisodes?',
                 children=False,
                 private_parent=False,
                 name='episode_format',
                 value="Minisode",
                 disabled=lambda context: is_disabled(context, 'episode_format'))

    rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
                 SeePatternRange(range_separators + ['_']),
                 EpisodeNumberSeparatorRange(range_separators),
                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
                 EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak(episode_words),
                 RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)

    return rebulk

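# A hedged sketch of what the chains above produce (illustration only,
# assuming upstream-like behavior; the filenames are invented):
#
#     guessit('Show.S01E02.mkv')      -> {'season': 1, 'episode': 2, ...}
#     guessit('Show.1x02.mkv')        -> {'season': 1, 'episode': 2, ...}
#     guessit('Show.Cap.102_104.mkv') -> season 1, episodes [2, 3, 4], via the
#                                        'cap' chain plus SeePatternRange below.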
class WeakConflictSolver(Rule):
    """
    Rule to decide whether weak-episode or weak-duplicate matches should be kept.

    If an anime is detected:
    - weak-duplicate matches should be removed
    - weak-episode matches should be tagged as anime
    Otherwise:
    - weak-episode matches are removed unless they're part of an episode range match.
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]

    def enabled(self, context):
        return context.get('type') != 'movie'

    @classmethod
    def is_anime(cls, matches):
        """Return True if it seems to be an anime.

        Anime characteristics:
        - version, crc32 matches
        - screen_size inside brackets
        - release_group at start and inside brackets
        """
        if matches.named('version') or matches.named('crc32'):
            return True

        for group in matches.markers.named('group'):
            if matches.range(group.start, group.end, predicate=lambda m: m.name == 'screen_size'):
                return True
            if matches.markers.starting(group.start, predicate=lambda m: m.name == 'path'):
                hole = matches.holes(group.start, group.end, index=0)
                if hole and hole.raw == group.raw:
                    return True

        return False

    def when(self, matches, context):
        to_remove = []
        to_append = []
        anime_detected = self.is_anime(matches)
        for filepart in matches.markers.named('path'):
            weak_matches = matches.range(filepart.start, filepart.end, predicate=(
                lambda m: m.initiator.name == 'weak_episode'))
            weak_dup_matches = matches.range(filepart.start, filepart.end, predicate=(
                lambda m: m.initiator.name == 'weak_duplicate'))
            if anime_detected:
                if weak_matches:
                    to_remove.extend(weak_dup_matches)
                    for match in matches.range(filepart.start, filepart.end, predicate=(
                            lambda m: m.name == 'episode' and m.initiator.name != 'weak_duplicate')):
                        episode = copy.copy(match)
                        episode.tags = episode.tags + ['anime']
                        to_append.append(episode)
                        to_remove.append(match)
            elif weak_dup_matches:
                episodes_in_range = matches.range(filepart.start, filepart.end, predicate=(
                    lambda m:
                    m.name == 'episode' and m.initiator.name == 'weak_episode'
                    and m.initiator.children.named('episodeSeparator')
                ))
                if not episodes_in_range and not matches.range(filepart.start, filepart.end,
                                                               predicate=lambda m: 'SxxExx' in m.tags):
                    to_remove.extend(weak_matches)
                else:
                    for match in episodes_in_range:
                        episode = copy.copy(match)
                        episode.tags = []
                        to_append.append(episode)
                        to_remove.append(match)

                if to_append:
                    to_remove.extend(weak_dup_matches)

        if to_remove or to_append:
            return to_remove, to_append
        return False


class CountValidator(Rule):
    """
    Validate count property and rename it
    """
    priority = 64
    consequence = [RemoveMatch, RenameMatch('episode_count'), RenameMatch('season_count')]

    properties = {'episode_count': [None], 'season_count': [None]}

    def when(self, matches, context):
        to_remove = []
        episode_count = []
        season_count = []

        for count in matches.named('count'):
            previous = matches.previous(count, lambda match: match.name in ['episode', 'season'], 0)
            if previous:
                if previous.name == 'episode':
                    episode_count.append(count)
                elif previous.name == 'season':
                    season_count.append(count)
            else:
                to_remove.append(count)
        if to_remove or episode_count or season_count:
            return to_remove, episode_count, season_count
        return False


class SeePatternRange(Rule):
    """
    Create matches for episode range for SEE pattern. E.g.: Cap.102_104
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]

    def __init__(self, range_separators):
        super(SeePatternRange, self).__init__()
        self.range_separators = range_separators

    def when(self, matches, context):
        to_remove = []
        to_append = []

        for separator in matches.tagged('see-pattern', lambda m: m.name == 'episodeSeparator'):
            previous_match = matches.previous(separator, lambda m: m.name == 'episode' and 'see-pattern' in m.tags, 0)
            next_match = matches.next(separator, lambda m: m.name == 'season' and 'see-pattern' in m.tags, 0)
            if not next_match:
                continue

            next_match = matches.next(next_match, lambda m: m.name == 'episode' and 'see-pattern' in m.tags, 0)
            if previous_match and next_match and separator.value in self.range_separators:
                to_remove.append(next_match)

                for episode_number in range(previous_match.value + 1, next_match.value + 1):
                    match = copy.copy(next_match)
                    match.value = episode_number
                    to_append.append(match)

                to_remove.append(separator)

        if to_remove or to_append:
            return to_remove, to_append
        return False


class AbstractSeparatorRange(Rule):
    """
    Remove separator matches and create matches for the value range (episode or season).
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]

    def __init__(self, range_separators, property_name):
        super(AbstractSeparatorRange, self).__init__()
        self.range_separators = range_separators
        self.property_name = property_name

    def when(self, matches, context):
        to_remove = []
        to_append = []

        for separator in matches.named(self.property_name + 'Separator'):
            previous_match = matches.previous(separator, lambda m: m.name == self.property_name, 0)
            next_match = matches.next(separator, lambda m: m.name == self.property_name, 0)
            initiator = separator.initiator

            if previous_match and next_match and separator.value in self.range_separators:
                to_remove.append(next_match)
                for episode_number in range(previous_match.value + 1, next_match.value):
                    match = copy.copy(next_match)
                    match.value = episode_number
                    initiator.children.append(match)
                    to_append.append(match)
                to_append.append(next_match)
                to_remove.append(separator)

        previous_match = None
        for next_match in matches.named(self.property_name):
            if previous_match:
                separator = matches.input_string[previous_match.initiator.end:next_match.initiator.start]
                if separator not in self.range_separators:
                    separator = strip(separator)
                if separator in self.range_separators:
                    initiator = previous_match.initiator
                    for episode_number in range(previous_match.value + 1, next_match.value):
                        match = copy.copy(next_match)
                        match.value = episode_number
                        initiator.children.append(match)
                        to_append.append(match)
                    to_append.append(Match(previous_match.end, next_match.start - 1,
                                           name=self.property_name + 'Separator',
                                           private=True,
                                           input_string=matches.input_string))
                to_remove.append(next_match)  # Remove and append match to support proper ordering
                to_append.append(next_match)

            previous_match = next_match

        if to_remove or to_append:
            return to_remove, to_append
        return False

class RenameToAbsoluteEpisode(Rule):
    """
    Rename episode to absolute_episode.

    Absolute episodes are only used if two groups of episodes are detected:
        S02E04-06 25-27
        25-27 S02E04-06
        2x04-06 25-27
        28. Anime Name S02E05
    The matches in the group with higher episode values are renamed to absolute_episode.
    """

    consequence = RenameMatch('absolute_episode')

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        initiators = {match.initiator for match in matches.named('episode')
                      if len(match.initiator.children.named('episode')) > 1}
        if len(initiators) != 2:
            ret = []
            for filepart in matches.markers.named('path'):
                if matches.range(filepart.start + 1, filepart.end, predicate=lambda m: m.name == 'episode'):
                    ret.extend(
                        matches.starting(filepart.start, predicate=lambda m: m.initiator.name == 'weak_episode'))
            return ret

        initiators = sorted(initiators, key=lambda item: item.end)
        if not matches.holes(initiators[0].end, initiators[1].start, predicate=lambda m: m.raw.strip(seps)):
            first_range = matches.named('episode', predicate=lambda m: m.initiator == initiators[0])
            second_range = matches.named('episode', predicate=lambda m: m.initiator == initiators[1])
            if len(first_range) == len(second_range):
                if second_range[0].value > first_range[0].value:
                    return second_range
                if first_range[0].value > second_range[0].value:
                    return first_range


class EpisodeNumberSeparatorRange(AbstractSeparatorRange):
    """
    Remove separator matches and create matches for episode number range.
    """

    def __init__(self, range_separators):
        super(EpisodeNumberSeparatorRange, self).__init__(range_separators, "episode")


class SeasonSeparatorRange(AbstractSeparatorRange):
    """
    Remove separator matches and create matches for season range.
    """

    def __init__(self, range_separators):
        super(SeasonSeparatorRange, self).__init__(range_separators, "season")

class RemoveWeakIfMovie(Rule):
|
||||||
|
"""
|
||||||
|
Remove weak-episode tagged matches if it seems to be a movie.
|
||||||
|
"""
|
||||||
|
priority = 64
|
||||||
|
consequence = RemoveMatch
|
||||||
|
|
||||||
|
def enabled(self, context):
|
||||||
|
return context.get('type') != 'episode'
|
||||||
|
|
||||||
|
def when(self, matches, context):
|
||||||
|
to_remove = []
|
||||||
|
to_ignore = set()
|
||||||
|
remove = False
|
||||||
|
for filepart in matches.markers.named('path'):
|
||||||
|
year = matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'year', index=0)
|
||||||
|
if year:
|
||||||
|
remove = True
|
||||||
|
next_match = matches.range(year.end, filepart.end, predicate=lambda m: m.private, index=0)
|
||||||
|
if (next_match and not matches.holes(year.end, next_match.start, predicate=lambda m: m.raw.strip(seps))
|
||||||
|
and not matches.at_match(next_match, predicate=lambda m: m.name == 'year')):
|
||||||
|
to_ignore.add(next_match.initiator)
|
||||||
|
|
||||||
|
to_ignore.update(matches.range(filepart.start, filepart.end,
|
||||||
|
predicate=lambda m: len(m.children.named('episode')) > 1))
|
||||||
|
|
||||||
|
to_remove.extend(matches.conflicting(year))
|
||||||
|
if remove:
|
||||||
|
to_remove.extend(matches.tagged('weak-episode', predicate=(
|
||||||
|
lambda m: m.initiator not in to_ignore and 'anime' not in m.tags)))
|
||||||
|
|
||||||
|
return to_remove
|
||||||
|
|
||||||
|
|
||||||
|
class RemoveWeak(Rule):
|
||||||
|
"""
|
||||||
|
Remove weak-episode matches which appears after video, source, and audio matches.
|
||||||
|
"""
|
||||||
|
priority = 16
|
||||||
|
consequence = RemoveMatch, AppendMatch
|
||||||
|
|
||||||
|
def __init__(self, episode_words):
|
||||||
|
super(RemoveWeak, self).__init__()
|
||||||
|
self.episode_words = episode_words
|
||||||
|
|
||||||
|
def when(self, matches, context):
|
||||||
|
to_remove = []
|
||||||
|
to_append = []
|
||||||
|
for filepart in matches.markers.named('path'):
|
||||||
|
weaks = matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags)
|
||||||
|
if weaks:
|
||||||
|
weak = weaks[0]
|
||||||
|
previous = matches.previous(weak, predicate=lambda m: m.name in (
|
||||||
|
'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile',
|
||||||
|
'audio_channels', 'audio_profile'), index=0)
|
||||||
|
if previous and not matches.holes(
|
||||||
|
previous.end, weak.start, predicate=lambda m: m.raw.strip(seps)):
|
||||||
|
if previous.raw.lower() in self.episode_words:
|
||||||
|
try:
|
||||||
|
episode = copy.copy(weak)
|
||||||
|
episode.name = 'episode'
|
||||||
|
episode.value = int(weak.value)
|
||||||
|
episode.start = previous.start
|
||||||
|
episode.private = False
|
||||||
|
episode.tags = []
|
||||||
|
|
||||||
|
to_append.append(episode)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
to_remove.extend(weaks)
|
||||||
|
if to_remove or to_append:
|
||||||
|
return to_remove, to_append
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class RemoveWeakIfSxxExx(Rule):
|
||||||
|
"""
|
||||||
|
Remove weak-episode tagged matches if SxxExx pattern is matched.
|
||||||
|
|
||||||
|
Weak episodes at beginning of filepart are kept.
|
||||||
|
"""
|
||||||
|
priority = 64
|
||||||
|
consequence = RemoveMatch
|
||||||
|
|
||||||
|
def when(self, matches, context):
|
||||||
|
to_remove = []
|
||||||
|
for filepart in matches.markers.named('path'):
|
||||||
|
if matches.range(filepart.start, filepart.end,
|
||||||
|
predicate=lambda m: not m.private and 'SxxExx' in m.tags):
|
||||||
|
for match in matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags):
|
||||||
|
if match.start != filepart.start or match.initiator.name != 'weak_episode':
|
||||||
|
to_remove.append(match)
|
||||||
|
return to_remove
|
||||||
|
|
||||||
|
|
||||||
|
class RemoveInvalidSeason(Rule):
    """
    Remove invalid season matches.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        to_remove = []
        for filepart in matches.markers.named('path'):
            strong_season = matches.range(filepart.start, filepart.end, index=0,
                                          predicate=lambda m: m.name == 'season'
                                          and not m.private and 'SxxExx' in m.tags)
            if strong_season:
                if strong_season.initiator.children.named('episode'):
                    for season in matches.range(strong_season.end, filepart.end,
                                                predicate=lambda m: m.name == 'season' and not m.private):
                        # remove weak season or seasons without episode matches
                        if 'SxxExx' not in season.tags or not season.initiator.children.named('episode'):
                            if season.initiator:
                                to_remove.append(season.initiator)
                                to_remove.extend(season.initiator.children)
                            else:
                                to_remove.append(season)

        return to_remove


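# Episodes following the first strong SxxExx episode are compared by their prefix
# marker (episodeMarker/episodeSeparator); a marker style different from the
# strong match's marker indicates a false positive and the episode is removed.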
class RemoveInvalidEpisode(Rule):
    """
    Remove invalid episode matches.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        to_remove = []
        for filepart in matches.markers.named('path'):
            strong_episode = matches.range(filepart.start, filepart.end, index=0,
                                           predicate=lambda m: m.name == 'episode'
                                           and not m.private and 'SxxExx' in m.tags)
            if strong_episode:
                strong_ep_marker = RemoveInvalidEpisode.get_episode_prefix(matches, strong_episode)
                for episode in matches.range(strong_episode.end, filepart.end,
                                             predicate=lambda m: m.name == 'episode' and not m.private):
                    ep_marker = RemoveInvalidEpisode.get_episode_prefix(matches, episode)
                    if strong_ep_marker and ep_marker and strong_ep_marker.value.lower() != ep_marker.value.lower():
                        if episode.initiator:
                            to_remove.append(episode.initiator)
                            to_remove.extend(episode.initiator.children)
                        else:
                            to_remove.append(ep_marker)
                            to_remove.append(episode)

        return to_remove

    @staticmethod
    def get_episode_prefix(matches, episode):
        """
        Return the episode prefix: episodeMarker or episodeSeparator.
        """
        return matches.previous(episode, index=0,
                                predicate=lambda m: m.name in ('episodeMarker', 'episodeSeparator'))


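# For titles like "The 100.109", the same pattern can fire more than once per
# filepart; scanning right to left, duplicate (name, pattern) pairs are removed,
# keeping only the right-most occurrence of each.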
class RemoveWeakDuplicate(Rule):
    """
    Remove weak-duplicate tagged matches on duplicate patterns, for example The 100.109.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        to_remove = []
        for filepart in matches.markers.named('path'):
            patterns = defaultdict(list)
            for match in reversed(matches.range(filepart.start, filepart.end,
                                                predicate=lambda m: 'weak-duplicate' in m.tags)):
                if match.pattern in patterns[match.name]:
                    to_remove.append(match)
                else:
                    patterns[match.name].append(match.pattern)
        return to_remove


class EpisodeDetailValidator(Rule):
    """
    Validate episode_details if they are detached or next to season or episode.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        ret = []
        for detail in matches.named('episode_details'):
            if not seps_surround(detail) \
                    and not matches.previous(detail, lambda match: match.name in ['season', 'episode']) \
                    and not matches.next(detail, lambda match: match.name in ['season', 'episode']):
                ret.append(detail)
        return ret


class RemoveDetachedEpisodeNumber(Rule):
    """
    If multiple episodes are found, remove those that are not detached from a range and less than 10.

    Fairy Tail 2 - 16-20, 2 should be removed.
    """
    priority = 64
    consequence = RemoveMatch
    dependency = [RemoveWeakIfSxxExx, RemoveWeakDuplicate]

    def when(self, matches, context):
        ret = []

        episode_numbers = []
        episode_values = set()
        for match in matches.named('episode', lambda m: not m.private and 'weak-episode' in m.tags):
            if match.value not in episode_values:
                episode_numbers.append(match)
                episode_values.add(match.value)

        episode_numbers = list(sorted(episode_numbers, key=lambda m: m.value))
        if len(episode_numbers) > 1 and \
                episode_numbers[0].value < 10 and \
                episode_numbers[1].value - episode_numbers[0].value != 1:
            parent = episode_numbers[0]
            while parent:  # TODO: Add a feature in rebulk to avoid this ...
                ret.append(parent)
                parent = parent.parent
        return ret


class VersionValidator(Rule):
    """
    Validate version if the previous match is an episode, or if surrounded by separators.
    """
    priority = 64
    dependency = [RemoveWeakIfMovie, RemoveWeakIfSxxExx]
    consequence = RemoveMatch

    def when(self, matches, context):
        ret = []
        for version in matches.named('version'):
            episode_number = matches.previous(version, lambda match: match.name == 'episode', 0)
            if not episode_number and not seps_surround(version.initiator):
                ret.append(version)
        return ret


class EpisodeSingleDigitValidator(Rule):
    """
    Remove a single-digit episode when inside a group that doesn't own a title.
    """
    dependency = [TitleFromPosition]

    consequence = RemoveMatch

    def when(self, matches, context):
        ret = []
        for episode in matches.named('episode', lambda match: len(match.initiator) == 1):
            group = matches.markers.at_match(episode, lambda marker: marker.name == 'group', index=0)
            if group:
                if not matches.range(*group.span, predicate=lambda match: match.name == 'title'):
                    ret.append(episode)
        return ret


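# Filenames such as "Show.D1.mkv" use a 'd' marker for disc numbers; the episode
# numbers captured behind such a marker are renamed to 'disc' (or dropped
# entirely when the 'disc' property is disabled in the context).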
class RenameToDiscMatch(Rule):
    """
    Rename episodes detected with `d` episodeMarkers to `disc`.
    """

    consequence = [RenameMatch('disc'), RenameMatch('discMarker'), RemoveMatch]

    def when(self, matches, context):
        discs = []
        markers = []
        to_remove = []

        disc_disabled = is_disabled(context, 'disc')

        for marker in matches.named('episodeMarker', predicate=lambda m: m.value.lower() == 'd'):
            if disc_disabled:
                to_remove.append(marker)
                to_remove.extend(marker.initiator.children)
                continue

            markers.append(marker)
            discs.extend(sorted(marker.initiator.children.named('episode'), key=lambda m: m.value))

        if discs or markers or to_remove:
            return discs, markers, to_remove
        return False
48
lib/guessit/rules/properties/film.py
Normal file
@@ -0,0 +1,48 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
film property
"""
from rebulk import Rebulk, AppendMatch, Rule
from rebulk.remodule import re

from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def film(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, validate_all=True, validator={'__parent__': seps_surround})

    rebulk.regex(r'f(\d{1,2})', name='film', private_parent=True, children=True, formatter=int,
                 disabled=lambda context: is_disabled(context, 'film'))

    rebulk.rules(FilmTitleRule)

    return rebulk


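# The film number (e.g. "F2") usually follows the film title, so the hole between
# the start of the filepart and the film match is taken as 'film_title' once
# cleaned up.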
class FilmTitleRule(Rule):
    """
    Rule to find out film_title (hole after the film property).
    """
    consequence = AppendMatch

    properties = {'film_title': [None]}

    def enabled(self, context):
        return not is_disabled(context, 'film_title')

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        bonus_number = matches.named('film', lambda match: not match.private, index=0)
        if bonus_number:
            filepath = matches.markers.at_match(bonus_number, lambda marker: marker.name == 'path', 0)
            hole = matches.holes(filepath.start, bonus_number.start + 1, formatter=cleanup, index=0)
            if hole and hole.value:
                hole.name = 'film_title'
                return hole
510
lib/guessit/rules/properties/language.py
Normal file
@@ -0,0 +1,510 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
language and subtitle_language properties
"""
# pylint: disable=no-member
import copy
from collections import defaultdict, namedtuple

import babelfish
from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch
from rebulk.remodule import re

from ..common import seps
from ..common.pattern import is_disabled
from ..common.words import iter_words
from ..common.validators import seps_surround


def language(config, common_words):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    subtitle_both = config['subtitle_affixes']
    subtitle_prefixes = sorted(subtitle_both + config['subtitle_prefixes'], key=length_comparator)
    subtitle_suffixes = sorted(subtitle_both + config['subtitle_suffixes'], key=length_comparator)
    lang_both = config['language_affixes']
    lang_prefixes = sorted(lang_both + config['language_prefixes'], key=length_comparator)
    lang_suffixes = sorted(lang_both + config['language_suffixes'], key=length_comparator)
    weak_affixes = frozenset(config['weak_affixes'])

    rebulk = Rebulk(disabled=lambda context: (is_disabled(context, 'language') and
                                              is_disabled(context, 'subtitle_language')))

    rebulk.string(*subtitle_prefixes, name="subtitle_language.prefix", ignore_case=True, private=True,
                  validator=seps_surround, tags=['release-group-prefix'],
                  disabled=lambda context: is_disabled(context, 'subtitle_language'))
    rebulk.string(*subtitle_suffixes, name="subtitle_language.suffix", ignore_case=True, private=True,
                  validator=seps_surround,
                  disabled=lambda context: is_disabled(context, 'subtitle_language'))
    rebulk.string(*lang_suffixes, name="language.suffix", ignore_case=True, private=True,
                  validator=seps_surround, tags=['source-suffix'],
                  disabled=lambda context: is_disabled(context, 'language'))

    def find_languages(string, context=None):
        """Find languages in the string.

        :return: list of tuple (property, Language, lang_word, word)
        """
        return LanguageFinder(context, subtitle_prefixes, subtitle_suffixes,
                              lang_prefixes, lang_suffixes, weak_affixes).find(string)

    rebulk.functional(find_languages,
                      properties={'language': [None]},
                      disabled=lambda context: not context.get('allowed_languages'))
    rebulk.rules(SubtitleExtensionRule,
                 SubtitlePrefixLanguageRule,
                 SubtitleSuffixLanguageRule,
                 RemoveLanguage,
                 RemoveInvalidLanguages(common_words))

    babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms'])

    return rebulk


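# 'und' (undetermined) and 'mul' (multiple) are valid babelfish codes; they are
# used below to tag weak guesses and multi-language releases respectively.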
UNDETERMINED = babelfish.Language('und')
MULTIPLE = babelfish.Language('mul')
NON_SPECIFIC_LANGUAGES = frozenset([UNDETERMINED, MULTIPLE])


class GuessitConverter(babelfish.LanguageReverseConverter):  # pylint: disable=missing-docstring
    _with_country_regexp = re.compile(r'(.*)\((.*)\)')
    _with_country_regexp2 = re.compile(r'(.*)-(.*)')

    def __init__(self, synonyms):
        self.guessit_exceptions = {}
        for code, synlist in synonyms.items():
            if '_' in code:
                (alpha3, country) = code.split('_')
            else:
                (alpha3, country) = (code, None)
            for syn in synlist:
                self.guessit_exceptions[syn.lower()] = (alpha3, country, None)

    @property
    def codes(self):  # pylint: disable=missing-docstring
        return (babelfish.language_converters['alpha3b'].codes |
                babelfish.language_converters['alpha2'].codes |
                babelfish.language_converters['name'].codes |
                babelfish.language_converters['opensubtitles'].codes |
                babelfish.country_converters['name'].codes |
                frozenset(self.guessit_exceptions.keys()))

    def convert(self, alpha3, country=None, script=None):
        return str(babelfish.Language(alpha3, country, script))

    def reverse(self, name):  # pylint:disable=arguments-differ
        name = name.lower()
        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        try:
            return self.guessit_exceptions[name]
        except KeyError:
            pass

        for conv in [babelfish.Language,
                     babelfish.Language.fromalpha3b,
                     babelfish.Language.fromalpha2,
                     babelfish.Language.fromname,
                     babelfish.Language.fromopensubtitles,
                     babelfish.Language.fromietf]:
            try:
                reverse = conv(name)
                return reverse.alpha3, reverse.country, reverse.script
            except (ValueError, babelfish.LanguageReverseError):
                pass

        raise babelfish.LanguageReverseError(name)


def length_comparator(value):
    """
    Return value length.
    """
    return len(value)


_LanguageMatch = namedtuple('_LanguageMatch', ['property_name', 'word', 'lang'])


class LanguageWord(object):
    """
    Extension to the Word namedtuple in order to create compound words.

    E.g.: pt-BR, soft subtitles, custom subs
    """

    def __init__(self, start, end, value, input_string, next_word=None):
        self.start = start
        self.end = end
        self.value = value
        self.input_string = input_string
        self.next_word = next_word

    @property
    def extended_word(self):  # pylint:disable=inconsistent-return-statements
        """
        Return the extended word for this instance, if any.
        """
        if self.next_word:
            separator = self.input_string[self.end:self.next_word.start]
            next_separator = self.input_string[self.next_word.end:self.next_word.end + 1]

            if (separator == '-' and separator != next_separator) or separator in (' ', '.'):
                value = self.input_string[self.start:self.next_word.end].replace('.', ' ')

                return LanguageWord(self.start, self.next_word.end, value, self.input_string, self.next_word.next_word)

    def __repr__(self):
        return '<({start},{end}): {value}'.format(start=self.start, end=self.end, value=self.value)


def to_rebulk_match(language_match):
    """
    Convert a language match to a rebulk Match: start, end, dict.
    """
    word = language_match.word
    start = word.start
    end = word.end
    name = language_match.property_name
    if language_match.lang == UNDETERMINED:
        return start, end, {
            'name': name,
            'value': word.value.lower(),
            'formatter': babelfish.Language,
            'tags': ['weak-language']
        }

    return start, end, {
        'name': name,
        'value': language_match.lang
    }


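# LanguageFinder walks the words of the input string: affix-based detection first
# (a configured prefix/suffix such as a subtitle word glued to a language code),
# then a plain language-name lookup, and finally filtering so undetermined/multi
# guesses are dropped whenever a concrete language was found for the same property.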
class LanguageFinder(object):
    """
    Helper class to search and return language matches: 'language' and 'subtitle_language' properties
    """

    def __init__(self, context,
                 subtitle_prefixes, subtitle_suffixes,
                 lang_prefixes, lang_suffixes, weak_affixes):
        allowed_languages = context.get('allowed_languages') if context else None
        self.allowed_languages = {l.lower() for l in allowed_languages or []}
        self.weak_affixes = weak_affixes
        self.prefixes_map = {}
        self.suffixes_map = {}

        if not is_disabled(context, 'subtitle_language'):
            self.prefixes_map['subtitle_language'] = subtitle_prefixes
            self.suffixes_map['subtitle_language'] = subtitle_suffixes

        self.prefixes_map['language'] = lang_prefixes
        self.suffixes_map['language'] = lang_suffixes

    def find(self, string):
        """
        Return all matches for language and subtitle_language.

        Undetermined language matches are removed if a regular language is found.
        Multi language matches are removed if there are only undetermined language matches.
        """
        regular_lang_map = defaultdict(set)
        undetermined_map = defaultdict(set)
        multi_map = defaultdict(set)

        for match in self.iter_language_matches(string):
            key = match.property_name
            if match.lang == UNDETERMINED:
                undetermined_map[key].add(match)
            elif match.lang == 'mul':
                multi_map[key].add(match)
            else:
                regular_lang_map[key].add(match)

        for key, values in multi_map.items():
            if key in regular_lang_map or key not in undetermined_map:
                for value in values:
                    yield to_rebulk_match(value)

        for key, values in undetermined_map.items():
            if key not in regular_lang_map:
                for value in values:
                    yield to_rebulk_match(value)

        for values in regular_lang_map.values():
            for value in values:
                yield to_rebulk_match(value)

    def iter_language_matches(self, string):
        """
        Return language matches for the given string.
        """
        candidates = []
        previous = None
        for word in iter_words(string):
            language_word = LanguageWord(start=word.span[0], end=word.span[1], value=word.value, input_string=string)
            if previous:
                previous.next_word = language_word
                candidates.append(previous)
            previous = language_word
        if previous:
            candidates.append(previous)

        for candidate in candidates:
            for match in self.iter_matches_for_candidate(candidate):
                yield match

    def iter_matches_for_candidate(self, language_word):
        """
        Return language matches for the given candidate word.
        """
        tuples = [
            (language_word, language_word.next_word,
             self.prefixes_map,
             lambda string, prefix: string.startswith(prefix),
             lambda string, prefix: string[len(prefix):]),
            (language_word.next_word, language_word,
             self.suffixes_map,
             lambda string, suffix: string.endswith(suffix),
             lambda string, suffix: string[:len(string) - len(suffix)])
        ]

        for word, fallback_word, affixes, is_affix, strip_affix in tuples:
            if not word:
                continue

            match = self.find_match_for_word(word, fallback_word, affixes, is_affix, strip_affix)
            if match:
                yield match

        match = self.find_language_match_for_word(language_word)
        if match:
            yield match

    def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix):  # pylint:disable=inconsistent-return-statements
        """
        Return the language match for the given word and affixes.
        """
        for current_word in (word.extended_word, word):
            if not current_word:
                continue

            word_lang = current_word.value.lower()

            for key, parts in affixes.items():
                for part in parts:
                    if not is_affix(word_lang, part):
                        continue

                    match = None
                    value = strip_affix(word_lang, part)
                    if not value:
                        if fallback_word and (
                                abs(fallback_word.start - word.end) <= 1 or abs(word.start - fallback_word.end) <= 1):
                            match = self.find_language_match_for_word(fallback_word, key=key)

                        if not match and part not in self.weak_affixes:
                            match = self.create_language_match(key, LanguageWord(current_word.start, current_word.end,
                                                                                 'und', current_word.input_string))
                    else:
                        match = self.create_language_match(key, LanguageWord(current_word.start, current_word.end,
                                                                             value, current_word.input_string))

                    if match:
                        return match

    def find_language_match_for_word(self, word, key='language'):  # pylint:disable=inconsistent-return-statements
        """
        Return the language match for the given word.
        """
        for current_word in (word.extended_word, word):
            if current_word:
                match = self.create_language_match(key, current_word)
                if match:
                    return match

    def create_language_match(self, key, word):  # pylint:disable=inconsistent-return-statements
        """
        Create a LanguageMatch for a given word.
        """
        lang = self.parse_language(word.value.lower())

        if lang is not None:
            return _LanguageMatch(property_name=key, word=word, lang=lang)

    def parse_language(self, lang_word):  # pylint:disable=inconsistent-return-statements
        """
        Parse the lang_word into a valid Language.

        Multi and Undetermined languages are also valid languages.
        """
        try:
            lang = babelfish.Language.fromguessit(lang_word)
            if ((hasattr(lang, 'name') and lang.name.lower() in self.allowed_languages) or
                    (hasattr(lang, 'alpha2') and lang.alpha2.lower() in self.allowed_languages) or
                    lang.alpha3.lower() in self.allowed_languages):
                return lang

        except babelfish.Error:
            pass


class SubtitlePrefixLanguageRule(Rule):
    """
    Convert a language guess to subtitle_language if the previous match is a subtitle language prefix.
    """
    consequence = RemoveMatch

    properties = {'subtitle_language': [None]}

    def enabled(self, context):
        return not is_disabled(context, 'subtitle_language')

    def when(self, matches, context):
        to_rename = []
        to_remove = matches.named('subtitle_language.prefix')
        for lang in matches.named('language'):
            prefix = matches.previous(lang, lambda match: match.name == 'subtitle_language.prefix', 0)
            if not prefix:
                group_marker = matches.markers.at_match(lang, lambda marker: marker.name == 'group', 0)
                if group_marker:
                    # Find prefix if placed just before the group
                    prefix = matches.previous(group_marker, lambda match: match.name == 'subtitle_language.prefix',
                                              0)
                    if not prefix:
                        # Find prefix if placed before in the group
                        prefix = matches.range(group_marker.start, lang.start,
                                               lambda match: match.name == 'subtitle_language.prefix', 0)
            if prefix:
                to_rename.append((prefix, lang))
                to_remove.extend(matches.conflicting(lang))
                if prefix in to_remove:
                    to_remove.remove(prefix)
        if to_rename or to_remove:
            return to_rename, to_remove
        return False

    def then(self, matches, when_response, context):
        to_rename, to_remove = when_response
        super(SubtitlePrefixLanguageRule, self).then(matches, to_remove, context)
        for prefix, match in to_rename:
            # Remove suffix equivalent of prefix.
            suffix = copy.copy(prefix)
            suffix.name = 'subtitle_language.suffix'
            if suffix in matches:
                matches.remove(suffix)
            matches.remove(match)
            match.name = 'subtitle_language'
            matches.append(match)


class SubtitleSuffixLanguageRule(Rule):
    """
    Convert a language guess to subtitle_language if the next match is a subtitle language suffix.
    """
    dependency = SubtitlePrefixLanguageRule
    consequence = RemoveMatch

    properties = {'subtitle_language': [None]}

    def enabled(self, context):
        return not is_disabled(context, 'subtitle_language')

    def when(self, matches, context):
        to_append = []
        to_remove = matches.named('subtitle_language.suffix')
        for lang in matches.named('language'):
            suffix = matches.next(lang, lambda match: match.name == 'subtitle_language.suffix', 0)
            if suffix:
                to_append.append(lang)
                if suffix in to_remove:
                    to_remove.remove(suffix)
        if to_append or to_remove:
            return to_append, to_remove
        return False

    def then(self, matches, when_response, context):
        to_rename, to_remove = when_response
        super(SubtitleSuffixLanguageRule, self).then(matches, to_remove, context)
        for match in to_rename:
            matches.remove(match)
            match.name = 'subtitle_language'
            matches.append(match)


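# A language right before a subtitle container extension (e.g. "movie.en.srt")
# is a strong signal: the language match is renamed to subtitle_language and any
# conflicting 'source' match is discarded.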
class SubtitleExtensionRule(Rule):
    """
    Convert a language guess to subtitle_language if the next match is a subtitle extension.

    Since it's a strong match, it also removes any conflicting source with it.
    """
    consequence = [RemoveMatch, RenameMatch('subtitle_language')]

    properties = {'subtitle_language': [None]}

    def enabled(self, context):
        return not is_disabled(context, 'subtitle_language')

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        subtitle_extension = matches.named('container',
                                           lambda match: 'extension' in match.tags and 'subtitle' in match.tags,
                                           0)
        if subtitle_extension:
            subtitle_lang = matches.previous(subtitle_extension, lambda match: match.name == 'language', 0)
            if subtitle_lang:
                for weak in matches.named('subtitle_language', predicate=lambda m: 'weak-language' in m.tags):
                    weak.private = True

                return matches.conflicting(subtitle_lang, lambda m: m.name == 'source'), subtitle_lang


class RemoveLanguage(Rule):
    """Remove language matches that were not converted to subtitle_language when language is disabled."""

    consequence = RemoveMatch

    def enabled(self, context):
        return is_disabled(context, 'language')

    def when(self, matches, context):
        return matches.named('language')


class RemoveInvalidLanguages(Rule):
    """Remove language matches that match blacklisted common words."""

    consequence = RemoveMatch
    priority = 32

    def __init__(self, common_words):
        """Constructor."""
        super(RemoveInvalidLanguages, self).__init__()
        self.common_words = common_words

    def when(self, matches, context):
        to_remove = []
        for match in matches.range(0, len(matches.input_string),
                                   predicate=lambda m: m.name in ('language', 'subtitle_language')):
            if match.raw.lower() not in self.common_words:
                continue

            group = matches.markers.at_match(match, index=0, predicate=lambda m: m.name == 'group')
            if group and (
                    not matches.range(
                        group.start, group.end, predicate=lambda m: m.name not in ('language', 'subtitle_language')
                    ) and (not matches.holes(group.start, group.end, predicate=lambda m: m.value.strip(seps)))):
                continue

            to_remove.append(match)

        return to_remove
55
lib/guessit/rules/properties/mimetype.py
Normal file
@@ -0,0 +1,55 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
mimetype property
"""
import mimetypes

from rebulk import Rebulk, CustomRule, POST_PROCESS
from rebulk.match import Match

from ..common.pattern import is_disabled
from ...rules.processors import Processors


def mimetype(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'mimetype'))
    rebulk.rules(Mimetype)

    return rebulk


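# Runs after all other processors and simply asks the stdlib mimetypes module to
# guess a type for the whole input string; the resulting match is appended as a
# zero-length match at the very end of the string.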
class Mimetype(CustomRule):
    """
    Mimetype post processor.
    """
    priority = POST_PROCESS

    dependency = Processors

    def when(self, matches, context):
        mime, _ = mimetypes.guess_type(matches.input_string, strict=False)
        return mime

    def then(self, matches, when_response, context):
        mime = when_response
        matches.append(Match(len(matches.input_string), len(matches.input_string), name='mimetype', value=mime))

    @property
    def properties(self):
        """
        Properties for this rule.
        """
        return {'mimetype': [None]}
383
lib/guessit/rules/properties/other.py
Normal file
@@ -0,0 +1,383 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
other property
"""
import copy

from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch, POST_PROCESS, AppendMatch
from rebulk.remodule import re

from ..common import dash
from ..common import seps
from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround, and_
from ...reutils import build_or_pattern
from ...rules.common.formatters import raw_cleanup


def other(config):  # pylint:disable=unused-argument,too-many-statements
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'other'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    rebulk.defaults(name="other", validator=seps_surround)

    rebulk.regex('Audio-?Fix', 'Audio-?Fixed', value='Audio Fixed')
    rebulk.regex('Sync-?Fix', 'Sync-?Fixed', value='Sync Fixed')
    rebulk.regex('Dual', 'Dual-?Audio', value='Dual Audio')
    rebulk.regex('ws', 'wide-?screen', value='Widescreen')
    rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')

    rebulk.string('Repack', 'Rerip', value='Proper',
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.string('Proper', value='Proper',
                  tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix'])

    rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
                 tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
    rebulk.regex('Real', value='Proper',
                 tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real'])

    rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after',
                                                     'streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix',
                 tags=['streaming_service.prefix', 'streaming_service.suffix'])

    rebulk.string('Fansub', value='Fan Subtitled', tags='has-neighbor')
    rebulk.string('Fastsub', value='Fast Subtitled', tags='has-neighbor')

    season_words = build_or_pattern(["seasons?", "series?"])
    complete_articles = build_or_pattern(["The"])

    def validate_complete(match):
        """
        Make sure a season word is defined next to the Complete match.
        """
        children = match.children
        if not children.named('completeWordsBefore') and not children.named('completeWordsAfter'):
            return False
        return True

    rebulk.regex('(?P<completeArticle>' + complete_articles + '-)?' +
                 '(?P<completeWordsBefore>' + season_words + '-)?' +
                 'Complete' + '(?P<completeWordsAfter>-' + season_words + ')?',
                 private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
                 value={'other': 'Complete'},
                 tags=['release-group-prefix'],
                 validator={'__parent__': and_(seps_surround, validate_complete)})
    rebulk.string('R5', value='Region 5')
    rebulk.string('RC', value='Region C')
    rebulk.regex('Pre-?Air', value='Preair')
    rebulk.regex('(?:PS-?)Vita', value='PS Vita')
    rebulk.regex('Vita', value='PS Vita', tags='has-neighbor')
    rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'},
                 private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True)

    for value in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
        rebulk.string(value, value=value)
    rebulk.string('3D', value='3D', tags='has-neighbor')

    rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
    rebulk.string('HR', value='High Resolution')
    rebulk.string('LD', value='Line Dubbed')
    rebulk.string('MD', value='Mic Dubbed')
    rebulk.string('mHD', 'HDLight', value='Micro HD')
    rebulk.string('LDTV', value='Low Definition')
    rebulk.string('HFR', value='High Frame Rate')
    rebulk.string('VFR', value='Variable Frame Rate')
    rebulk.string('HD', value='HD', validator=None,
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
                 tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='Ultra HD', validator=None,
                 tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Upscaled?', value='Upscaled')

    for value in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail',
                  'Colorized', 'Internal'):
        rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
    rebulk.regex('LiNE', value='Line Audio', tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix'])
    rebulk.regex('Read-?NFO', value='Read NFO')
    rebulk.string('CONVERT', value='Converted', tags='has-neighbor')
    rebulk.string('DOCU', 'DOKU', value='Documentary', tags='has-neighbor')
    rebulk.string('OM', value='Open Matte', tags='has-neighbor')
    rebulk.string('STV', value='Straight to Video', tags='has-neighbor')
    rebulk.string('OAR', value='Original Aspect Ratio', tags='has-neighbor')
    rebulk.string('Complet', value='Complete', tags=['has-neighbor', 'release-group-prefix'])

    for coast in ('East', 'West'):
        rebulk.regex(r'(?:Live-)?(?:Episode-)?' + coast + '-?(?:Coast-)?Feed', value=coast + ' Coast Feed')

    rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor')
    rebulk.string('Ova', 'Oav', value='Original Animated Video')

    rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
                 tags=['other.validate.screener', 'source-prefix', 'source-suffix'])
    rebulk.string('Mux', value='Mux', validator=seps_after,
                  tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix'])
    rebulk.string('HC', 'vost', value='Hardcoded Subtitles')

    rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor')
    rebulk.regex('HDR(?:10)?', value='HDR10', tags='uhdbluray-neighbor')
    rebulk.regex('Dolby-?Vision', value='Dolby Vision', tags='uhdbluray-neighbor')
    rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')

    rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Extras', value='Extras', tags='has-neighbor')
    rebulk.regex('Digital-?Extras?', value='Extras')
    rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
    rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')

    rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
                 ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
                 ValidateAtEnd, ValidateReal, ProperCountRule)

    return rebulk


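# proper_count sums the distinct Proper-style markers found in the name; markers
# tagged 'real' count double, so a release with both REAL and PROPER scores
# higher than a plain REPACK.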
class ProperCountRule(Rule):
    """
    Add proper_count property
    """
    priority = POST_PROCESS

    consequence = AppendMatch

    properties = {'proper_count': [None]}

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        propers = matches.named('other', lambda match: match.value == 'Proper')
        if propers:
            raws = {}  # Count distinct raw values
            for proper in propers:
                raws[raw_cleanup(proper.raw)] = proper
            proper_count_match = copy.copy(propers[-1])
            proper_count_match.name = 'proper_count'

            value = 0
            for raw in raws.values():
                value += 2 if 'real' in raw.tags else 1

            proper_count_match.value = value
            return proper_count_match


class RenameAnotherToOther(Rule):
    """
    Rename `another` properties to `other`
    """
    priority = 32
    consequence = RenameMatch('other')

    def when(self, matches, context):
        return matches.named('another')


class ValidateHasNeighbor(Rule):
    """
    Validate tag has-neighbor
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        ret = []
        for to_check in matches.range(predicate=lambda match: 'has-neighbor' in match.tags):
            previous_match = matches.previous(to_check, index=0)
            previous_group = matches.markers.previous(to_check, lambda marker: marker.name == 'group', 0)
            if previous_group and (not previous_match or previous_group.end > previous_match.end):
                previous_match = previous_group
            if previous_match and not matches.input_string[previous_match.end:to_check.start].strip(seps):
                break
            next_match = matches.next(to_check, index=0)
            next_group = matches.markers.next(to_check, lambda marker: marker.name == 'group', 0)
            if next_group and (not next_match or next_group.start < next_match.start):
                next_match = next_group
            if next_match and not matches.input_string[to_check.end:next_match.start].strip(seps):
                break
            ret.append(to_check)
        return ret


class ValidateHasNeighborBefore(Rule):
    """
    Validate tag has-neighbor-before that previous match exists.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        ret = []
        for to_check in matches.range(predicate=lambda match: 'has-neighbor-before' in match.tags):
            next_match = matches.next(to_check, index=0)
            next_group = matches.markers.next(to_check, lambda marker: marker.name == 'group', 0)
            if next_group and (not next_match or next_group.start < next_match.start):
                next_match = next_group
            if next_match and not matches.input_string[to_check.end:next_match.start].strip(seps):
                break
            ret.append(to_check)
        return ret


class ValidateHasNeighborAfter(Rule):
    """
    Validate tag has-neighbor-after that next match exists.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        ret = []
        for to_check in matches.range(predicate=lambda match: 'has-neighbor-after' in match.tags):
            previous_match = matches.previous(to_check, index=0)
            previous_group = matches.markers.previous(to_check, lambda marker: marker.name == 'group', 0)
            if previous_group and (not previous_match or previous_group.end > previous_match.end):
                previous_match = previous_group
            if previous_match and not matches.input_string[previous_match.end:to_check.start].strip(seps):
                break
            ret.append(to_check)
        return ret


class ValidateScreenerRule(Rule):
    """
    Validate tag other.validate.screener
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        ret = []
        for screener in matches.named('other', lambda match: 'other.validate.screener' in match.tags):
            source_match = matches.previous(screener, lambda match: match.initiator.name == 'source', 0)
            if not source_match or matches.input_string[source_match.end:screener.start].strip(seps):
                ret.append(screener)
        return ret


class ValidateMuxRule(Rule):
    """
    Validate tag other.validate.mux
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        ret = []
        for mux in matches.named('other', lambda match: 'other.validate.mux' in match.tags):
            source_match = matches.previous(mux, lambda match: match.initiator.name == 'source', 0)
            if not source_match:
                ret.append(mux)
        return ret


class ValidateHardcodedSubs(Rule):
    """Validate HC matches."""

    priority = 32
    consequence = RemoveMatch

    def when(self, matches, context):
        to_remove = []
        for hc_match in matches.named('other', predicate=lambda match: match.value == 'Hardcoded Subtitles'):
            next_match = matches.next(hc_match, predicate=lambda match: match.name == 'subtitle_language', index=0)
            if next_match and not matches.holes(hc_match.end, next_match.start,
                                                predicate=lambda match: match.value.strip(seps)):
                continue

            previous_match = matches.previous(hc_match,
                                              predicate=lambda match: match.name == 'subtitle_language', index=0)
            if previous_match and not matches.holes(previous_match.end, hc_match.start,
                                                    predicate=lambda match: match.value.strip(seps)):
                continue

            to_remove.append(hc_match)

        return to_remove


class ValidateStreamingServiceNeighbor(Rule):
    """Validate streaming service's neighbors."""

    priority = 32
    consequence = RemoveMatch

    def when(self, matches, context):
        to_remove = []
        for match in matches.named('other',
                                   predicate=lambda m: (m.initiator.name != 'source'
                                                        and ('streaming_service.prefix' in m.tags
                                                             or 'streaming_service.suffix' in m.tags))):
            match = match.initiator
            if not seps_after(match):
                if 'streaming_service.prefix' in match.tags:
                    next_match = matches.next(match, lambda m: m.name == 'streaming_service', 0)
                    if next_match and not matches.holes(match.end, next_match.start,
                                                        predicate=lambda m: m.value.strip(seps)):
                        continue
                if match.children:
                    to_remove.extend(match.children)
                to_remove.append(match)

            elif not seps_before(match):
                if 'streaming_service.suffix' in match.tags:
                    previous_match = matches.previous(match, lambda m: m.name == 'streaming_service', 0)
                    if previous_match and not matches.holes(previous_match.end, match.start,
                                                            predicate=lambda m: m.value.strip(seps)):
                        continue

                if match.children:
                    to_remove.extend(match.children)
                to_remove.append(match)

        return to_remove


class ValidateAtEnd(Rule):
    """Validate other which should occur at the end of a filepart."""

    priority = 32
    consequence = RemoveMatch

    def when(self, matches, context):
        to_remove = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end,
                                       predicate=lambda m: m.name == 'other' and 'at-end' in m.tags):
                if (matches.holes(match.end, filepart.end, predicate=lambda m: m.value.strip(seps)) or
                        matches.range(match.end, filepart.end, predicate=lambda m: m.name not in (
                            'other', 'container'))):
                    to_remove.append(match)

        return to_remove


class ValidateReal(Rule):
    """
    Validate Real
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        ret = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end, lambda m: m.name == 'other' and 'real' in m.tags):
                if not matches.range(filepart.start, match.start):
                    ret.append(match)

        return ret
46
lib/guessit/rules/properties/part.py
Normal file
@@ -0,0 +1,46 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
part property
"""
from rebulk.remodule import re

from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_surround, int_coercable, and_
from ..common.numeral import numeral, parse_numeral
from ...reutils import build_or_pattern


def part(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'part'))
    rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], validator={'__parent__': seps_surround})

    prefixes = config['prefixes']

    def validate_roman(match):
        """
        Validate a roman numeral match if it is surrounded by separators.
        """
        if int_coercable(match.raw):
            return True
        return seps_surround(match)

    rebulk.regex(build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')',
                 prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral,
                 validator={'part': and_(validate_roman, lambda m: 0 < m.value < 100)})

    return rebulk
347
lib/guessit/rules/properties/release_group.py
Normal file
@@ -0,0 +1,347 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
release_group property
"""
import copy

from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
from rebulk.match import Match

from ..common import seps
from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import int_coercable, seps_surround
from ..properties.title import TitleFromPosition


def release_group(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    forbidden_groupnames = config['forbidden_names']

    groupname_ignore_seps = config['ignored_seps']
    groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])

    def clean_groupname(string):
        """
        Remove and strip separators from the input string.
        """
        string = string.strip(groupname_seps)
        if not (string.endswith(tuple(groupname_ignore_seps)) and string.startswith(tuple(groupname_ignore_seps))) \
                and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
            string = string.strip(groupname_ignore_seps)
        for forbidden in forbidden_groupnames:
            if string.lower().startswith(forbidden) and string[len(forbidden):len(forbidden) + 1] in seps:
                string = string[len(forbidden):]
                string = string.strip(groupname_seps)
            if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
                # strip the forbidden suffix (the original sliced with string[:len(forbidden)],
                # which only keeps the first len(forbidden) characters)
                string = string[:-len(forbidden)]
                string = string.strip(groupname_seps)
        return string.strip()

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))

    expected_group = build_expected_function('expected_group')

    rebulk.functional(expected_group, name='release_group', tags=['expected'],
                      validator=seps_surround,
                      conflict_solver=lambda match, other: other,
                      disabled=lambda context: not context.get('expected_group'))

    return rebulk.rules(
        DashSeparatedReleaseGroup(clean_groupname),
        SceneReleaseGroup(clean_groupname),
        AnimeReleaseGroup
    )


_scene_previous_names = ('video_codec', 'source', 'video_api', 'audio_codec', 'audio_profile', 'video_profile',
                         'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
                         'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix')

_scene_previous_tags = ('release-group-prefix',)

_scene_no_previous_tags = ('no-release-group-prefix',)


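# Two symmetric heuristics: at the end of a filepart the group must sit behind a
# single dash with dot-separated matches before it (e.g. "...x264-GRP"); at the
# beginning it must be a dash-free hole followed by another hole that starts with
# a dash and contains dot-separated words (e.g. "grp-the.title...").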
class DashSeparatedReleaseGroup(Rule):
    """
    Detect dash-separated release groups that might appear at the end or at the beginning of a release name.

    Series.S01E02.Pilot.DVDRip.x264-CS.mkv
    release_group: CS
    abc-the.title.name.1983.1080p.bluray.x264.mkv
    release_group: abc

    At the end: release groups should be dash-separated and shouldn't contain spaces nor
    appear in a group with other matches. The preceding matches should be separated by a dot.
    If a release group is found, the conflicting matches are removed.

    At the beginning: release groups should be dash-separated and shouldn't contain spaces nor appear in a group.
    It should be followed by a hole with dot-separated words.
    Detection only happens if no matches exist at the beginning.
    """
    consequence = [RemoveMatch, AppendMatch]

    def __init__(self, value_formatter):
        """Default constructor."""
        super(DashSeparatedReleaseGroup, self).__init__()
        self.value_formatter = value_formatter

    @classmethod
    def is_valid(cls, matches, candidate, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Whether a candidate is a valid release group.
        """
        if not at_end:
            if len(candidate.value) <= 1:
                return False

            if matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group'):
                return False

            first_hole = matches.holes(candidate.end, end, predicate=lambda m: m.start == candidate.end, index=0)
            if not first_hole:
                return False

            raw_value = first_hole.raw
            return raw_value[0] == '-' and '-' not in raw_value[1:] and '.' in raw_value and ' ' not in raw_value

        group = matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group', index=0)
        if group and matches.at_match(group, predicate=lambda m: not m.private and m.span != candidate.span):
            return False

        count = 0
        match = candidate
        while match:
            current = matches.range(start,
                                    match.start,
                                    index=-1,
                                    predicate=lambda m: not m.private and not 'expected' in m.tags)
            if not current:
                break

            separator = match.input_string[current.end:match.start]
            if not separator and match.raw[0] == '-':
                separator = '-'

            match = current

            if count == 0:
                if separator != '-':
                    break

                count += 1
                continue

            if separator == '.':
                return True

    def detect(self, matches, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Detect a release group at the end or at the beginning of a filepart.
        """
        candidate = None
        if at_end:
            container = matches.ending(end, lambda m: m.name == 'container', index=0)
            if container:
                end = container.start

            candidate = matches.ending(end, index=0, predicate=(
                lambda m: not m.private and not (
                    m.name == 'other' and 'not-a-release-group' in m.tags
                ) and '-' not in m.raw and m.raw.strip() == m.raw))

        if not candidate:
            if at_end:
                candidate = matches.holes(start, end, seps=seps, index=-1,
                                          predicate=lambda m: m.end == end and m.raw.strip(seps) and m.raw[0] == '-')
            else:
                candidate = matches.holes(start, end, seps=seps, index=0,
                                          predicate=lambda m: m.start == start and m.raw.strip(seps))

        if candidate and self.is_valid(matches, candidate, start, end, at_end):
            return candidate

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.named('release_group'):
            return

        to_remove = []
        to_append = []
        for filepart in matches.markers.named('path'):
            candidate = self.detect(matches, filepart.start, filepart.end, True)
            if candidate:
                to_remove.extend(matches.at_match(candidate))
            else:
                candidate = self.detect(matches, filepart.start, filepart.end, False)

            if candidate:
                releasegroup = Match(candidate.start, candidate.end, name='release_group',
                                     formatter=self.value_formatter, input_string=candidate.input_string)

                if releasegroup.value:
                    to_append.append(releasegroup)
|
||||||
|
if to_remove or to_append:
|
||||||
|
return to_remove, to_append
|
||||||
|
|
||||||
|
|
||||||
|
class SceneReleaseGroup(Rule):
|
||||||
|
"""
|
||||||
|
Add release_group match in existing matches (scene format).
|
||||||
|
|
||||||
|
Something.XViD-ReleaseGroup.mkv
|
||||||
|
"""
|
||||||
|
dependency = [TitleFromPosition]
|
||||||
|
consequence = AppendMatch
|
||||||
|
|
||||||
|
properties = {'release_group': [None]}
|
||||||
|
|
||||||
|
def __init__(self, value_formatter):
|
||||||
|
"""Default constructor."""
|
||||||
|
super(SceneReleaseGroup, self).__init__()
|
||||||
|
self.value_formatter = value_formatter
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def is_previous_match(match):
|
||||||
|
"""
|
||||||
|
Check if match can precede release_group
|
||||||
|
|
||||||
|
:param match:
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
return not match.tagged(*_scene_no_previous_tags) if match.name in _scene_previous_names else \
|
||||||
|
match.tagged(*_scene_previous_tags)
|
||||||
|
|
||||||
|
def when(self, matches, context): # pylint:disable=too-many-locals
|
||||||
|
# If a release_group is found before, ignore this kind of release_group rule.
|
||||||
|
|
||||||
|
ret = []
|
||||||
|
|
||||||
|
for filepart in marker_sorted(matches.markers.named('path'), matches):
|
||||||
|
# pylint:disable=cell-var-from-loop
|
||||||
|
start, end = filepart.span
|
||||||
|
if matches.named('release_group', predicate=lambda m: m.start >= start and m.end <= end):
|
||||||
|
continue
|
||||||
|
|
||||||
|
titles = matches.named('title', predicate=lambda m: m.start >= start and m.end <= end)
|
||||||
|
|
||||||
|
def keep_only_first_title(match):
|
||||||
|
"""
|
||||||
|
Keep only first title from this filepart, as other ones are most likely release group.
|
||||||
|
|
||||||
|
:param match:
|
||||||
|
:type match:
|
||||||
|
:return:
|
||||||
|
:rtype:
|
||||||
|
"""
|
||||||
|
return match in titles[1:]
|
||||||
|
|
||||||
|
last_hole = matches.holes(start, end + 1, formatter=self.value_formatter,
|
||||||
|
ignore=keep_only_first_title,
|
||||||
|
predicate=lambda hole: cleanup(hole.value), index=-1)
|
||||||
|
|
||||||
|
if last_hole:
|
||||||
|
def previous_match_filter(match):
|
||||||
|
"""
|
||||||
|
Filter to apply to find previous match
|
||||||
|
|
||||||
|
:param match:
|
||||||
|
:type match:
|
||||||
|
:return:
|
||||||
|
:rtype:
|
||||||
|
"""
|
||||||
|
|
||||||
|
if match.start < filepart.start:
|
||||||
|
return False
|
||||||
|
return not match.private or self.is_previous_match(match)
|
||||||
|
|
||||||
|
previous_match = matches.previous(last_hole,
|
||||||
|
previous_match_filter,
|
||||||
|
index=0)
|
||||||
|
if previous_match and (self.is_previous_match(previous_match)) and \
|
||||||
|
not matches.input_string[previous_match.end:last_hole.start].strip(seps) \
|
||||||
|
and not int_coercable(last_hole.value.strip(seps)):
|
||||||
|
|
||||||
|
last_hole.name = 'release_group'
|
||||||
|
last_hole.tags = ['scene']
|
||||||
|
|
||||||
|
# if hole is inside a group marker with same value, remove [](){} ...
|
||||||
|
group = matches.markers.at_match(last_hole, lambda marker: marker.name == 'group', 0)
|
||||||
|
if group:
|
||||||
|
group.formatter = self.value_formatter
|
||||||
|
if group.value == last_hole.value:
|
||||||
|
last_hole.start = group.start + 1
|
||||||
|
last_hole.end = group.end - 1
|
||||||
|
last_hole.tags = ['anime']
|
||||||
|
|
||||||
|
ignored_matches = matches.range(last_hole.start, last_hole.end, keep_only_first_title)
|
||||||
|
|
||||||
|
for ignored_match in ignored_matches:
|
||||||
|
matches.remove(ignored_match)
|
||||||
|
|
||||||
|
ret.append(last_hole)
|
||||||
|
return ret
|
||||||
|
|
||||||
|
|
||||||
|
class AnimeReleaseGroup(Rule):
|
||||||
|
"""
|
||||||
|
Add release_group match in existing matches (anime format)
|
||||||
|
...[ReleaseGroup] Something.mkv
|
||||||
|
"""
|
||||||
|
dependency = [SceneReleaseGroup, TitleFromPosition]
|
||||||
|
consequence = [RemoveMatch, AppendMatch]
|
||||||
|
|
||||||
|
properties = {'release_group': [None]}
|
||||||
|
|
||||||
|
def when(self, matches, context):
|
||||||
|
to_remove = []
|
||||||
|
to_append = []
|
||||||
|
|
||||||
|
# If a release_group is found before, ignore this kind of release_group rule.
|
||||||
|
if matches.named('release_group'):
|
||||||
|
return False
|
||||||
|
|
||||||
|
if not matches.named('episode') and not matches.named('season') and matches.named('release_group'):
|
||||||
|
# This doesn't seems to be an anime, and we already found another release_group.
|
||||||
|
return False
|
||||||
|
|
||||||
|
for filepart in marker_sorted(matches.markers.named('path'), matches):
|
||||||
|
|
||||||
|
# pylint:disable=bad-continuation
|
||||||
|
empty_group = matches.markers.range(filepart.start,
|
||||||
|
filepart.end,
|
||||||
|
lambda marker: (marker.name == 'group'
|
||||||
|
and not matches.range(marker.start, marker.end,
|
||||||
|
lambda m:
|
||||||
|
'weak-language' not in m.tags)
|
||||||
|
and marker.value.strip(seps)
|
||||||
|
and not int_coercable(marker.value.strip(seps))), 0)
|
||||||
|
|
||||||
|
if empty_group:
|
||||||
|
group = copy.copy(empty_group)
|
||||||
|
group.marker = False
|
||||||
|
group.raw_start += 1
|
||||||
|
group.raw_end -= 1
|
||||||
|
group.tags = ['anime']
|
||||||
|
group.name = 'release_group'
|
||||||
|
to_append.append(group)
|
||||||
|
to_remove.extend(matches.range(empty_group.start, empty_group.end,
|
||||||
|
lambda m: 'weak-language' in m.tags))
|
||||||
|
|
||||||
|
if to_remove or to_append:
|
||||||
|
return to_remove, to_append
|
||||||
|
return False
|
||||||
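A quick sanity-check sketch of how these rules surface through the API (the release names below are invented examples, and the exact output depends on the whole rule pipeline, not on this file alone):

from lib.guessit import guessit

# Scene format: dash-separated group at the end (DashSeparatedReleaseGroup / SceneReleaseGroup).
print(guessit('Series.S01E02.Pilot.DVDRip.x264-CS.mkv').get('release_group'))  # 'CS'

# Anime format: bracketed group at the beginning (AnimeReleaseGroup).
print(guessit('[SubGroup] Show - 01 [720p].mkv').get('release_group'))  # 'SubGroup'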
163
lib/guessit/rules/properties/screen_size.py
Normal file
@@ -0,0 +1,163 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
screen_size property
"""
from rebulk.match import Match
from rebulk.remodule import re

from rebulk import Rebulk, Rule, RemoveMatch, AppendMatch

from ..common.pattern import is_disabled
from ..common.quantity import FrameRate
from ..common.validators import seps_surround
from ..common import dash, seps
from ...reutils import build_or_pattern


def screen_size(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    interlaced = frozenset(config['interlaced'])
    progressive = frozenset(config['progressive'])
    frame_rates = [re.escape(rate) for rate in config['frame_rates']]
    min_ar = config['min_ar']
    max_ar = config['max_ar']

    rebulk = Rebulk()
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)

    rebulk.defaults(name='screen_size', validator=seps_surround, abbreviations=[dash],
                    disabled=lambda context: is_disabled(context, 'screen_size'))

    frame_rate_pattern = build_or_pattern(frame_rates, name='frame_rate')
    interlaced_pattern = build_or_pattern(interlaced, name='height')
    progressive_pattern = build_or_pattern(progressive, name='height')

    res_pattern = r'(?:(?P<width>\d{3,4})(?:x|\*))?'
    rebulk.regex(res_pattern + interlaced_pattern + r'(?P<scan_type>i)' + frame_rate_pattern + '?')
    rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)' + frame_rate_pattern + '?')
    rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)?(?:hd)')
    rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)?x?')
    rebulk.string('4k', value='2160p')
    rebulk.regex(r'(?P<width>\d{3,4})-?(?:x|\*)-?(?P<height>\d{3,4})',
                 conflict_solver=lambda match, other: '__default__' if other.name == 'screen_size' else other)

    rebulk.regex(frame_rate_pattern + '(p|fps)', name='frame_rate',
                 formatter=FrameRate.fromstring, disabled=lambda context: is_disabled(context, 'frame_rate'))

    rebulk.rules(PostProcessScreenSize(progressive, min_ar, max_ar), ScreenSizeOnlyOne, ResolveScreenSizeConflicts)

    return rebulk


class PostProcessScreenSize(Rule):
    """
    Process the screen size, calculating the aspect ratio if available.

    Convert to a standard notation (720p, 1080p, etc) when it's a standard resolution and
    the aspect ratio is valid or not available.

    It also creates an aspect_ratio match when available.
    """
    consequence = AppendMatch

    def __init__(self, standard_heights, min_ar, max_ar):
        super(PostProcessScreenSize, self).__init__()
        self.standard_heights = standard_heights
        self.min_ar = min_ar
        self.max_ar = max_ar

    def when(self, matches, context):
        to_append = []
        for match in matches.named('screen_size'):
            if not is_disabled(context, 'frame_rate'):
                for frame_rate in match.children.named('frame_rate'):
                    frame_rate.formatter = FrameRate.fromstring
                    to_append.append(frame_rate)

            values = match.children.to_dict()
            if 'height' not in values:
                continue

            scan_type = (values.get('scan_type') or 'p').lower()
            height = values['height']
            if 'width' not in values:
                match.value = '{0}{1}'.format(height, scan_type)
                continue

            width = values['width']
            calculated_ar = float(width) / float(height)

            aspect_ratio = Match(match.start, match.end, input_string=match.input_string,
                                 name='aspect_ratio', value=round(calculated_ar, 3))

            if not is_disabled(context, 'aspect_ratio'):
                to_append.append(aspect_ratio)

            if height in self.standard_heights and self.min_ar < calculated_ar < self.max_ar:
                match.value = '{0}{1}'.format(height, scan_type)
            else:
                match.value = '{0}x{1}'.format(width, height)

        return to_append


class ScreenSizeOnlyOne(Rule):
    """
    Keep a single screen_size per filepath part.
    """
    consequence = RemoveMatch

    def when(self, matches, context):
        to_remove = []
        for filepart in matches.markers.named('path'):
            screensize = list(reversed(matches.range(filepart.start, filepart.end,
                                                     lambda match: match.name == 'screen_size')))
            if len(screensize) > 1 and len(set((match.value for match in screensize))) > 1:
                to_remove.extend(screensize[1:])

        return to_remove


class ResolveScreenSizeConflicts(Rule):
    """
    Resolve screen_size conflicts with season and episode matches.
    """
    consequence = RemoveMatch

    def when(self, matches, context):
        to_remove = []
        for filepart in matches.markers.named('path'):
            screensize = matches.range(filepart.start, filepart.end, lambda match: match.name == 'screen_size', 0)
            if not screensize:
                continue

            conflicts = matches.conflicting(screensize, lambda match: match.name in ('season', 'episode'))
            if not conflicts:
                continue

            has_neighbor = False
            video_profile = matches.range(screensize.end, filepart.end, lambda match: match.name == 'video_profile', 0)
            if video_profile and not matches.holes(screensize.end, video_profile.start,
                                                   predicate=lambda h: h.value and h.value.strip(seps)):
                to_remove.extend(conflicts)
                has_neighbor = True

            previous = matches.previous(screensize, index=0, predicate=(
                lambda m: m.name in ('date', 'source', 'other', 'streaming_service')))
            if previous and not matches.holes(previous.end, screensize.start,
                                              predicate=lambda h: h.value and h.value.strip(seps)):
                to_remove.extend(conflicts)
                has_neighbor = True

            if not has_neighbor:
                to_remove.append(screensize)

        return to_remove
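A short usage sketch (invented file names): PostProcessScreenSize normalizes standard heights with a valid aspect ratio to the '<height><scan type>' notation and keeps the raw WxH form otherwise, while '4k' is aliased to '2160p':

from lib.guessit import guessit

print(guessit('Movie.2014.1280x720.BluRay.mkv').get('screen_size'))  # '720p' (standard height, valid AR)
print(guessit('Movie.2014.4k.x265.mkv').get('screen_size'))          # '2160p'
print(guessit('Clip.1000x600.mkv').get('screen_size'))               # '1000x600' (non-standard height)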
30
lib/guessit/rules/properties/size.py
Normal file
@@ -0,0 +1,30 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
size property
"""
import re

from rebulk import Rebulk

from ..common import dash
from ..common.quantity import Size
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def size(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'size'))
    rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='size', validator=seps_surround)
    rebulk.regex(r'\d+-?[mgt]b', r'\d+\.\d+-?[mgt]b', formatter=Size.fromstring, tags=['release-group-prefix'])

    return rebulk
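A minimal sketch (invented name; the parsed value is a quantity object from ..common.quantity, so str() is used to get the canonical text back):

from lib.guessit import guessit

info = guessit('Movie.2014.1080p.BluRay.700MB.mkv')
print(str(info.get('size')))  # '700MB'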
235
lib/guessit/rules/properties/source.py
Normal file
@@ -0,0 +1,235 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
source property
"""
import copy

from rebulk.remodule import re

from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule

from .audio_codec import HqConflictRule
from ..common import dash, seps
from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after, or_


def source(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True)
    rebulk = rebulk.defaults(name='source',
                             tags=['video-codec-prefix', 'streaming_service.suffix'],
                             validate_all=True,
                             validator={'__parent__': or_(seps_before, seps_after)})

    rip_prefix = '(?P<other>Rip)-?'
    rip_suffix = '-?(?P<other>Rip)'
    rip_optional_suffix = '(?:' + rip_suffix + ')?'

    def build_source_pattern(*patterns, **kwargs):
        """Helper to build a source pattern."""
        prefix_format = kwargs.get('prefix') or ''
        suffix_format = kwargs.get('suffix') or ''

        string_format = prefix_format + '({0})' + suffix_format
        return [string_format.format(pattern) for pattern in patterns]

    def demote_other(match, other):  # pylint: disable=unused-argument
        """Default conflict solver with 'other' property."""
        return other if other.name == 'other' or other.name == 'release_group' else '__default__'

    rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
                 value={'source': 'VHS', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('CAM', suffix=rip_optional_suffix),
                 value={'source': 'Camera', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?CAM', suffix=rip_optional_suffix),
                 value={'source': 'HD Camera', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=rip_optional_suffix),
                 value={'source': 'Telesync', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=rip_optional_suffix),
                 value={'source': 'HD Telesync', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint')
    rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=rip_optional_suffix),
                 value={'source': 'Telecine', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=rip_optional_suffix),
                 value={'source': 'HD Telecine', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('PPV', suffix=rip_optional_suffix),
                 value={'source': 'Pay-per-view', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('SD-?TV', suffix=rip_optional_suffix),
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', suffix=rip_suffix),  # TV is too common to allow matching
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix),
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV-?(?=Dub)'), value='TV')
    rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=rip_optional_suffix),
                 value={'source': 'Digital TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DVD', suffix=rip_optional_suffix),
                 value={'source': 'DVD', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DM', suffix=rip_optional_suffix),
                 value={'source': 'Digital Master', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('VIDEO-?TS', 'DVD-?R(?:$|(?!E))',  # 'DVD-?R(?:$|^E)' => DVD-Real ...
                                       'DVD-?9', 'DVD-?5'), value='DVD')

    rebulk.regex(*build_source_pattern('HD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV-?HD', suffix=rip_suffix), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', suffix='-?(?P<other>Rip-?HD)'), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('VOD', suffix=rip_optional_suffix),
                 value={'source': 'Video on Demand', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix),
                 value={'source': 'Web', 'other': 'Rip'})
    # WEBCap is a synonym for WEBRip, mostly used by non-English speakers
    rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
                 value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
                 value={'source': 'Web'})
    rebulk.regex('(WEB)', value='Web', tags='weak.source')

    rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
                 value={'source': 'HD-DVD', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50', suffix=rip_optional_suffix),
                 value={'source': 'Blu-ray', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('(?P<another>BR)-?(?=Scr(?:eener)?)', '(?P<another>BR)-?(?=Mux)'),  # BRRip
                 value={'source': 'Blu-ray', 'another': 'Reencoded'})
    rebulk.regex(*build_source_pattern('(?P<another>BR)', suffix=rip_suffix),  # BRRip
                 value={'source': 'Blu-ray', 'other': 'Rip', 'another': 'Reencoded'})

    rebulk.regex(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'), value='Ultra HD Blu-ray')

    rebulk.regex(*build_source_pattern('AHDTV'), value='Analog HDTV')
    rebulk.regex(*build_source_pattern('UHD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
                 value={'source': 'Ultra HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('UHD', suffix=rip_suffix), conflict_solver=demote_other,
                 value={'source': 'Ultra HDTV', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=rip_optional_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})

    rebulk.rules(ValidateSourcePrefixSuffix, ValidateWeakSource, UltraHdBlurayRule)

    return rebulk


class UltraHdBlurayRule(Rule):
    """
    Replace other:Ultra HD and source:Blu-ray with source:Ultra HD Blu-ray
    """
    dependency = HqConflictRule
    consequence = [RemoveMatch, AppendMatch]

    @classmethod
    def find_ultrahd(cls, matches, start, end, index):
        """Find Ultra HD match."""
        return matches.range(start, end, index=index, predicate=(
            lambda m: not m.private and m.name == 'other' and m.value == 'Ultra HD'
        ))

    @classmethod
    def validate_range(cls, matches, start, end):
        """Validate no holes or invalid matches exist in the specified range."""
        return (
            not matches.holes(start, end, predicate=lambda m: m.value.strip(seps)) and
            not matches.range(start, end, predicate=(
                lambda m: not m.private and (
                    m.name not in ('screen_size', 'color_depth') and (
                        m.name != 'other' or 'uhdbluray-neighbor' not in m.tags))))
        )

    def when(self, matches, context):
        to_remove = []
        to_append = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end, predicate=(
                    lambda m: not m.private and m.name == 'source' and m.value == 'Blu-ray')):
                other = self.find_ultrahd(matches, filepart.start, match.start, -1)
                if not other or not self.validate_range(matches, other.end, match.start):
                    other = self.find_ultrahd(matches, match.end, filepart.end, 0)
                    if not other or not self.validate_range(matches, match.end, other.start):
                        if not matches.range(filepart.start, filepart.end, predicate=(
                                lambda m: m.name == 'screen_size' and m.value == '2160p')):
                            continue

                if other:
                    other.private = True

                new_source = copy.copy(match)
                new_source.value = 'Ultra HD Blu-ray'
                to_remove.append(match)
                to_append.append(new_source)

        if to_remove or to_append:
            return to_remove, to_append
        return False


class ValidateSourcePrefixSuffix(Rule):
    """
    Validate source with source prefix, source suffix.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        ret = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'):
                match = match.initiator
                if not seps_before(match) and \
                        not matches.range(match.start - 1, match.start - 2,
                                          lambda m: 'source-prefix' in m.tags):
                    if match.children:
                        ret.extend(match.children)
                    ret.append(match)
                    continue
                if not seps_after(match) and \
                        not matches.range(match.end, match.end + 1,
                                          lambda m: 'source-suffix' in m.tags):
                    if match.children:
                        ret.extend(match.children)
                    ret.append(match)
                    continue

        return ret


class ValidateWeakSource(Rule):
    """
    Validate weak source
    """
    dependency = [ValidateSourcePrefixSuffix]
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        ret = []
        for filepart in matches.markers.named('path'):
            for match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'):
                # if there is more than one source in this filepart, just before the year and with holes
                # for the title, the source is most likely part of the title
                if 'weak.source' in match.tags \
                        and matches.range(match.end, filepart.end, predicate=lambda m: m.name == 'source') \
                        and matches.holes(filepart.start, match.start,
                                          predicate=lambda m: m.value.strip(seps), index=-1):
                    if match.children:
                        ret.extend(match.children)
                    ret.append(match)
                    continue

        return ret
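A short sketch (invented names) showing how one token can populate both properties, since the '-?Rip' group is captured as other:

from lib.guessit import guessit

print(guessit('Movie.2014.DVDRip.x264.mkv').get('source'))    # 'DVD'
print(guessit('Movie.2014.DVDRip.x264.mkv').get('other'))     # 'Rip'
print(guessit('Show.S01E01.WEB-DL.1080p.mkv').get('source'))  # 'Web'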
78
lib/guessit/rules/properties/streaming_service.py
Normal file
@@ -0,0 +1,78 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
streaming_service property
"""
import re

from rebulk import Rebulk
from rebulk.rules import Rule, RemoveMatch

from ..common.pattern import is_disabled
from ...rules.common import seps, dash
from ...rules.common.validators import seps_before, seps_after


def streaming_service(config):  # pylint: disable=too-many-statements,unused-argument
    """Streaming service property.

    :param config: rule configuration
    :type config: dict
    :return:
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'streaming_service'))
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['source-prefix'])

    for value, items in config.items():
        patterns = items if isinstance(items, list) else [items]
        for pattern in patterns:
            if pattern.startswith('re:'):
                rebulk.regex(pattern, value=value)
            else:
                rebulk.string(pattern, value=value)

    rebulk.rules(ValidateStreamingService)

    return rebulk


class ValidateStreamingService(Rule):
    """Validate streaming service matches."""

    priority = 128
    consequence = RemoveMatch

    def when(self, matches, context):
        """Streaming service is always before source.

        :param matches:
        :type matches: rebulk.match.Matches
        :param context:
        :type context: dict
        :return:
        """
        to_remove = []
        for service in matches.named('streaming_service'):
            next_match = matches.next(service, lambda match: 'streaming_service.suffix' in match.tags, 0)
            previous_match = matches.previous(service, lambda match: 'streaming_service.prefix' in match.tags, 0)
            has_other = service.initiator and service.initiator.children.named('other')

            if not has_other:
                if (not next_match or
                        matches.holes(service.end, next_match.start,
                                      predicate=lambda match: match.value.strip(seps)) or
                        not seps_before(service)):
                    if (not previous_match or
                            matches.holes(previous_match.end, service.start,
                                          predicate=lambda match: match.value.strip(seps)) or
                            not seps_after(service)):
                        to_remove.append(service)
                        continue

            if service.value == 'Comedy Central':
                # Current match is a valid streaming service, removing invalid Criterion Collection (CC) matches
                to_remove.extend(matches.named('edition', predicate=lambda match: match.value == 'Criterion'))

        return to_remove
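A usage sketch, assuming the default rule configuration maps the 'NF' abbreviation to Netflix (the service patterns themselves live in the configuration, not in this file):

from lib.guessit import guessit

info = guessit('Show.S01E01.1080p.NF.WEB-DL.x264.mkv')
print(info.get('streaming_service'))  # 'Netflix'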
349
lib/guessit/rules/properties/title.py
Normal file
@@ -0,0 +1,349 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
title property
"""

from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
from rebulk.formatters import formatters

from .film import FilmTitleRule
from .language import (
    SubtitlePrefixLanguageRule,
    SubtitleSuffixLanguageRule,
    SubtitleExtensionRule,
    NON_SPECIFIC_LANGUAGES
)
from ..common import seps, title_seps
from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
from ..common.formatters import cleanup, reorder_title
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def title(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'title'))
    rebulk.rules(TitleFromPosition, PreferTitleWithYear)

    expected_title = build_expected_function('expected_title')

    rebulk.functional(expected_title, name='title', tags=['expected', 'title'],
                      validator=seps_surround,
                      formatter=formatters(cleanup, reorder_title),
                      conflict_solver=lambda match, other: other,
                      disabled=lambda context: not context.get('expected_title'))

    return rebulk


class TitleBaseRule(Rule):
    """
    Add title match in existing matches
    """
    # pylint:disable=no-self-use,unused-argument
    consequence = [AppendMatch, RemoveMatch]

    def __init__(self, match_name, match_tags=None, alternative_match_name=None):
        super(TitleBaseRule, self).__init__()
        self.match_name = match_name
        self.match_tags = match_tags
        self.alternative_match_name = alternative_match_name

    def hole_filter(self, hole, matches):
        """
        Filter holes for titles.
        :param hole:
        :type hole:
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        return True

    def filepart_filter(self, filepart, matches):
        """
        Filter filepart for titles.
        :param filepart:
        :type filepart:
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        return True

    def holes_process(self, holes, matches):
        """
        Process holes.
        :param holes:
        :type holes:
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        cropped_holes = []
        group_markers = matches.markers.named('group')
        for group_marker in group_markers:
            path_marker = matches.markers.at_match(group_marker, predicate=lambda m: m.name == 'path', index=0)
            if path_marker and path_marker.span == group_marker.span:
                group_markers.remove(group_marker)

        for hole in holes:
            cropped_holes.extend(hole.crop(group_markers))

        return cropped_holes

    @staticmethod
    def is_ignored(match):
        """
        Ignore matches when scanning for title (hole).

        Full word languages and countries won't be ignored if they are uppercase.
        """
        return not (len(match) > 3 and match.raw.isupper()) and match.name in ('language', 'country', 'episode_details')

    def should_keep(self, match, to_keep, matches, filepart, hole, starting):
        """
        Check if this match should be accepted when ending or starting a hole.
        :param match:
        :type match:
        :param to_keep:
        :type to_keep: list[Match]
        :param matches:
        :type matches: Matches
        :param filepart: the filepart match
        :type filepart: Match
        :param hole: the hole match
        :type hole: Match
        :param starting: true if match is starting the hole
        :type starting: bool
        :return:
        :rtype:
        """
        if match.name in ('language', 'country'):
            # Keep language if exactly matching the hole.
            if len(hole.value) == len(match.raw):
                return True

            # Keep language if other languages exist in the filepart.
            outside_matches = filepart.crop(hole)
            other_languages = []
            for outside in outside_matches:
                other_languages.extend(matches.range(outside.start, outside.end,
                                                     lambda c_match: c_match.name == match.name and
                                                     c_match not in to_keep and
                                                     c_match.value not in NON_SPECIFIC_LANGUAGES))

            if not other_languages and (not starting or len(match.raw) <= 3):
                return True

        return False

    def should_remove(self, match, matches, filepart, hole, context):
        """
        Check if this match should be removed after being ignored.
        :param match:
        :param matches:
        :param filepart:
        :param hole:
        :return:
        """
        if context.get('type') == 'episode' and match.name == 'episode_details':
            return match.start >= hole.start and match.end <= hole.end
        return True

    def check_titles_in_filepart(self, filepart, matches, context):  # pylint:disable=inconsistent-return-statements
        """
        Find title in filepart (ignoring language)
        """
        # pylint:disable=too-many-locals,too-many-branches,too-many-statements
        start, end = filepart.span

        holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
                              ignore=self.is_ignored,
                              predicate=lambda m: m.value)

        holes = self.holes_process(holes, matches)

        for hole in holes:
            if not hole or (self.hole_filter and not self.hole_filter(hole, matches)):
                continue

            to_remove = []
            to_keep = []

            ignored_matches = matches.range(hole.start, hole.end, self.is_ignored)

            if ignored_matches:
                for ignored_match in reversed(ignored_matches):
                    # pylint:disable=undefined-loop-variable, cell-var-from-loop
                    trailing = matches.chain_before(hole.end, seps, predicate=lambda m: m == ignored_match)
                    if trailing:
                        should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, False)
                        if should_keep:
                            # pylint:disable=unpacking-non-sequence
                            try:
                                append, crop = should_keep
                            except TypeError:
                                append, crop = should_keep, should_keep
                            if append:
                                to_keep.append(ignored_match)
                            if crop:
                                hole.end = ignored_match.start

                for ignored_match in ignored_matches:
                    if ignored_match not in to_keep:
                        starting = matches.chain_after(hole.start, seps,
                                                       predicate=lambda m: m == ignored_match)
                        if starting:
                            should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, True)
                            if should_keep:
                                # pylint:disable=unpacking-non-sequence
                                try:
                                    append, crop = should_keep
                                except TypeError:
                                    append, crop = should_keep, should_keep
                                if append:
                                    to_keep.append(ignored_match)
                                if crop:
                                    hole.start = ignored_match.end

                for match in ignored_matches:
                    if self.should_remove(match, matches, filepart, hole, context):
                        to_remove.append(match)
                for keep_match in to_keep:
                    if keep_match in to_remove:
                        to_remove.remove(keep_match)

            if hole and hole.value:
                hole.name = self.match_name
                hole.tags = self.match_tags
                if self.alternative_match_name:
                    # Split and keep values that can be a title
                    titles = hole.split(title_seps, lambda m: m.value)
                    for title_match in list(titles[1:]):
                        previous_title = titles[titles.index(title_match) - 1]
                        separator = matches.input_string[previous_title.end:title_match.start]
                        if len(separator) == 1 and separator == '-' \
                                and previous_title.raw[-1] not in seps \
                                and title_match.raw[0] not in seps:
                            titles[titles.index(title_match) - 1].end = title_match.end
                            titles.remove(title_match)
                        else:
                            title_match.name = self.alternative_match_name

                else:
                    titles = [hole]
                return titles, to_remove

    def when(self, matches, context):
        ret = []
        to_remove = []

        if matches.named(self.match_name, lambda match: 'expected' in match.tags):
            return False

        fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
                     if not self.filepart_filter or self.filepart_filter(filepart, matches)]

        # Prioritize fileparts containing the year
        years_fileparts = []
        for filepart in fileparts:
            year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
            if year_match:
                years_fileparts.append(filepart)

        for filepart in fileparts:
            try:
                years_fileparts.remove(filepart)
            except ValueError:
                pass
            titles = self.check_titles_in_filepart(filepart, matches, context)
            if titles:
                titles, to_remove_c = titles
                ret.extend(titles)
                to_remove.extend(to_remove_c)
                break

        # Add title match in all fileparts containing the year.
        for filepart in years_fileparts:
            titles = self.check_titles_in_filepart(filepart, matches, context)
            if titles:
                # pylint:disable=unbalanced-tuple-unpacking
                titles, to_remove_c = titles
                ret.extend(titles)
                to_remove.extend(to_remove_c)

        if ret or to_remove:
            return ret, to_remove
        return False


class TitleFromPosition(TitleBaseRule):
    """
    Add title match in existing matches
    """
    dependency = [FilmTitleRule, SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule]

    properties = {'title': [None], 'alternative_title': [None]}

    def __init__(self):
        super(TitleFromPosition, self).__init__('title', ['title'], 'alternative_title')

    def enabled(self, context):
        return not is_disabled(context, 'alternative_title')


class PreferTitleWithYear(Rule):
    """
    Prefer title where filepart contains year.
    """
    dependency = TitleFromPosition
    consequence = [RemoveMatch, AppendTags(['equivalent-ignore'])]

    properties = {'title': [None]}

    def when(self, matches, context):
        with_year_in_group = []
        with_year = []
        titles = matches.named('title')

        for title_match in titles:
            filepart = matches.markers.at_match(title_match, lambda marker: marker.name == 'path', 0)
            if filepart:
                year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
                if year_match:
                    group = matches.markers.at_match(year_match, lambda m: m.name == 'group')
                    if group:
                        with_year_in_group.append(title_match)
                    else:
                        with_year.append(title_match)

        to_tag = []
        if with_year_in_group:
            title_values = {title_match.value for title_match in with_year_in_group}
            to_tag.extend(with_year_in_group)
        elif with_year:
            title_values = {title_match.value for title_match in with_year}
            to_tag.extend(with_year)
        else:
            title_values = {title_match.value for title_match in titles}

        to_remove = []
        for title_match in titles:
            if title_match.value not in title_values:
                to_remove.append(title_match)
        if to_remove or to_tag:
            return to_remove, to_tag
        return False
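A minimal sketch (invented name): the title is recovered from the hole left between the other matched properties:

from lib.guessit import guessit

info = guessit('The.Show.2012.S01E01.720p.HDTV.x264-GRP.mkv')
print(info.get('title'))  # 'The Show'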
83
lib/guessit/rules/properties/type.py
Normal file
@@ -0,0 +1,83 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
type property
"""
from rebulk import CustomRule, Rebulk, POST_PROCESS
from rebulk.match import Match

from ..common.pattern import is_disabled
from ...rules.processors import Processors


def _type(matches, value):
    """
    Define type match with given value.
    :param matches:
    :param value:
    :return:
    """
    matches.append(Match(len(matches.input_string), len(matches.input_string), name='type', value=value))


def type_(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'type'))
    rebulk = rebulk.rules(TypeProcessor)

    return rebulk


class TypeProcessor(CustomRule):
    """
    Post processor to find the file type based on all other found matches.
    """
    priority = POST_PROCESS

    dependency = Processors

    properties = {'type': ['episode', 'movie']}

    def when(self, matches, context):  # pylint:disable=too-many-return-statements
        option_type = context.get('type', None)
        if option_type:
            return option_type

        episode = matches.named('episode')
        season = matches.named('season')
        absolute_episode = matches.named('absolute_episode')
        episode_details = matches.named('episode_details')

        if episode or season or episode_details or absolute_episode:
            return 'episode'

        film = matches.named('film')
        if film:
            return 'movie'

        year = matches.named('year')
        date = matches.named('date')

        if date and not year:
            return 'episode'

        bonus = matches.named('bonus')
        if bonus and not year:
            return 'episode'

        crc32 = matches.named('crc32')
        anime_release_group = matches.named('release_group', lambda match: 'anime' in match.tags)
        if crc32 and anime_release_group:
            return 'episode'

        return 'movie'

    def then(self, matches, when_response, context):
        _type(matches, when_response)
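A short sketch of the fallback logic (invented names): anything episode-like forces 'episode', an explicit option short-circuits everything, and 'movie' is the default:

from lib.guessit import guessit

print(guessit('Show.S02E05.mkv').get('type'))                  # 'episode'
print(guessit('Movie.2014.1080p.mkv').get('type'))             # 'movie'
print(guessit('whatever.mkv', {'type': 'movie'}).get('type'))  # 'movie' (forced via options)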
126
lib/guessit/rules/properties/video_codec.py
Normal file
@@ -0,0 +1,126 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
video_codec and video_profile property
"""
from rebulk import Rebulk, Rule, RemoveMatch
from rebulk.remodule import re

from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround


def video_codec(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    rebulk.defaults(name="video_codec",
                    tags=['source-suffix', 'streaming_service.suffix'],
                    disabled=lambda context: is_disabled(context, 'video_codec'))

    rebulk.regex(r'Rv\d{2}', value='RealVideo')
    rebulk.regex('Mpe?g-?2', '[hx]-?262', value='MPEG-2')
    rebulk.string("DVDivX", "DivX", value="DivX")
    rebulk.string('XviD', value='Xvid')
    rebulk.regex('VC-?1', value='VC-1')
    rebulk.string('VP7', value='VP7')
    rebulk.string('VP8', 'VP80', value='VP8')
    rebulk.string('VP9', value='VP9')
    rebulk.regex('[hx]-?263', value='H.263')
    rebulk.regex('[hx]-?264', '(MPEG-?4)?AVC(?:HD)?', value='H.264')
    rebulk.regex('[hx]-?265', 'HEVC', value='H.265')
    rebulk.regex('(?P<video_codec>hevc)(?P<color_depth>10)', value={'video_codec': 'H.265', 'color_depth': '10-bit'},
                 tags=['video-codec-suffix'], children=True)

    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
    rebulk.defaults(clear=True,
                    name="video_profile",
                    validator=seps_surround,
                    disabled=lambda context: is_disabled(context, 'video_profile'))

    rebulk.string('BP', value='Baseline', tags='video_profile.rule')
    rebulk.string('XP', 'EP', value='Extended', tags='video_profile.rule')
    rebulk.string('MP', value='Main', tags='video_profile.rule')
    rebulk.string('HP', 'HiP', value='High', tags='video_profile.rule')

    # https://en.wikipedia.org/wiki/Scalable_Video_Coding
    rebulk.string('SC', 'SVC', value='Scalable Video Coding', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/AVCHD
    rebulk.regex('AVC(?:HD)?', value='Advanced Video Codec High Definition', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/H.265/HEVC
    rebulk.string('HEVC', value='High Efficiency Video Coding', tags='video_profile.rule')

    rebulk.regex('Hi422P', value='High 4:2:2')
    rebulk.regex('Hi444PP', value='High 4:4:4 Predictive')
    rebulk.regex('Hi10P?', value='High 10')  # no profile validation is required

    rebulk.string('DXVA', value='DXVA', name='video_api',
                  disabled=lambda context: is_disabled(context, 'video_api'))

    rebulk.defaults(clear=True,
                    name='color_depth',
                    validator=seps_surround,
                    disabled=lambda context: is_disabled(context, 'color_depth'))
    rebulk.regex('12.?bits?', value='12-bit')
    rebulk.regex('10.?bits?', 'YUV420P10', 'Hi10P?', value='10-bit')
    rebulk.regex('8.?bits?', value='8-bit')

    rebulk.rules(ValidateVideoCodec, VideoProfileRule)

    return rebulk


class ValidateVideoCodec(Rule):
    """
    Validate video_codec matches: they must be separated or adjacent to a tagged prefix/suffix match.
    """
    priority = 64
    consequence = RemoveMatch

    def enabled(self, context):
        return not is_disabled(context, 'video_codec')

    def when(self, matches, context):
        ret = []
        for codec in matches.named('video_codec'):
            if not seps_before(codec) and \
                    not matches.at_index(codec.start - 1, lambda match: 'video-codec-prefix' in match.tags):
                ret.append(codec)
                continue
            if not seps_after(codec) and \
                    not matches.at_index(codec.end + 1, lambda match: 'video-codec-suffix' in match.tags):
                ret.append(codec)
                continue
        return ret


class VideoProfileRule(Rule):
    """
    Rule to validate video_profile
    """
    consequence = RemoveMatch

    def enabled(self, context):
        return not is_disabled(context, 'video_profile')

    def when(self, matches, context):
        profile_list = matches.named('video_profile', lambda match: 'video_profile.rule' in match.tags)
        ret = []
        for profile in profile_list:
            codec = matches.at_span(profile.span, lambda match: match.name == 'video_codec', 0)
            if not codec:
                codec = matches.previous(profile, lambda match: match.name == 'video_codec')
            if not codec:
                codec = matches.next(profile, lambda match: match.name == 'video_codec')
            if not codec:
                ret.append(profile)
        return ret
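A quick sketch (invented names): codec aliases collapse to a canonical value, and the combined 'hevc10' token yields both video_codec and color_depth:

from lib.guessit import guessit

print(guessit('Movie.2014.1080p.x264.mkv').get('video_codec'))  # 'H.264'
info = guessit('Show.S01E01.1080p.hevc10.mkv')
print(info.get('video_codec'), info.get('color_depth'))         # H.265 10-bit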
110
lib/guessit/rules/properties/website.py
Normal file
@@ -0,0 +1,110 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Website property.
"""
# from pkg_resources import resource_stream  # @UnresolvedImport
import os

from rebulk.remodule import re

from rebulk import Rebulk, Rule, RemoveMatch
from ..common import seps
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
from ...reutils import build_or_pattern


def website(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'website'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name="website")
    with open(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'tlds-alpha-by-domain.txt'), 'rb') as tld_file:
|
||||||
|
tlds = [
|
||||||
|
tld.strip().decode('utf-8')
|
||||||
|
for tld in tld_file.readlines()
|
||||||
|
if b'--' not in tld
|
||||||
|
][1:] # All registered domain extension
|
||||||
|
|
||||||
|
safe_tlds = config['safe_tlds'] # For sure a website extension
|
||||||
|
safe_subdomains = config['safe_subdomains'] # For sure a website subdomain
|
||||||
|
safe_prefix = config['safe_prefixes'] # Those words before a tlds are sure
|
||||||
|
website_prefixes = config['prefixes']
|
||||||
|
|
||||||
|
rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
|
||||||
|
r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) +
|
||||||
|
r'))(?:[^a-z0-9]|$)',
|
||||||
|
children=True)
|
||||||
|
rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
|
||||||
|
r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_tlds) +
|
||||||
|
r'))(?:[^a-z0-9]|$)',
|
||||||
|
safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
|
||||||
|
rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
|
||||||
|
r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_prefix) +
|
||||||
|
r'\.)+(?:'+build_or_pattern(tlds) +
|
||||||
|
r'))(?:[^a-z0-9]|$)',
|
||||||
|
safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)
|
||||||
|
|
||||||
|
rebulk.string(*website_prefixes,
|
||||||
|
validator=seps_surround, private=True, tags=['website.prefix'])
|
||||||
|
|
||||||
|
class PreferTitleOverWebsite(Rule):
|
||||||
|
"""
|
||||||
|
If found match is more likely a title, remove website.
|
||||||
|
"""
|
||||||
|
consequence = RemoveMatch
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def valid_followers(match):
|
||||||
|
"""
|
||||||
|
Validator for next website matches
|
||||||
|
"""
|
||||||
|
return match.named('season', 'episode', 'year')
|
||||||
|
|
||||||
|
def when(self, matches, context):
|
||||||
|
to_remove = []
|
||||||
|
for website_match in matches.named('website'):
|
||||||
|
safe = False
|
||||||
|
for safe_start in safe_subdomains + safe_prefix:
|
||||||
|
if website_match.value.lower().startswith(safe_start):
|
||||||
|
safe = True
|
||||||
|
break
|
||||||
|
if not safe:
|
||||||
|
suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
|
||||||
|
if suffix:
|
||||||
|
group = matches.markers.at_match(website_match, lambda marker: marker.name == 'group', 0)
|
||||||
|
if not group:
|
||||||
|
to_remove.append(website_match)
|
||||||
|
return to_remove
|
||||||
|
|
||||||
|
rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)
|
||||||
|
|
||||||
|
return rebulk
|
||||||
|
|
||||||
|
|
||||||
|
class ValidateWebsitePrefix(Rule):
|
||||||
|
"""
|
||||||
|
Validate website prefixes
|
||||||
|
"""
|
||||||
|
priority = 64
|
||||||
|
consequence = RemoveMatch
|
||||||
|
|
||||||
|
def when(self, matches, context):
|
||||||
|
to_remove = []
|
||||||
|
for prefix in matches.tagged('website.prefix'):
|
||||||
|
website_match = matches.next(prefix, predicate=lambda match: match.name == 'website', index=0)
|
||||||
|
if (not website_match or
|
||||||
|
matches.holes(prefix.end, website_match.start,
|
||||||
|
formatter=cleanup, seps=seps, predicate=lambda match: match.value)):
|
||||||
|
to_remove.append(prefix)
|
||||||
|
return to_remove
|
||||||
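A minimal usage sketch for the property built above, illustration only; the input and expected value come from enable_disable_properties.yml later in this diff:

    from guessit import guessit

    # 'co.uk' is resolved through the registered-TLD list loaded above.
    print(guessit('wawa.co.uk')['website'])  # wawa.co.uk
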
3 lib/guessit/test/__init__.py Normal file
@@ -0,0 +1,3 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name
1 lib/guessit/test/config/dummy.txt Normal file
@@ -0,0 +1 @@
Not a configuration file
4 lib/guessit/test/config/test.json Normal file
@@ -0,0 +1,4 @@
{
  "expected_title": ["The 100", "OSS 117"],
  "yaml": false
}
4 lib/guessit/test/config/test.yaml Normal file
@@ -0,0 +1,4 @@
expected_title:
  - The 100
  - OSS 117
yaml: True
4 lib/guessit/test/config/test.yml Normal file
@@ -0,0 +1,4 @@
expected_title:
  - The 100
  - OSS 117
yaml: True
335 lib/guessit/test/enable_disable_properties.yml Normal file
@@ -0,0 +1,335 @@
? vorbis
: options: --exclude audio_codec
  -audio_codec: Vorbis

? DTS-ES
: options: --exclude audio_profile
  audio_codec: DTS
  -audio_profile: Extended Surround

? DTS.ES
: options: --include audio_codec
  audio_codec: DTS
  -audio_profile: Extended Surround

? 5.1
? 5ch
? 6ch
: options: --exclude audio_channels
  -audio_channels: '5.1'

? Movie Title-x01-Other Title.mkv
? Movie Title-x01-Other Title
? directory/Movie Title-x01-Other Title/file.mkv
: options: --exclude bonus
  -bonus: 1
  -bonus_title: Other Title

? Title-x02-Bonus Title.mkv
: options: --include bonus
  bonus: 2
  -bonus_title: Other Title

? cd 1of3
: options: --exclude cd
  -cd: 1
  -cd_count: 3

? This.is.Us
: options: --exclude country
  title: This is Us
  -country: US

? 2015.01.31
: options: --exclude date
  year: 2015
  -date: 2015-01-31

? Something 2 mar 2013)
: options: --exclude date
  -date: 2013-03-02

? 2012 2009 S01E02 2015  # If no year is marked, the second one is guessed.
: options: --exclude year
  -year: 2009

? Director's cut
: options: --exclude edition
  -edition: Director's Cut

? 2x5
? 2X5
? 02x05
? 2X05
? 02x5
? S02E05
? s02e05
? s02e5
? s2e05
? s02ep05
? s2EP5
: options: --exclude season
  -season: 2
  -episode: 5

? 2x6
? 2X6
? 02x06
? 2X06
? 02x6
? S02E06
? s02e06
? s02e6
? s2e06
? s02ep06
? s2EP6
: options: --exclude episode
  -season: 2
  -episode: 6

? serie Season 2 other
: options: --exclude season
  -season: 2

? Some Dummy Directory/S02 Some Series/E01-Episode title.mkv
: options: --exclude episode_title
  -episode_title: Episode title
  season: 2
  episode: 1

? Another Dummy Directory/S02 Some Series/E01-Episode title.mkv
: options: --include season --include episode
  -episode_title: Episode title
  season: 2
  episode: 1

# pattern contains season and episode: it won't work enabling only one
? Some Series S03E01E02
: options: --include episode
  -season: 3
  -episode: [1, 2]

# pattern contains season and episode: it won't work enabling only one
? Another Series S04E01E02
: options: --include season
  -season: 4
  -episode: [1, 2]

? Show.Name.Season.4.Episode.1
: options: --include episode
  -season: 4
  episode: 1

? Another.Show.Name.Season.4.Episode.1
: options: --include season
  season: 4
  -episode: 1

? Some Series S01 02 03
: options: --exclude season
  -season: [1, 2, 3]

? Some Series E01 02 04
: options: --exclude episode
  -episode: [1, 2, 4]

? A very special episode s06 special
: options: -t episode --exclude episode_details
  season: 6
  -episode_details: Special

? S01D02.3-5-GROUP
: options: --exclude disc
  -season: 1
  -disc: [2, 3, 4, 5]
  -episode: [2, 3, 4, 5]

? S01D02&4-6&8
: options: --exclude season
  -season: 1
  -disc: [2, 4, 5, 6, 8]
  -episode: [2, 4, 5, 6, 8]

? Film Title-f01-Series Title.mkv
: options: --exclude film
  -film: 1
  -film_title: Film Title

? Another Film Title-f01-Series Title.mkv
: options: --exclude film_title
  film: 1
  -film_title: Film Title

? English
? .ENG.
: options: --exclude language
  -language: English

? SubFrench
? SubFr
? STFr
: options: --exclude subtitle_language
  -language: French
  -subtitle_language: French

? ST.FR
: options: --exclude subtitle_language
  language: French
  -subtitle_language: French

? ENG.-.sub.FR
? ENG.-.FR Sub
: options: --include language
  language: [English, French]
  -subtitle_language: French

? ENG.-.SubFR
: options: --include language
  language: English
  -subtitle_language: French

? ENG.-.FRSUB
? ENG.-.FRSUBS
? ENG.-.FR-SUBS
: options: --include subtitle_language
  -language: English
  subtitle_language: French

? DVD.Real.XViD
? DVD.fix.XViD
: options: --exclude other
  -other: Fix
  -proper_count: 1

? Part 3
? Part III
? Part Three
? Part Trois
? Part3
: options: --exclude part
  -part: 3

? Some.Title.XViD-by.Artik[SEDG].avi
: options: --exclude release_group
  -release_group: Artik[SEDG]

? "[ABC] Some.Title.avi"
? some/folder/[ABC]Some.Title.avi
: options: --exclude release_group
  -release_group: ABC

? 360p
? 360px
? "360"
? +500x360
: options: --exclude screen_size
  -screen_size: 360p

? 640x360
: options: --exclude aspect_ratio
  screen_size: 360p
  -aspect_ratio: 1.778

? 8196x4320
: options: --exclude screen_size
  -screen_size: 4320p
  -aspect_ratio: 1.897

? 4.3gb
: options: --exclude size
  -size: 4.3GB

? VhS_rip
? VHS.RIP
: options: --exclude source
  -source: VHS
  -other: Rip

? DVD.RIP
: options: --include other
  -source: DVD
  -other: Rip

? Title Only.avi
: options: --exclude title
  -title: Title Only

? h265
? x265
? h.265
? x.265
? hevc
: options: --exclude video_codec
  -video_codec: H.265

? hevc10
: options: --include color_depth
  -video_codec: H.265
  -color_depth: 10-bit

? HEVC-YUV420P10
: options: --include color_depth
  -video_codec: H.265
  color_depth: 10-bit

? h265-HP
: options: --exclude video_profile
  video_codec: H.265
  -video_profile: High

? House.of.Cards.2013.S02E03.1080p.NF.WEBRip.DD5.1.x264-NTb.mkv
? House.of.Cards.2013.S02E03.1080p.Netflix.WEBRip.DD5.1.x264-NTb.mkv
: options: --exclude streaming_service
  -streaming_service: Netflix

? wawa.co.uk
: options: --exclude website
  -website: wawa.co.uk

? movie.mp4
: options: --exclude mimetype
  -mimetype: video/mp4

? another movie.mkv
: options: --exclude container
  -container: mkv

? series s02e01
: options: --exclude type
  -type: episode

? series s02e01
: options: --exclude type
  -type: episode

? Hotel.Hell.S01E01.720p.DD5.1.448kbps-ALANiS
: options: --exclude audio_bit_rate
  -audio_bit_rate: 448Kbps

? Katy Perry - Pepsi & Billboard Summer Beats Concert Series 2012 1080i HDTV 20 Mbps DD2.0 MPEG2-TrollHD.ts
: options: --exclude video_bit_rate
  -video_bit_rate: 20Mbps

? "[Figmentos] Monster 34 - At the End of Darkness [781219F1].mkv"
: options: --exclude crc32
  -crc32: 781219F1

? 1080p25
: options: --exclude frame_rate
  screen_size: 1080p
  -frame_rate: 25fps

? 1080p25
: options: --exclude screen_size
  -screen_size: 1080p
  -frame_rate: 25fps

? 1080p25
: options: --include frame_rate
  -screen_size: 1080p
  -frame_rate: 25fps

? 1080p 30fps
: options: --exclude screen_size
  -screen_size: 1080p
  frame_rate: 30fps
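The --include/--exclude switches exercised above can also be passed through the Python API, which accepts a CLI-style option string as second argument; a sketch under the default rule configuration:

    from guessit import guessit

    # Per the DTS-ES case above: the codec is still detected,
    # the audio_profile property is not.
    print(guessit('DTS-ES', '--exclude audio_profile'))  # {'audio_codec': 'DTS', ...}
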
4693 lib/guessit/test/episodes.yml Normal file
File diff suppressed because it is too large
1786 lib/guessit/test/movies.yml Normal file
File diff suppressed because it is too large
3 lib/guessit/test/rules/__init__.py Normal file
@@ -0,0 +1,3 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name
134 lib/guessit/test/rules/audio_codec.yml Normal file
@@ -0,0 +1,134 @@
# Multiple input strings having same expected results can be chained.
# Use $ marker to check inputs that should not match results.


? +MP3
? +lame
? +lame3.12
? +lame3.100
: audio_codec: MP3

? +MP2
: audio_codec: MP2

? +DolbyDigital
? +DD
? +Dolby Digital
? +AC3
: audio_codec: Dolby Digital

? +DDP
? +DD+
? +EAC3
: audio_codec: Dolby Digital Plus

? +DolbyAtmos
? +Dolby Atmos
? +Atmos
? -Atmosphere
: audio_codec: Dolby Atmos

? +AAC
: audio_codec: AAC

? +Flac
: audio_codec: FLAC

? +DTS
: audio_codec: DTS

? +True-HD
? +trueHD
: audio_codec: Dolby TrueHD

? +True-HD51
? +trueHD51
: audio_codec: Dolby TrueHD
  audio_channels: '5.1'

? +DTSHD
? +DTS HD
? +DTS-HD
: audio_codec: DTS-HD

? +DTS-HDma
? +DTSMA
: audio_codec: DTS-HD
  audio_profile: Master Audio

? +AC3-hq
: audio_codec: Dolby Digital
  audio_profile: High Quality

? +AAC-HE
: audio_codec: AAC
  audio_profile: High Efficiency

? +AAC-LC
: audio_codec: AAC
  audio_profile: Low Complexity

? +AAC2.0
? +AAC20
: audio_codec: AAC
  audio_channels: '2.0'

? +7.1
? +7ch
? +8ch
: audio_channels: '7.1'

? +5.1
? +5ch
? +6ch
: audio_channels: '5.1'

? +2ch
? +2.0
? +stereo
: audio_channels: '2.0'

? +1ch
? +mono
: audio_channels: '1.0'

? DD5.1
? DD51
: audio_codec: Dolby Digital
  audio_channels: '5.1'

? -51
: audio_channels: '5.1'

? DTS-HD.HRA
? DTSHD.HRA
? DTS-HD.HR
? DTSHD.HR
? -HRA
? -HR
: audio_codec: DTS-HD
  audio_profile: High Resolution Audio

? DTSES
? DTS-ES
? -ES
: audio_codec: DTS
  audio_profile: Extended Surround

? DD-EX
? DDEX
? -EX
: audio_codec: Dolby Digital
  audio_profile: EX

? OPUS
: audio_codec: Opus

? Vorbis
: audio_codec: Vorbis

? PCM
: audio_codec: PCM

? LPCM
: audio_codec: LPCM
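Illustration only, reusing an input chained above: a single token can yield both codec and channel count.

    from guessit import guessit

    info = guessit('DD5.1')
    print(info['audio_codec'], info['audio_channels'])  # Dolby Digital 5.1
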
9 lib/guessit/test/rules/bonus.yml Normal file
@@ -0,0 +1,9 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? Movie Title-x01-Other Title.mkv
? Movie Title-x01-Other Title
? directory/Movie Title-x01-Other Title/file.mkv
: title: Movie Title
  bonus_title: Other Title
  bonus: 1

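Illustration only, from the first case above: the x01 marker splits the name into title and bonus_title.

    from guessit import guessit

    info = guessit('Movie Title-x01-Other Title.mkv')
    print(info['title'], info['bonus'], info['bonus_title'])  # Movie Title 1 Other Title
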
10 lib/guessit/test/rules/cds.yml Normal file
@@ -0,0 +1,10 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? cd 1of3
: cd: 1
  cd_count: 3

? Some.Title-DVDRIP-x264-CDP
: cd: !!null
  release_group: CDP
  video_codec: H.264
467 lib/guessit/test/rules/common_words.yml Normal file
@@ -0,0 +1,467 @@
? is
: title: is

? it
: title: it

? am
: title: am

? mad
: title: mad

? men
: title: men

? man
: title: man

? run
: title: run

? sin
: title: sin

? st
: title: st

? to
: title: to

? 'no'
: title: 'no'

? non
: title: non

? war
: title: war

? min
: title: min

? new
: title: new

? car
: title: car

? day
: title: day

? bad
: title: bad

? bat
: title: bat

? fan
: title: fan

? fry
: title: fry

? cop
: title: cop

? zen
: title: zen

? gay
: title: gay

? fat
: title: fat

? one
: title: one

? cherokee
: title: cherokee

? got
: title: got

? an
: title: an

? as
: title: as

? cat
: title: cat

? her
: title: her

? be
: title: be

? hat
: title: hat

? sun
: title: sun

? may
: title: may

? my
: title: my

? mr
: title: mr

? rum
: title: rum

? pi
: title: pi

? bb
: title: bb

? bt
: title: bt

? tv
: title: tv

? aw
: title: aw

? by
: title: by

? md
: other: Mic Dubbed

? mp
: title: mp

? cd
: title: cd

? in
: title: in

? ad
: title: ad

? ice
: title: ice

? ay
: title: ay

? at
: title: at

? star
: title: star

? so
: title: so

? he
: title: he

? do
: title: do

? ax
: title: ax

? mx
: title: mx

? bas
: title: bas

? de
: title: de

? le
: title: le

? son
: title: son

? ne
: title: ne

? ca
: title: ca

? ce
: title: ce

? et
: title: et

? que
: title: que

? mal
: title: mal

? est
: title: est

? vol
: title: vol

? or
: title: or

? mon
: title: mon

? se
: title: se

? je
: title: je

? tu
: title: tu

? me
: title: me

? ma
: title: ma

? va
: title: va

? au
: country: AU

? lu
: title: lu

? wa
: title: wa

? ga
: title: ga

? ao
: title: ao

? la
: title: la

? el
: title: el

? del
: title: del

? por
: title: por

? mar
: title: mar

? al
: title: al

? un
: title: un

? ind
: title: ind

? arw
: title: arw

? ts
: source: Telesync

? ii
: title: ii

? bin
: title: bin

? chan
: title: chan

? ss
: title: ss

? san
: title: san

? oss
: title: oss

? iii
: title: iii

? vi
: title: vi

? ben
: title: ben

? da
: title: da

? lt
: title: lt

? ch
: title: ch

? sr
: title: sr

? ps
: title: ps

? cx
: title: cx

? vo
: title: vo

? mkv
: container: mkv

? avi
: container: avi

? dmd
: title: dmd

? the
: title: the

? dis
: title: dis

? cut
: title: cut

? stv
: title: stv

? des
: title: des

? dia
: title: dia

? and
: title: and

? cab
: title: cab

? sub
: title: sub

? mia
: title: mia

? rim
: title: rim

? las
: title: las

? une
: title: une

? par
: title: par

? srt
: container: srt

? ano
: title: ano

? toy
: title: toy

? job
: title: job

? gag
: title: gag

? reel
: title: reel

? www
: title: www

? for
: title: for

? ayu
: title: ayu

? csi
: title: csi

? ren
: title: ren

? moi
: title: moi

? sur
: title: sur

? fer
: title: fer

? fun
: title: fun

? two
: title: two

? big
: title: big

? psy
: title: psy

? air
: title: air

? brazil
: title: brazil

? jordan
: title: jordan

? bs
: title: bs

? kz
: title: kz

? gt
: title: gt

? im
: title: im

? pt
: language: pt

? scr
: title: scr

? sd
: title: sd

? hr
: other: High Resolution
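Illustration only: these common words are deliberately not consumed by any property rule, so on their own they fall through to the title.

    from guessit import guessit

    print(guessit('star')['title'])  # star
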
13 lib/guessit/test/rules/country.yml Normal file
@@ -0,0 +1,13 @@
# Multiple input strings having same expected results can be chained.
# Use $ marker to check inputs that should not match results.
? Us.this.is.title
? this.is.title.US
: country: US
  title: this is title

? This.is.Us
: title: This is Us

? This.Is.Us
: options: --no-default-config
  title: This Is Us
50 lib/guessit/test/rules/date.yml Normal file
@@ -0,0 +1,50 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? +09.03.08
? +09.03.2008
? +2008.03.09
: date: 2008-03-09

? +31.01.15
? +31.01.2015
? +15.01.31
? +2015.01.31
: date: 2015-01-31

? +01.02.03
: date: 2003-02-01

? +01.02.03
: options: --date-year-first
  date: 2001-02-03

? +01.02.03
: options: --date-day-first
  date: 2003-02-01

? 1919
? 2030
: !!map {}

? 2029
: year: 2029

? (1920)
: year: 1920

? 2012
: year: 2012

? 2011 2013 (2012) (2015)  # first marked year is guessed.
: title: "2011 2013"
  year: 2012

? 2012 2009 S01E02 2015  # If no year is marked, the second one is guessed.
: title: "2012"
  year: 2009
  episode_title: "2015"

? Something 2 mar 2013)
: title: Something
  date: 2013-03-02
  type: episode
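Illustration only: the same ambiguous token resolves differently under the date options tested above.

    from guessit import guessit

    print(guessit('01.02.03')['date'])                       # 2003-02-01
    print(guessit('01.02.03', '--date-year-first')['date'])  # 2001-02-03
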
63 lib/guessit/test/rules/edition.yml Normal file
@@ -0,0 +1,63 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? Director's cut
? Edition Director's cut
: edition: Director's Cut

? Collector
? Collector Edition
? Edition Collector
: edition: Collector

? Special Edition
? Edition Special
? -Special
: edition: Special

? Criterion Edition
? Edition Criterion
? CC
? -Criterion
: edition: Criterion

? Deluxe
? Deluxe Edition
? Edition Deluxe
: edition: Deluxe

? Super Movie Alternate XViD
? Super Movie Alternative XViD
? Super Movie Alternate Cut XViD
? Super Movie Alternative Cut XViD
: edition: Alternative Cut

? ddc
: edition: Director's Definitive Cut

? IMAX
? IMAX Edition
: edition: IMAX

? ultimate edition
? -ultimate
: edition: Ultimate

? ultimate collector edition
? ultimate collector's edition
? ultimate collectors edition
? -collectors edition
? -ultimate edition
: edition: [Ultimate, Collector]

? ultimate collectors edition dc
: edition: [Ultimate, Collector, Director's Cut]

? fan edit
? fan edition
? fan collection
: edition: Fan

? ultimate fan edit
? ultimate fan edition
? ultimate fan collection
: edition: [Ultimate, Fan]
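Illustration only, from the first case above:

    from guessit import guessit

    print(guessit("Director's cut")['edition'])  # Director's Cut
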
331 lib/guessit/test/rules/episodes.yml Normal file
@@ -0,0 +1,331 @@
# Multiple input strings having same expected results can be chained.
# Use $ marker to check inputs that should not match results.
? +2x5
? +2X5
? +02x05
? +2X05
? +02x5
? S02E05
? s02e05
? s02e5
? s2e05
? s02ep05
? s2EP5
? -s03e05
? -s02e06
? -3x05
? -2x06
: season: 2
  episode: 5

? "+0102"
? "+102"
: season: 1
  episode: 2

? "0102 S03E04"
? "S03E04 102"
: season: 3
  episode: 4

? +serie Saison 2 other
? +serie Season 2 other
? +serie Saisons 2 other
? +serie Seasons 2 other
? +serie Season Two other
? +serie Season II other
: season: 2

? Some Series.S02E01.Episode.title.mkv
? Some Series/Season 02/E01-Episode title.mkv
? Some Series/Season 02/Some Series-E01-Episode title.mkv
? Some Dummy Directory/Season 02/Some Series-E01-Episode title.mkv
? -Some Dummy Directory/Season 02/E01-Episode title.mkv
? Some Series/Unsafe Season 02/Some Series-E01-Episode title.mkv
? -Some Series/Unsafe Season 02/E01-Episode title.mkv
? Some Series/Season 02/E01-Episode title.mkv
? Some Series/ Season 02/E01-Episode title.mkv
? Some Dummy Directory/Some Series S02/E01-Episode title.mkv
? Some Dummy Directory/S02 Some Series/E01-Episode title.mkv
: title: Some Series
  episode_title: Episode title
  season: 2
  episode: 1

? Some Series.S02E01.mkv
? Some Series/Season 02/E01.mkv
? Some Series/Season 02/Some Series-E01.mkv
? Some Dummy Directory/Season 02/Some Series-E01.mkv
? -Some Dummy Directory/Season 02/E01.mkv
? Some Series/Unsafe Season 02/Some Series-E01.mkv
? -Some Series/Unsafe Season 02/E01.mkv
? Some Series/Season 02/E01.mkv
? Some Series/ Season 02/E01.mkv
? Some Dummy Directory/Some Series S02/E01-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA.mkv
: title: Some Series
  season: 2
  episode: 1

? Some Series S03E01E02
: title: Some Series
  season: 3
  episode: [1, 2]

? Some Series S01S02S03
? Some Series S01-02-03
? Some Series S01 S02 S03
? Some Series S01 02 03
: title: Some Series
  season: [1, 2, 3]

? Some Series E01E02E03
? Some Series E01-02-03
? Some Series E01-03
? Some Series E01 E02 E03
? Some Series E01 02 03
: title: Some Series
  episode: [1, 2, 3]

? Some Series E01E02E04
? Some Series E01 E02 E04
? Some Series E01 02 04
: title: Some Series
  episode: [1, 2, 4]

? Some Series E01-02-04
? Some Series E01-04
? Some Series E01-04
: title: Some Series
  episode: [1, 2, 3, 4]

? Some Series E01-02-E04
: title: Some Series
  episode: [1, 2, 3, 4]

? Episode 3
? -Episode III
: episode: 3

? Episode 3
? Episode III
: options: -t episode
  episode: 3

? -A very special movie
: episode_details: Special

? -A very special episode
: options: -t episode
  episode_details: Special

? A very special episode s06 special
: options: -t episode
  title: A very special episode
  episode_details: Special

? 12 Monkeys\Season 01\Episode 05\12 Monkeys - S01E05 - The Night Room.mkv
: container: mkv
  title: 12 Monkeys
  episode: 5
  season: 1

? S03E02.X.1080p
: episode: 2
  screen_size: 1080p
  season: 3

? Something 1 x 2-FlexGet
: options: -t episode
  title: Something
  season: 1
  episode: 2
  episode_title: FlexGet

? Show.Name.-.Season.1.to.3.-.Mp4.1080p
? Show.Name.-.Season.1~3.-.Mp4.1080p
? Show.Name.-.Saison.1.a.3.-.Mp4.1080p
: container: mp4
  screen_size: 1080p
  season:
    - 1
    - 2
    - 3
  title: Show Name

? Show.Name.Season.1.3&5.HDTV.XviD-GoodGroup[SomeTrash]
? Show.Name.Season.1.3 and 5.HDTV.XviD-GoodGroup[SomeTrash]
: source: HDTV
  release_group: GoodGroup[SomeTrash]
  season:
    - 1
    - 3
    - 5
  title: Show Name
  type: episode
  video_codec: Xvid

? Show.Name.Season.1.2.3-5.HDTV.XviD-GoodGroup[SomeTrash]
? Show.Name.Season.1.2.3~5.HDTV.XviD-GoodGroup[SomeTrash]
? Show.Name.Season.1.2.3 to 5.HDTV.XviD-GoodGroup[SomeTrash]
: source: HDTV
  release_group: GoodGroup[SomeTrash]
  season:
    - 1
    - 2
    - 3
    - 4
    - 5
  title: Show Name
  type: episode
  video_codec: Xvid

? The.Get.Down.S01EP01.FRENCH.720p.WEBRIP.XVID-STR
: episode: 1
  source: Web
  other: Rip
  language: fr
  release_group: STR
  screen_size: 720p
  season: 1
  title: The Get Down
  type: episode
  video_codec: Xvid

? My.Name.Is.Earl.S01E01-S01E21.SWE-SUB
: episode:
    - 1
    - 2
    - 3
    - 4
    - 5
    - 6
    - 7
    - 8
    - 9
    - 10
    - 11
    - 12
    - 13
    - 14
    - 15
    - 16
    - 17
    - 18
    - 19
    - 20
    - 21
  season: 1
  subtitle_language: sv
  title: My Name Is Earl
  type: episode

? Show.Name.Season.4.Episodes.1-12
: episode:
    - 1
    - 2
    - 3
    - 4
    - 5
    - 6
    - 7
    - 8
    - 9
    - 10
    - 11
    - 12
  season: 4
  title: Show Name
  type: episode

? show name s01.to.s04
: season:
    - 1
    - 2
    - 3
    - 4
  title: show name
  type: episode

? epi
: options: -t episode
  title: epi

? Episode20
? Episode 20
: episode: 20

? Episode50
? Episode 50
: episode: 50

? Episode51
? Episode 51
: episode: 51

? Episode70
? Episode 70
: episode: 70

? Episode71
? Episode 71
: episode: 71

? S01D02.3-5-GROUP
: disc: [2, 3, 4, 5]

? S01D02&4-6&8
: disc: [2, 4, 5, 6, 8]

? Something.4x05-06
? Something - 4x05-06
? Something:4x05-06
? Something 4x05-06
? Something-4x05-06
: title: Something
  season: 4
  episode:
    - 5
    - 6

? Something.4x05-06
? Something - 4x05-06
? Something:4x05-06
? Something 4x05-06
? Something-4x05-06
: options: -T something
  title: something
  season: 4
  episode:
    - 5
    - 6

? Colony 23/S01E01.Some.title.mkv
: title: Colony 23
  season: 1
  episode: 1
  episode_title: Some title

? Show.Name.E02.2010.mkv
: options: -t episode
  title: Show Name
  year: 2010
  episode: 2

? Show.Name.E02.S2010.mkv
: options: -t episode
  title: Show Name
  year: 2010
  season: 2010
  episode: 2


? Show.Name.E02.2010.mkv
: title: Show Name
  year: 2010
  episode: 2

? Show.Name.E02.S2010.mkv
: title: Show Name
  year: 2010
  season: 2010
  episode: 2
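Illustration only, copied from one of the cases above:

    from guessit import guessit

    info = guessit('Something 1 x 2-FlexGet', '-t episode')
    print(info['season'], info['episode'], info['episode_title'])  # 1 2 FlexGet
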
9 lib/guessit/test/rules/film.yml Normal file
@@ -0,0 +1,9 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? Film Title-f01-Series Title.mkv
? Film Title-f01-Series Title
? directory/Film Title-f01-Series Title/file.mkv
: title: Series Title
  film_title: Film Title
  film: 1

47 lib/guessit/test/rules/language.yml Normal file
@@ -0,0 +1,47 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? +English
? .ENG.
: language: English

? +French
: language: French

? +SubFrench
? +SubFr
? +STFr
? ST.FR
: subtitle_language: French

? +ENG.-.sub.FR
? ENG.-.FR Sub
? +ENG.-.SubFR
? +ENG.-.FRSUB
? +ENG.-.FRSUBS
? +ENG.-.FR-SUBS
: language: English
  subtitle_language: French

? "{Fr-Eng}.St{Fr-Eng}"
? "Le.Prestige[x264.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv"
: language: [French, English]
  subtitle_language: [French, English]

? +ENG.-.sub.SWE
? ENG.-.SWE Sub
? +ENG.-.SubSWE
? +ENG.-.SWESUB
? +ENG.-.sub.SV
? ENG.-.SV Sub
? +ENG.-.SubSV
? +ENG.-.SVSUB
: language: English
  subtitle_language: Swedish

? The English Patient (1996)
: title: The English Patient
  -language: english

? French.Kiss.1995.1080p
: title: French Kiss
  -language: french
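Illustration only: audio language vs. subtitle language, per the ENG/FRSUB cases above (guessit returns babelfish Language objects, shown here via their English names):

    from guessit import guessit

    info = guessit('ENG.-.FRSUB')
    print(info['language'].name, info['subtitle_language'].name)  # English French
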
169 lib/guessit/test/rules/other.yml Normal file
@@ -0,0 +1,169 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
? +DVDSCR
? +DVDScreener
? +DVD-SCR
? +DVD Screener
? +DVD AnythingElse Screener
? -DVD AnythingElse SCR
: other: Screener

? +AudioFix
? +AudioFixed
? +Audio Fix
? +Audio Fixed
: other: Audio Fixed

? +SyncFix
? +SyncFixed
? +Sync Fix
? +Sync Fixed
: other: Sync Fixed

? +DualAudio
? +Dual Audio
: other: Dual Audio

? +ws
? +WideScreen
? +Wide Screen
: other: Widescreen

# Fix must be surrounded by other properties to be matched.
? DVD.fix.XViD
? -DVD.Fix
? -Fix.XViD
: other: Fix
  -proper_count: 1

? -DVD.BlablaBla.Fix.Blablabla.XVID
? -DVD.BlablaBla.Fix.XVID
? -DVD.Fix.Blablabla.XVID
: other: Fix
  -proper_count: 1


? DVD.Real.PROPER.REPACK
: other: Proper
  proper_count: 3


? Proper.720p
? +Repack
? +Rerip
: other: Proper
  proper_count: 1

? XViD.Fansub
: other: Fan Subtitled

? XViD.Fastsub
: other: Fast Subtitled

? +Season Complete
? -Complete
: other: Complete

? R5
: other: Region 5

? RC
: other: Region C

? PreAir
? Pre Air
: other: Preair

? Screener
: other: Screener

? Remux
: other: Remux

? 3D.2019
: other: 3D

? HD
: other: HD

? FHD
? FullHD
? Full HD
: other: Full HD

? UHD
? Ultra
? UltraHD
? Ultra HD
: other: Ultra HD

? mHD  # ??
? HDLight
: other: Micro HD

? HQ
: other: High Quality

? hr
: other: High Resolution

? PAL
: other: PAL

? SECAM
: other: SECAM

? NTSC
: other: NTSC

? LDTV
: other: Low Definition

? LD
: other: Line Dubbed

? MD
: other: Mic Dubbed

? -The complete movie
: other: Complete

? +The complete movie
: title: The complete movie

? +AC3-HQ
: audio_profile: High Quality

? Other-HQ
: other: High Quality

? reenc
? re-enc
? re-encoded
? reencoded
: other: Reencoded

? CONVERT XViD
: other: Converted

? +HDRIP  # it's a Rip from a non-specified HD source
: other: [HD, Rip]

? SDR
: other: Standard Dynamic Range

? HDR
? HDR10
? -HDR100
: other: HDR10

? BT2020
? BT.2020
? -BT.20200
? -BT.2021
: other: BT.2020

? Upscaled
? Upscale
: other: Upscaled
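Illustration only, from the Proper/Repack case above: each proper-marker token bumps proper_count.

    from guessit import guessit

    info = guessit('DVD.Real.PROPER.REPACK')
    # 'other' may hold a list of values; per the test above it includes
    # Proper, and proper_count is 3.
    print(info['other'], info['proper_count'])
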
Some files were not shown because too many files have changed in this diff