rimesso guessit con le modifiche per gli import
This commit is contained in:
@@ -7,6 +7,8 @@ import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from lib.guessit import guessit
|
||||
|
||||
PY3 = False
|
||||
if sys.version_info[0] >= 3: PY3 = True; unicode = str; unichr = chr; long = int
|
||||
if PY3:
|
||||
@@ -271,14 +273,13 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
|
||||
longtitle = title + (s if title and title2 else '') + title2 + '\n'
|
||||
|
||||
if sceneTitle:
|
||||
import lib.PTN.parse as parse
|
||||
parsedTitle = parse(title)
|
||||
parsedTitle = guessit(title)
|
||||
title = longtitle = parsedTitle.get('title', '')
|
||||
log('TITOLO',title)
|
||||
if parsedTitle.get('quality'):
|
||||
quality = str(parsedTitle.get('quality'))
|
||||
if parsedTitle.get('resolution'):
|
||||
quality += ' ' + str(parsedTitle.get('resolution', ''))
|
||||
if parsedTitle.get('source'):
|
||||
quality = str(parsedTitle.get('source'))
|
||||
if parsedTitle.get('screen_size'):
|
||||
quality += ' ' + str(parsedTitle.get('screen_size', ''))
|
||||
if not scraped['year']:
|
||||
infolabels['year'] = parsedTitle.get('year', '')
|
||||
if parsedTitle.get('episode') and parsedTitle.get('season'):
|
||||
@@ -297,8 +298,8 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
|
||||
longtitle += s + config.get_localized_string(30140) + " " +str(parsedTitle.get('season')[0]) + '-' + str(parsedTitle.get('season')[-1])
|
||||
elif parsedTitle.get('season'):
|
||||
longtitle += s + config.get_localized_string(60027) % str(parsedTitle.get('season'))
|
||||
if parsedTitle.get('episodeName'):
|
||||
longtitle += s + parsedTitle.get('episodeName')
|
||||
if parsedTitle.get('episode_title'):
|
||||
longtitle += s + parsedTitle.get('episode_title')
|
||||
|
||||
longtitle = typo(longtitle, 'bold')
|
||||
lang1, longtitle = scrapeLang(scraped, lang, longtitle)
|
||||
@@ -871,7 +872,7 @@ def match(item_url_string, **args):
|
||||
string = args.get('string', False)
|
||||
|
||||
# remove scrape arguments
|
||||
args = dict([(key, val) for key, val in args.items() if key not in ['patron', 'patronBlock', 'patronBlocks', 'debug', 'debugBlock', 'string']])
|
||||
args = dict([(key, val) for key, val in args.items() if key not in ['patron', 'patronBlock', 'patronBlocks', 'debug', 'debugBlock', 'string']])
|
||||
|
||||
# check type of item_url_string
|
||||
if string:
|
||||
|
||||
25
lib/babelfish/__init__.py
Executable file
25
lib/babelfish/__init__.py
Executable file
@@ -0,0 +1,25 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
__title__ = 'babelfish'
|
||||
__version__ = '0.5.5-dev'
|
||||
__author__ = 'Antoine Bertin'
|
||||
__license__ = 'BSD'
|
||||
__copyright__ = 'Copyright 2015 the BabelFish authors'
|
||||
|
||||
import sys
|
||||
|
||||
# Base class of text strings on the running interpreter. The conditional
# expression is lazy, so ``basestring`` is only looked up on Python 2.
basestr = str if sys.version_info[0] >= 3 else basestring
|
||||
|
||||
from .converters import (LanguageConverter, LanguageReverseConverter, LanguageEquivalenceConverter, CountryConverter,
|
||||
CountryReverseConverter)
|
||||
from .country import country_converters, COUNTRIES, COUNTRY_MATRIX, Country
|
||||
from .exceptions import Error, LanguageConvertError, LanguageReverseError, CountryConvertError, CountryReverseError
|
||||
from .language import language_converters, LANGUAGES, LANGUAGE_MATRIX, Language
|
||||
from .script import SCRIPTS, SCRIPT_MATRIX, Script
|
||||
289
lib/babelfish/converters/__init__.py
Executable file
289
lib/babelfish/converters/__init__.py
Executable file
@@ -0,0 +1,289 @@
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
import collections
|
||||
import functools
|
||||
from importlib import import_module
|
||||
|
||||
# from pkg_resources import iter_entry_points, EntryPoint
|
||||
from ..exceptions import LanguageConvertError, LanguageReverseError
|
||||
|
||||
|
||||
# from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
|
||||
try:
    # The ABC aliases were removed from ``collections`` in Python 3.10.
    from collections.abc import Mapping, MutableMapping
except ImportError:  # Python 2
    from collections import Mapping, MutableMapping


class CaseInsensitiveDict(MutableMapping):
    """A case-insensitive ``dict``-like object.

    Implements all methods and operations of ``MutableMapping`` as well
    as dict's ``copy``. Also provides ``lower_items``.

    All keys are expected to be strings. The structure remembers the
    case of the last key to be set, and iteration yields those
    case-preserved keys. However, querying and contains testing is
    case insensitive:

        cid = CaseInsensitiveDict()
        cid['English'] = 'eng'
        cid['ENGLISH'] == 'eng'  # True
        list(cid) == ['English']  # True

    If the constructor, ``.update``, or equality comparison
    operations are given keys that have equal ``.lower()``s, the
    behavior is undefined.

    """
    def __init__(self, data=None, **kwargs):
        """Create the mapping, optionally filled from `data` and `kwargs`.

        :param data: mapping or iterable of ``(key, value)`` pairs, if any
        :param kwargs: extra items, given as keyword arguments

        """
        # Maps lowercased key -> (key as last set, value).
        self._store = dict()
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def __setitem__(self, key, value):
        # Use the lowercased key for lookups, but store the actual
        # key alongside the value.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        return self._store[key.lower()][1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        return (casedkey for casedkey, mappedvalue in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval)
            in self._store.items()
        )

    def __eq__(self, other):
        if isinstance(other, Mapping):
            other = CaseInsensitiveDict(other)
        else:
            return NotImplemented
        # Compare insensitively
        return dict(self.lower_items()) == dict(other.lower_items())

    # Copy is required
    def copy(self):
        """Return a new :class:`CaseInsensitiveDict` with the same items."""
        # The stored values are already (cased key, value) pairs.
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
|
||||
|
||||
|
||||
class LanguageConverter(object):
    """Base class for converters that turn a language into a custom code.

    The language is described by an alpha3 language code, optionally
    combined with an alpha2 country code and a script code.

    .. attribute:: codes

        Set of possible custom codes

    """
    def convert(self, alpha3, country=None, script=None):
        """Return the custom code matching the given language description.

        This base implementation always raises ``NotImplementedError``.

        :param string alpha3: ISO-639-3 language code
        :param country: ISO-3166 country code, if any
        :type country: string or None
        :param script: ISO-15924 script code, if any
        :type script: string or None
        :return: the corresponding custom code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.LanguageConvertError`

        """
        raise NotImplementedError
|
||||
|
||||
|
||||
class LanguageReverseConverter(LanguageConverter):
    """A :class:`LanguageConverter` that can also map a custom code back to
    an alpha3 ISO-639-3 language code, alpha2 ISO-3166-1 country code and
    ISO-15924 script code

    """
    def reverse(self, code):
        """Turn a custom code back into alpha3, country and script codes.

        This base implementation always raises ``NotImplementedError``.

        :param string code: custom code to reverse
        :return: the corresponding alpha3 ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code
        :rtype: tuple
        :raise: :class:`~babelfish.exceptions.LanguageReverseError`

        """
        raise NotImplementedError
|
||||
|
||||
|
||||
class LanguageEquivalenceConverter(LanguageReverseConverter):
    """Utility base for a :class:`LanguageReverseConverter` that is fully
    described by a dict from alpha3 codes to their corresponding symbols.

    Subclasses provide that dict as a class variable named SYMBOLS.

    Setting the class variable CASE_SENSITIVE to ``True`` makes the reverse
    conversion case-sensitive (it is case-insensitive by default).

    Example::

        class MyCodeConverter(babelfish.LanguageEquivalenceConverter):
            CASE_SENSITIVE = True
            SYMBOLS = {'fra': 'mycode1', 'eng': 'mycode2'}

    """
    CASE_SENSITIVE = False

    def __init__(self):
        """Build the forward and reverse lookup tables from ``SYMBOLS``."""
        self.codes = set()
        self.to_symbol = {}
        # A plain dict gives case-sensitive reverse lookups.
        self.from_symbol = {} if self.CASE_SENSITIVE else CaseInsensitiveDict()

        for language, custom_code in self.SYMBOLS.items():
            self.to_symbol[language] = custom_code
            # Equivalence tables carry no country or script information.
            self.from_symbol[custom_code] = (language, None, None)
            self.codes.add(custom_code)

    def convert(self, alpha3, country=None, script=None):
        """Return the symbol registered for `alpha3`.

        :raise: :class:`~babelfish.exceptions.LanguageConvertError` if
            `alpha3` has no symbol

        """
        try:
            symbol = self.to_symbol[alpha3]
        except KeyError:
            raise LanguageConvertError(alpha3, country, script)
        return symbol

    def reverse(self, code):
        """Return the ``(alpha3, None, None)`` tuple registered for `code`.

        :raise: :class:`~babelfish.exceptions.LanguageReverseError` if
            `code` is unknown

        """
        try:
            language = self.from_symbol[code]
        except KeyError:
            raise LanguageReverseError(code)
        return language
|
||||
|
||||
|
||||
class CountryConverter(object):
    """Base class for converters that turn an alpha2 country code into a
    custom code

    .. attribute:: codes

        Set of possible custom codes

    """
    def convert(self, alpha2):
        """Return the custom code matching an alpha2 country code.

        This base implementation always raises ``NotImplementedError``.

        :param string alpha2: ISO-3166-1 language code
        :return: the corresponding custom code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.CountryConvertError`

        """
        raise NotImplementedError
|
||||
|
||||
|
||||
class CountryReverseConverter(CountryConverter):
    """A :class:`CountryConverter` that can also map a custom code back to
    an alpha2 ISO-3166-1 country code

    """
    def reverse(self, code):
        """Turn a custom code back into an alpha2 code.

        This base implementation always raises ``NotImplementedError``.

        :param string code: custom code to reverse
        :return: the corresponding alpha2 ISO-3166-1 country code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.CountryReverseError`

        """
        raise NotImplementedError
|
||||
|
||||
|
||||
class ConverterManager(object):
    """Manager for babelfish converters behaving like a dict with lazy loading

    Loading is done in this order:

    * Registered converters
    * Internal converters

    :meth:`__getitem__` does not consult :attr:`entry_point`; only the
    registered and internal converter lists are searched.

    .. attribute:: entry_point

        The entry point name associated with this manager

    .. attribute:: internal_converters

        Internal converters with entry point syntax

    """
    entry_point = ''
    internal_converters = []

    def __init__(self):
        #: Registered converters with entry point syntax
        self.registered_converters = []

        #: Loaded converters
        self.converters = {}

    @staticmethod
    def _parse_entry_point(spec):
        """Split a ``name = package.module:ClassName`` string into its parts.

        :param string spec: converter description (entry point syntax)
        :return: dict with the keys ``name``, ``module`` and ``class``
        :rtype: dict
        :raise: ValueError if `spec` does not follow the entry point syntax

        """
        import re
        match = re.match(r'(?P<name>\w+) = (?P<module>[a-z0-9.]+):(?P<class>\w+)', spec)
        if match is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ValueError('Invalid converter entry point: %r' % (spec,))
        return match.groupdict()

    def __getitem__(self, name):
        """Get a converter, lazy loading it if necessary

        :param string name: name of the converter
        :return: the loaded converter instance
        :raise: KeyError if no loaded, registered or internal converter has this name

        """
        if name in self.converters:
            return self.converters[name]
        # Registered converters come first so they take precedence over internal ones.
        for spec in self.registered_converters + self.internal_converters:
            ep = self._parse_entry_point(spec)
            if ep['name'] == name:
                converter_class = getattr(import_module(ep['module']), ep['class'])
                self.converters[name] = converter_class()
                return self.converters[name]
        raise KeyError(name)

    def __setitem__(self, name, converter):
        """Load a converter"""
        self.converters[name] = converter

    def __delitem__(self, name):
        """Unload a converter"""
        del self.converters[name]

    def __iter__(self):
        """Iterator over loaded converters"""
        return iter(self.converters)

    def register(self, entry_point):
        """Register a converter

        :param string entry_point: converter to register (entry point syntax)
        :raise: ValueError if already registered

        """
        if entry_point in self.registered_converters:
            raise ValueError('Already registered')
        # Newest registration goes first and is therefore looked up first.
        self.registered_converters.insert(0, entry_point)

    def unregister(self, entry_point):
        """Unregister a converter

        :param string entry_point: converter to unregister (entry point syntax)

        """
        self.registered_converters.remove(entry_point)

    def __contains__(self, name):
        # Only converters that are already loaded count as contained.
        return name in self.converters
|
||||
17
lib/babelfish/converters/alpha2.py
Executable file
17
lib/babelfish/converters/alpha2.py
Executable file
@@ -0,0 +1,17 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class Alpha2Converter(LanguageEquivalenceConverter):
    """Equivalence converter built from the ``alpha2`` field of the
    :data:`LANGUAGE_MATRIX` rows, with case-sensitive reverse lookups"""
    CASE_SENSITIVE = True
    SYMBOLS = {}
    # Rows whose alpha2 field is empty are left out of the table.
    for iso_language in LANGUAGE_MATRIX:
        if not iso_language.alpha2:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.alpha2
|
||||
17
lib/babelfish/converters/alpha3b.py
Executable file
17
lib/babelfish/converters/alpha3b.py
Executable file
@@ -0,0 +1,17 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class Alpha3BConverter(LanguageEquivalenceConverter):
    """Equivalence converter built from the ``alpha3b`` field of the
    :data:`LANGUAGE_MATRIX` rows, with case-sensitive reverse lookups"""
    CASE_SENSITIVE = True
    SYMBOLS = {}
    # Rows whose alpha3b field is empty are left out of the table.
    for iso_language in LANGUAGE_MATRIX:
        if not iso_language.alpha3b:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.alpha3b
|
||||
17
lib/babelfish/converters/alpha3t.py
Executable file
17
lib/babelfish/converters/alpha3t.py
Executable file
@@ -0,0 +1,17 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class Alpha3TConverter(LanguageEquivalenceConverter):
    """Equivalence converter built from the ``alpha3t`` field of the
    :data:`LANGUAGE_MATRIX` rows, with case-sensitive reverse lookups"""
    CASE_SENSITIVE = True
    SYMBOLS = {}
    # Rows whose alpha3t field is empty are left out of the table.
    for iso_language in LANGUAGE_MATRIX:
        if not iso_language.alpha3t:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.alpha3t
|
||||
31
lib/babelfish/converters/countryname.py
Executable file
31
lib/babelfish/converters/countryname.py
Executable file
@@ -0,0 +1,31 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import CountryReverseConverter, CaseInsensitiveDict
|
||||
from ..country import COUNTRY_MATRIX
|
||||
from ..exceptions import CountryConvertError, CountryReverseError
|
||||
|
||||
|
||||
class CountryNameConverter(CountryReverseConverter):
    """Converter between alpha2 country codes and the country names listed
    in :data:`COUNTRY_MATRIX`; name lookups are case-insensitive"""
    def __init__(self):
        """Build both lookup tables from :data:`COUNTRY_MATRIX`."""
        self.codes = set()
        self.to_name = {}
        self.from_name = CaseInsensitiveDict()
        for entry in COUNTRY_MATRIX:
            self.codes.add(entry.name)
            self.to_name[entry.alpha2] = entry.name
            self.from_name[entry.name] = entry.alpha2

    def convert(self, alpha2):
        """Return the country name for `alpha2`.

        :raise: :class:`~babelfish.exceptions.CountryConvertError` if
            `alpha2` is unknown

        """
        if alpha2 in self.to_name:
            return self.to_name[alpha2]
        raise CountryConvertError(alpha2)

    def reverse(self, name):
        """Return the alpha2 code for the country called `name`.

        :raise: :class:`~babelfish.exceptions.CountryReverseError` if
            `name` is unknown

        """
        if name in self.from_name:
            return self.from_name[name]
        raise CountryReverseError(name)
|
||||
17
lib/babelfish/converters/name.py
Executable file
17
lib/babelfish/converters/name.py
Executable file
@@ -0,0 +1,17 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageEquivalenceConverter
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class NameConverter(LanguageEquivalenceConverter):
    """Equivalence converter built from the ``name`` field of the
    :data:`LANGUAGE_MATRIX` rows, with case-insensitive reverse lookups"""
    CASE_SENSITIVE = False
    SYMBOLS = {}
    # Rows whose name field is empty are left out of the table.
    for iso_language in LANGUAGE_MATRIX:
        if not iso_language.name:
            continue
        SYMBOLS[iso_language.alpha3] = iso_language.name
|
||||
36
lib/babelfish/converters/opensubtitles.py
Executable file
36
lib/babelfish/converters/opensubtitles.py
Executable file
@@ -0,0 +1,36 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageReverseConverter, CaseInsensitiveDict
|
||||
from ..exceptions import LanguageReverseError
|
||||
from ..language import language_converters
|
||||
|
||||
|
||||
class OpenSubtitlesConverter(LanguageReverseConverter):
    """Language converter layered on the ``alpha3b`` and ``alpha2``
    converters, with a small table of special-case codes"""
    def __init__(self):
        """Fetch the underlying converters and set up the special-case tables."""
        self.alpha3b_converter = language_converters['alpha3b']
        self.alpha2_converter = language_converters['alpha2']
        # (alpha3b, country) pairs that map to a code other than the plain alpha3b.
        self.to_opensubtitles = {
            ('por', 'BR'): 'pob',
            ('gre', None): 'ell',
            ('srp', None): 'scc',
            ('srp', 'ME'): 'mne',
        }
        # Codes that are resolved here before falling back to alpha3b / alpha2.
        self.from_opensubtitles = CaseInsensitiveDict({
            'pob': ('por', 'BR'),
            'pb': ('por', 'BR'),
            'ell': ('ell', None),
            'scc': ('srp', None),
            'mne': ('srp', 'ME'),
        })
        extra_codes = {'pob', 'pb', 'scc', 'mne'}
        self.codes = self.alpha2_converter.codes | self.alpha3b_converter.codes | extra_codes

    def convert(self, alpha3, country=None, script=None):
        """Return the special-case code for ``(alpha3b, country)`` if one
        exists, otherwise the alpha3b code itself."""
        alpha3b = self.alpha3b_converter.convert(alpha3, country, script)
        return self.to_opensubtitles.get((alpha3b, country), alpha3b)

    def reverse(self, opensubtitles):
        """Resolve `opensubtitles` through the special-case table, then the
        alpha3b converter, then the alpha2 converter.

        :raise: :class:`~babelfish.exceptions.LanguageReverseError` if none
            of them knows the code

        """
        special_cases = self.from_opensubtitles
        if opensubtitles in special_cases:
            return special_cases[opensubtitles]
        for fallback in (self.alpha3b_converter, self.alpha2_converter):
            try:
                return fallback.reverse(opensubtitles)
            except LanguageReverseError:
                continue
        raise LanguageReverseError(opensubtitles)
|
||||
23
lib/babelfish/converters/scope.py
Executable file
23
lib/babelfish/converters/scope.py
Executable file
@@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageConverter
|
||||
from ..exceptions import LanguageConvertError
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class ScopeConverter(LanguageConverter):
    """Converter from an alpha3 code to the full name of the ``scope``
    letter stored in the :data:`LANGUAGE_MATRIX` rows"""
    FULLNAME = {'I': 'individual', 'M': 'macrolanguage', 'S': 'special'}
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        SYMBOLS[iso_language.alpha3] = iso_language.scope
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        """Return the full scope name for `alpha3`.

        An `alpha3` missing from ``SYMBOLS`` propagates the ``KeyError``
        of the dict lookup.

        :raise: :class:`~babelfish.exceptions.LanguageConvertError` if the
            scope letter has no entry in ``FULLNAME``

        """
        scope = self.SYMBOLS[alpha3]
        if scope not in self.FULLNAME:
            raise LanguageConvertError(alpha3, country, script)
        return self.FULLNAME[scope]
|
||||
23
lib/babelfish/converters/type.py
Executable file
23
lib/babelfish/converters/type.py
Executable file
@@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from . import LanguageConverter
|
||||
from ..exceptions import LanguageConvertError
|
||||
from ..language import LANGUAGE_MATRIX
|
||||
|
||||
|
||||
class LanguageTypeConverter(LanguageConverter):
    """Converter from an alpha3 code to the full name of the ``type``
    letter stored in the :data:`LANGUAGE_MATRIX` rows"""
    FULLNAME = {'A': 'ancient', 'C': 'constructed', 'E': 'extinct', 'H': 'historical', 'L': 'living', 'S': 'special'}
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        SYMBOLS[iso_language.alpha3] = iso_language.type
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        """Return the full type name for `alpha3`.

        An `alpha3` missing from ``SYMBOLS`` propagates the ``KeyError``
        of the dict lookup.

        :raise: :class:`~babelfish.exceptions.LanguageConvertError` if the
            type letter has no entry in ``FULLNAME``

        """
        language_type = self.SYMBOLS[alpha3]
        if language_type not in self.FULLNAME:
            raise LanguageConvertError(alpha3, country, script)
        return self.FULLNAME[language_type]
|
||||
108
lib/babelfish/country.py
Executable file
108
lib/babelfish/country.py
Executable file
@@ -0,0 +1,108 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
# from pkg_resources import resource_stream # @UnresolvedImport
|
||||
import os
|
||||
from .converters import ConverterManager
|
||||
from . import basestr
|
||||
|
||||
|
||||
#: Country names keyed by their alpha2 code
COUNTRIES = {}
#: Every row of the data file, in file order
COUNTRY_MATRIX = []

#: The namedtuple used in the :data:`COUNTRY_MATRIX`
IsoCountry = namedtuple('IsoCountry', ['name', 'alpha2'])

# Read the table as bytes and decode each line explicitly: this behaves the
# same on Python 2 and 3 and does not depend on the locale's default
# encoding. The ``with`` block closes the file even if a row is malformed.
with open(os.path.join(os.path.dirname(__file__), 'data/iso-3166-1.txt'), 'rb') as f:
    f.readline()  # skip the header row
    for line in f:
        line = line.decode('utf-8').strip()
        if not line:
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        iso_country = IsoCountry(*line.split(';'))
        COUNTRIES[iso_country.alpha2] = iso_country.name
        COUNTRY_MATRIX.append(iso_country)
|
||||
|
||||
|
||||
class CountryConverterManager(ConverterManager):
    """:class:`~babelfish.converters.ConverterManager` specialised for country converters"""
    entry_point = 'babelfish.country_converters'
    # NOTE(review): the module path below is absolute ('babelfish....') —
    # confirm 'babelfish' is importable as a top-level package where this
    # copy is installed.
    internal_converters = [
        'name = babelfish.converters.countryname:CountryNameConverter',
    ]


#: Manager instance used by :class:`Country` for its converters
country_converters = CountryConverterManager()
|
||||
|
||||
|
||||
class CountryMeta(type):
    """The :class:`Country` metaclass

    Class attributes named ``from<mycode>`` are resolved dynamically to
    :meth:`Country.fromcode` bound to the ``<mycode>`` `converter`

    """
    def __getattr__(cls, name):
        prefix = 'from'
        if not name.startswith(prefix):
            # Anything else goes through the regular attribute lookup.
            return type.__getattribute__(cls, name)
        # The text after the prefix is the converter name.
        return partial(cls.fromcode, converter=name[len(prefix):])
|
||||
|
||||
|
||||
class Country(CountryMeta(str('CountryBase'), (object,), {})):
    """A country on Earth

    A country is identified by a 2-letter code from the ISO-3166 standard

    :param string country: 2-letter ISO-3166 country code

    """
    def __init__(self, country):
        if country not in COUNTRIES:
            raise ValueError('%r is not a valid country' % country)

        #: ISO-3166 2-letter country code
        self.alpha2 = country

    @classmethod
    def fromcode(cls, code, converter):
        """Build a :class:`Country` from `code`, using `converter` to
        :meth:`~babelfish.converters.CountryReverseConverter.reverse` it

        :param string code: the code to reverse
        :param string converter: name of the :class:`~babelfish.converters.CountryReverseConverter` to use
        :return: the corresponding :class:`Country` instance
        :rtype: :class:`Country`

        """
        alpha2 = country_converters[converter].reverse(code)
        return cls(alpha2)

    def __getstate__(self):
        # The alpha2 code is the whole pickled state.
        return self.alpha2

    def __setstate__(self, state):
        self.alpha2 = state

    def __getattr__(self, name):
        # Attributes not found normally are treated as converter names.
        # The conversion stays inside the ``try`` so that a KeyError from
        # either the lookup or the conversion becomes an AttributeError.
        try:
            return country_converters[name].convert(self.alpha2)
        except KeyError:
            raise AttributeError(name)

    def __hash__(self):
        return hash(self.alpha2)

    def __eq__(self, other):
        # Strings compare against the textual form of the country.
        if isinstance(other, basestr):
            return str(self) == other
        if isinstance(other, Country):
            return self.alpha2 == other.alpha2
        return False

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return '<Country [%s]>' % self

    def __str__(self):
        return self.alpha2
|
||||
250
lib/babelfish/data/iso-3166-1.txt
Executable file
250
lib/babelfish/data/iso-3166-1.txt
Executable file
@@ -0,0 +1,250 @@
|
||||
Country Name;ISO 3166-1-alpha-2 code
|
||||
AFGHANISTAN;AF
|
||||
ÅLAND ISLANDS;AX
|
||||
ALBANIA;AL
|
||||
ALGERIA;DZ
|
||||
AMERICAN SAMOA;AS
|
||||
ANDORRA;AD
|
||||
ANGOLA;AO
|
||||
ANGUILLA;AI
|
||||
ANTARCTICA;AQ
|
||||
ANTIGUA AND BARBUDA;AG
|
||||
ARGENTINA;AR
|
||||
ARMENIA;AM
|
||||
ARUBA;AW
|
||||
AUSTRALIA;AU
|
||||
AUSTRIA;AT
|
||||
AZERBAIJAN;AZ
|
||||
BAHAMAS;BS
|
||||
BAHRAIN;BH
|
||||
BANGLADESH;BD
|
||||
BARBADOS;BB
|
||||
BELARUS;BY
|
||||
BELGIUM;BE
|
||||
BELIZE;BZ
|
||||
BENIN;BJ
|
||||
BERMUDA;BM
|
||||
BHUTAN;BT
|
||||
BOLIVIA, PLURINATIONAL STATE OF;BO
|
||||
BONAIRE, SINT EUSTATIUS AND SABA;BQ
|
||||
BOSNIA AND HERZEGOVINA;BA
|
||||
BOTSWANA;BW
|
||||
BOUVET ISLAND;BV
|
||||
BRAZIL;BR
|
||||
BRITISH INDIAN OCEAN TERRITORY;IO
|
||||
BRUNEI DARUSSALAM;BN
|
||||
BULGARIA;BG
|
||||
BURKINA FASO;BF
|
||||
BURUNDI;BI
|
||||
CAMBODIA;KH
|
||||
CAMEROON;CM
|
||||
CANADA;CA
|
||||
CAPE VERDE;CV
|
||||
CAYMAN ISLANDS;KY
|
||||
CENTRAL AFRICAN REPUBLIC;CF
|
||||
CHAD;TD
|
||||
CHILE;CL
|
||||
CHINA;CN
|
||||
CHRISTMAS ISLAND;CX
|
||||
COCOS (KEELING) ISLANDS;CC
|
||||
COLOMBIA;CO
|
||||
COMOROS;KM
|
||||
CONGO;CG
|
||||
CONGO, THE DEMOCRATIC REPUBLIC OF THE;CD
|
||||
COOK ISLANDS;CK
|
||||
COSTA RICA;CR
|
||||
CÔTE D'IVOIRE;CI
|
||||
CROATIA;HR
|
||||
CUBA;CU
|
||||
CURAÇAO;CW
|
||||
CYPRUS;CY
|
||||
CZECH REPUBLIC;CZ
|
||||
DENMARK;DK
|
||||
DJIBOUTI;DJ
|
||||
DOMINICA;DM
|
||||
DOMINICAN REPUBLIC;DO
|
||||
ECUADOR;EC
|
||||
EGYPT;EG
|
||||
EL SALVADOR;SV
|
||||
EQUATORIAL GUINEA;GQ
|
||||
ERITREA;ER
|
||||
ESTONIA;EE
|
||||
ETHIOPIA;ET
|
||||
FALKLAND ISLANDS (MALVINAS);FK
|
||||
FAROE ISLANDS;FO
|
||||
FIJI;FJ
|
||||
FINLAND;FI
|
||||
FRANCE;FR
|
||||
FRENCH GUIANA;GF
|
||||
FRENCH POLYNESIA;PF
|
||||
FRENCH SOUTHERN TERRITORIES;TF
|
||||
GABON;GA
|
||||
GAMBIA;GM
|
||||
GEORGIA;GE
|
||||
GERMANY;DE
|
||||
GHANA;GH
|
||||
GIBRALTAR;GI
|
||||
GREECE;GR
|
||||
GREENLAND;GL
|
||||
GRENADA;GD
|
||||
GUADELOUPE;GP
|
||||
GUAM;GU
|
||||
GUATEMALA;GT
|
||||
GUERNSEY;GG
|
||||
GUINEA;GN
|
||||
GUINEA-BISSAU;GW
|
||||
GUYANA;GY
|
||||
HAITI;HT
|
||||
HEARD ISLAND AND MCDONALD ISLANDS;HM
|
||||
HOLY SEE (VATICAN CITY STATE);VA
|
||||
HONDURAS;HN
|
||||
HONG KONG;HK
|
||||
HUNGARY;HU
|
||||
ICELAND;IS
|
||||
INDIA;IN
|
||||
INDONESIA;ID
|
||||
IRAN, ISLAMIC REPUBLIC OF;IR
|
||||
IRAQ;IQ
|
||||
IRELAND;IE
|
||||
ISLE OF MAN;IM
|
||||
ISRAEL;IL
|
||||
ITALY;IT
|
||||
JAMAICA;JM
|
||||
JAPAN;JP
|
||||
JERSEY;JE
|
||||
JORDAN;JO
|
||||
KAZAKHSTAN;KZ
|
||||
KENYA;KE
|
||||
KIRIBATI;KI
|
||||
KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF;KP
|
||||
KOREA, REPUBLIC OF;KR
|
||||
KUWAIT;KW
|
||||
KYRGYZSTAN;KG
|
||||
LAO PEOPLE'S DEMOCRATIC REPUBLIC;LA
|
||||
LATVIA;LV
|
||||
LEBANON;LB
|
||||
LESOTHO;LS
|
||||
LIBERIA;LR
|
||||
LIBYA;LY
|
||||
LIECHTENSTEIN;LI
|
||||
LITHUANIA;LT
|
||||
LUXEMBOURG;LU
|
||||
MACAO;MO
|
||||
MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF;MK
|
||||
MADAGASCAR;MG
|
||||
MALAWI;MW
|
||||
MALAYSIA;MY
|
||||
MALDIVES;MV
|
||||
MALI;ML
|
||||
MALTA;MT
|
||||
MARSHALL ISLANDS;MH
|
||||
MARTINIQUE;MQ
|
||||
MAURITANIA;MR
|
||||
MAURITIUS;MU
|
||||
MAYOTTE;YT
|
||||
MEXICO;MX
|
||||
MICRONESIA, FEDERATED STATES OF;FM
|
||||
MOLDOVA, REPUBLIC OF;MD
|
||||
MONACO;MC
|
||||
MONGOLIA;MN
|
||||
MONTENEGRO;ME
|
||||
MONTSERRAT;MS
|
||||
MOROCCO;MA
|
||||
MOZAMBIQUE;MZ
|
||||
MYANMAR;MM
|
||||
NAMIBIA;NA
|
||||
NAURU;NR
|
||||
NEPAL;NP
|
||||
NETHERLANDS;NL
|
||||
NEW CALEDONIA;NC
|
||||
NEW ZEALAND;NZ
|
||||
NICARAGUA;NI
|
||||
NIGER;NE
|
||||
NIGERIA;NG
|
||||
NIUE;NU
|
||||
NORFOLK ISLAND;NF
|
||||
NORTHERN MARIANA ISLANDS;MP
|
||||
NORWAY;NO
|
||||
OMAN;OM
|
||||
PAKISTAN;PK
|
||||
PALAU;PW
|
||||
PALESTINE, STATE OF;PS
|
||||
PANAMA;PA
|
||||
PAPUA NEW GUINEA;PG
|
||||
PARAGUAY;PY
|
||||
PERU;PE
|
||||
PHILIPPINES;PH
|
||||
PITCAIRN;PN
|
||||
POLAND;PL
|
||||
PORTUGAL;PT
|
||||
PUERTO RICO;PR
|
||||
QATAR;QA
|
||||
RÉUNION;RE
|
||||
ROMANIA;RO
|
||||
RUSSIAN FEDERATION;RU
|
||||
RWANDA;RW
|
||||
SAINT BARTHÉLEMY;BL
|
||||
SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA;SH
|
||||
SAINT KITTS AND NEVIS;KN
|
||||
SAINT LUCIA;LC
|
||||
SAINT MARTIN (FRENCH PART);MF
|
||||
SAINT PIERRE AND MIQUELON;PM
|
||||
SAINT VINCENT AND THE GRENADINES;VC
|
||||
SAMOA;WS
|
||||
SAN MARINO;SM
|
||||
SAO TOME AND PRINCIPE;ST
|
||||
SAUDI ARABIA;SA
|
||||
SENEGAL;SN
|
||||
SERBIA;RS
|
||||
SEYCHELLES;SC
|
||||
SIERRA LEONE;SL
|
||||
SINGAPORE;SG
|
||||
SINT MAARTEN (DUTCH PART);SX
|
||||
SLOVAKIA;SK
|
||||
SLOVENIA;SI
|
||||
SOLOMON ISLANDS;SB
|
||||
SOMALIA;SO
|
||||
SOUTH AFRICA;ZA
|
||||
SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS;GS
|
||||
SOUTH SUDAN;SS
|
||||
SPAIN;ES
|
||||
SRI LANKA;LK
|
||||
SUDAN;SD
|
||||
SURINAME;SR
|
||||
SVALBARD AND JAN MAYEN;SJ
|
||||
SWAZILAND;SZ
|
||||
SWEDEN;SE
|
||||
SWITZERLAND;CH
|
||||
SYRIAN ARAB REPUBLIC;SY
|
||||
TAIWAN, PROVINCE OF CHINA;TW
|
||||
TAJIKISTAN;TJ
|
||||
TANZANIA, UNITED REPUBLIC OF;TZ
|
||||
THAILAND;TH
|
||||
TIMOR-LESTE;TL
|
||||
TOGO;TG
|
||||
TOKELAU;TK
|
||||
TONGA;TO
|
||||
TRINIDAD AND TOBAGO;TT
|
||||
TUNISIA;TN
|
||||
TURKEY;TR
|
||||
TURKMENISTAN;TM
|
||||
TURKS AND CAICOS ISLANDS;TC
|
||||
TUVALU;TV
|
||||
UGANDA;UG
|
||||
UKRAINE;UA
|
||||
UNITED ARAB EMIRATES;AE
|
||||
UNITED KINGDOM;GB
|
||||
UNITED STATES;US
|
||||
UNITED STATES MINOR OUTLYING ISLANDS;UM
|
||||
URUGUAY;UY
|
||||
UZBEKISTAN;UZ
|
||||
VANUATU;VU
|
||||
VENEZUELA, BOLIVARIAN REPUBLIC OF;VE
|
||||
VIET NAM;VN
|
||||
VIRGIN ISLANDS, BRITISH;VG
|
||||
VIRGIN ISLANDS, U.S.;VI
|
||||
WALLIS AND FUTUNA;WF
|
||||
WESTERN SAHARA;EH
|
||||
YEMEN;YE
|
||||
ZAMBIA;ZM
|
||||
ZIMBABWE;ZW
|
||||
7875
lib/babelfish/data/iso-639-3.tab
Executable file
7875
lib/babelfish/data/iso-639-3.tab
Executable file
File diff suppressed because it is too large
Load Diff
176
lib/babelfish/data/iso15924-utf8-20131012.txt
Executable file
176
lib/babelfish/data/iso15924-utf8-20131012.txt
Executable file
@@ -0,0 +1,176 @@
|
||||
#
|
||||
# ISO 15924 - Codes for the representation of names of scripts
|
||||
# Codes pour la représentation des noms d’écritures
|
||||
# Format:
|
||||
# Code;N°;English Name;Nom français;PVA;Date
|
||||
#
|
||||
|
||||
Afak;439;Afaka;afaka;;2010-12-21
|
||||
Aghb;239;Caucasian Albanian;aghbanien;;2012-10-16
|
||||
Ahom;338;Ahom, Tai Ahom;âhom;;2012-11-01
|
||||
Arab;160;Arabic;arabe;Arabic;2004-05-01
|
||||
Armi;124;Imperial Aramaic;araméen impérial;Imperial_Aramaic;2009-06-01
|
||||
Armn;230;Armenian;arménien;Armenian;2004-05-01
|
||||
Avst;134;Avestan;avestique;Avestan;2009-06-01
|
||||
Bali;360;Balinese;balinais;Balinese;2006-10-10
|
||||
Bamu;435;Bamum;bamoum;Bamum;2009-06-01
|
||||
Bass;259;Bassa Vah;bassa;;2010-03-26
|
||||
Batk;365;Batak;batik;Batak;2010-07-23
|
||||
Beng;325;Bengali;bengalî;Bengali;2004-05-01
|
||||
Blis;550;Blissymbols;symboles Bliss;;2004-05-01
|
||||
Bopo;285;Bopomofo;bopomofo;Bopomofo;2004-05-01
|
||||
Brah;300;Brahmi;brahma;Brahmi;2010-07-23
|
||||
Brai;570;Braille;braille;Braille;2004-05-01
|
||||
Bugi;367;Buginese;bouguis;Buginese;2006-06-21
|
||||
Buhd;372;Buhid;bouhide;Buhid;2004-05-01
|
||||
Cakm;349;Chakma;chakma;Chakma;2012-02-06
|
||||
Cans;440;Unified Canadian Aboriginal Syllabics;syllabaire autochtone canadien unifié;Canadian_Aboriginal;2004-05-29
|
||||
Cari;201;Carian;carien;Carian;2007-07-02
|
||||
Cham;358;Cham;cham (čam, tcham);Cham;2009-11-11
|
||||
Cher;445;Cherokee;tchérokî;Cherokee;2004-05-01
|
||||
Cirt;291;Cirth;cirth;;2004-05-01
|
||||
Copt;204;Coptic;copte;Coptic;2006-06-21
|
||||
Cprt;403;Cypriot;syllabaire chypriote;Cypriot;2004-05-01
|
||||
Cyrl;220;Cyrillic;cyrillique;Cyrillic;2004-05-01
|
||||
Cyrs;221;Cyrillic (Old Church Slavonic variant);cyrillique (variante slavonne);;2004-05-01
|
||||
Deva;315;Devanagari (Nagari);dévanâgarî;Devanagari;2004-05-01
|
||||
Dsrt;250;Deseret (Mormon);déseret (mormon);Deseret;2004-05-01
|
||||
Dupl;755;Duployan shorthand, Duployan stenography;sténographie Duployé;;2010-07-18
|
||||
Egyd;070;Egyptian demotic;démotique égyptien;;2004-05-01
|
||||
Egyh;060;Egyptian hieratic;hiératique égyptien;;2004-05-01
|
||||
Egyp;050;Egyptian hieroglyphs;hiéroglyphes égyptiens;Egyptian_Hieroglyphs;2009-06-01
|
||||
Elba;226;Elbasan;elbasan;;2010-07-18
|
||||
Ethi;430;Ethiopic (Geʻez);éthiopien (geʻez, guèze);Ethiopic;2004-10-25
|
||||
Geor;240;Georgian (Mkhedruli);géorgien (mkhédrouli);Georgian;2004-05-29
|
||||
Geok;241;Khutsuri (Asomtavruli and Nuskhuri);khoutsouri (assomtavrouli et nouskhouri);Georgian;2012-10-16
|
||||
Glag;225;Glagolitic;glagolitique;Glagolitic;2006-06-21
|
||||
Goth;206;Gothic;gotique;Gothic;2004-05-01
|
||||
Gran;343;Grantha;grantha;;2009-11-11
|
||||
Grek;200;Greek;grec;Greek;2004-05-01
|
||||
Gujr;320;Gujarati;goudjarâtî (gujrâtî);Gujarati;2004-05-01
|
||||
Guru;310;Gurmukhi;gourmoukhî;Gurmukhi;2004-05-01
|
||||
Hang;286;Hangul (Hangŭl, Hangeul);hangûl (hangŭl, hangeul);Hangul;2004-05-29
|
||||
Hani;500;Han (Hanzi, Kanji, Hanja);idéogrammes han (sinogrammes);Han;2009-02-23
|
||||
Hano;371;Hanunoo (Hanunóo);hanounóo;Hanunoo;2004-05-29
|
||||
Hans;501;Han (Simplified variant);idéogrammes han (variante simplifiée);;2004-05-29
|
||||
Hant;502;Han (Traditional variant);idéogrammes han (variante traditionnelle);;2004-05-29
|
||||
Hatr;127;Hatran;hatrénien;;2012-11-01
|
||||
Hebr;125;Hebrew;hébreu;Hebrew;2004-05-01
|
||||
Hira;410;Hiragana;hiragana;Hiragana;2004-05-01
|
||||
Hluw;080;Anatolian Hieroglyphs (Luwian Hieroglyphs, Hittite Hieroglyphs);hiéroglyphes anatoliens (hiéroglyphes louvites, hiéroglyphes hittites);;2011-12-09
|
||||
Hmng;450;Pahawh Hmong;pahawh hmong;;2004-05-01
|
||||
Hrkt;412;Japanese syllabaries (alias for Hiragana + Katakana);syllabaires japonais (alias pour hiragana + katakana);Katakana_Or_Hiragana;2011-06-21
|
||||
Hung;176;Old Hungarian (Hungarian Runic);runes hongroises (ancien hongrois);;2012-10-16
|
||||
Inds;610;Indus (Harappan);indus;;2004-05-01
|
||||
Ital;210;Old Italic (Etruscan, Oscan, etc.);ancien italique (étrusque, osque, etc.);Old_Italic;2004-05-29
|
||||
Java;361;Javanese;javanais;Javanese;2009-06-01
|
||||
Jpan;413;Japanese (alias for Han + Hiragana + Katakana);japonais (alias pour han + hiragana + katakana);;2006-06-21
|
||||
Jurc;510;Jurchen;jurchen;;2010-12-21
|
||||
Kali;357;Kayah Li;kayah li;Kayah_Li;2007-07-02
|
||||
Kana;411;Katakana;katakana;Katakana;2004-05-01
|
||||
Khar;305;Kharoshthi;kharochthî;Kharoshthi;2006-06-21
|
||||
Khmr;355;Khmer;khmer;Khmer;2004-05-29
|
||||
Khoj;322;Khojki;khojkî;;2011-06-21
|
||||
Knda;345;Kannada;kannara (canara);Kannada;2004-05-29
|
||||
Kore;287;Korean (alias for Hangul + Han);coréen (alias pour hangûl + han);;2007-06-13
|
||||
Kpel;436;Kpelle;kpèllé;;2010-03-26
|
||||
Kthi;317;Kaithi;kaithî;Kaithi;2009-06-01
|
||||
Lana;351;Tai Tham (Lanna);taï tham (lanna);Tai_Tham;2009-06-01
|
||||
Laoo;356;Lao;laotien;Lao;2004-05-01
|
||||
Latf;217;Latin (Fraktur variant);latin (variante brisée);;2004-05-01
|
||||
Latg;216;Latin (Gaelic variant);latin (variante gaélique);;2004-05-01
|
||||
Latn;215;Latin;latin;Latin;2004-05-01
|
||||
Lepc;335;Lepcha (Róng);lepcha (róng);Lepcha;2007-07-02
|
||||
Limb;336;Limbu;limbou;Limbu;2004-05-29
|
||||
Lina;400;Linear A;linéaire A;;2004-05-01
|
||||
Linb;401;Linear B;linéaire B;Linear_B;2004-05-29
|
||||
Lisu;399;Lisu (Fraser);lisu (Fraser);Lisu;2009-06-01
|
||||
Loma;437;Loma;loma;;2010-03-26
|
||||
Lyci;202;Lycian;lycien;Lycian;2007-07-02
|
||||
Lydi;116;Lydian;lydien;Lydian;2007-07-02
|
||||
Mahj;314;Mahajani;mahâjanî;;2012-10-16
|
||||
Mand;140;Mandaic, Mandaean;mandéen;Mandaic;2010-07-23
|
||||
Mani;139;Manichaean;manichéen;;2007-07-15
|
||||
Maya;090;Mayan hieroglyphs;hiéroglyphes mayas;;2004-05-01
|
||||
Mend;438;Mende Kikakui;mendé kikakui;;2013-10-12
|
||||
Merc;101;Meroitic Cursive;cursif méroïtique;Meroitic_Cursive;2012-02-06
|
||||
Mero;100;Meroitic Hieroglyphs;hiéroglyphes méroïtiques;Meroitic_Hieroglyphs;2012-02-06
|
||||
Mlym;347;Malayalam;malayâlam;Malayalam;2004-05-01
|
||||
Modi;323;Modi, Moḍī;modî;;2013-10-12
|
||||
Moon;218;Moon (Moon code, Moon script, Moon type);écriture Moon;;2006-12-11
|
||||
Mong;145;Mongolian;mongol;Mongolian;2004-05-01
|
||||
Mroo;199;Mro, Mru;mro;;2010-12-21
|
||||
Mtei;337;Meitei Mayek (Meithei, Meetei);meitei mayek;Meetei_Mayek;2009-06-01
|
||||
Mult;323; Multani;multanî;;2012-11-01
|
||||
Mymr;350;Myanmar (Burmese);birman;Myanmar;2004-05-01
|
||||
Narb;106;Old North Arabian (Ancient North Arabian);nord-arabique;;2010-03-26
|
||||
Nbat;159;Nabataean;nabatéen;;2010-03-26
|
||||
Nkgb;420;Nakhi Geba ('Na-'Khi ²Ggŏ-¹baw, Naxi Geba);nakhi géba;;2009-02-23
|
||||
Nkoo;165;N’Ko;n’ko;Nko;2006-10-10
|
||||
Nshu;499;Nüshu;nüshu;;2010-12-21
|
||||
Ogam;212;Ogham;ogam;Ogham;2004-05-01
|
||||
Olck;261;Ol Chiki (Ol Cemet’, Ol, Santali);ol tchiki;Ol_Chiki;2007-07-02
|
||||
Orkh;175;Old Turkic, Orkhon Runic;orkhon;Old_Turkic;2009-06-01
|
||||
Orya;327;Oriya;oriyâ;Oriya;2004-05-01
|
||||
Osma;260;Osmanya;osmanais;Osmanya;2004-05-01
|
||||
Palm;126;Palmyrene;palmyrénien;;2010-03-26
|
||||
Pauc;263;Pau Cin Hau;paou chin haou;;2013-10-12
|
||||
Perm;227;Old Permic;ancien permien;;2004-05-01
|
||||
Phag;331;Phags-pa;’phags pa;Phags_Pa;2006-10-10
|
||||
Phli;131;Inscriptional Pahlavi;pehlevi des inscriptions;Inscriptional_Pahlavi;2009-06-01
|
||||
Phlp;132;Psalter Pahlavi;pehlevi des psautiers;;2007-11-26
|
||||
Phlv;133;Book Pahlavi;pehlevi des livres;;2007-07-15
|
||||
Phnx;115;Phoenician;phénicien;Phoenician;2006-10-10
|
||||
Plrd;282;Miao (Pollard);miao (Pollard);Miao;2012-02-06
|
||||
Prti;130;Inscriptional Parthian;parthe des inscriptions;Inscriptional_Parthian;2009-06-01
|
||||
Qaaa;900;Reserved for private use (start);réservé à l’usage privé (début);;2004-05-29
|
||||
Qabx;949;Reserved for private use (end);réservé à l’usage privé (fin);;2004-05-29
|
||||
Rjng;363;Rejang (Redjang, Kaganga);redjang (kaganga);Rejang;2009-02-23
|
||||
Roro;620;Rongorongo;rongorongo;;2004-05-01
|
||||
Runr;211;Runic;runique;Runic;2004-05-01
|
||||
Samr;123;Samaritan;samaritain;Samaritan;2009-06-01
|
||||
Sara;292;Sarati;sarati;;2004-05-29
|
||||
Sarb;105;Old South Arabian;sud-arabique, himyarite;Old_South_Arabian;2009-06-01
|
||||
Saur;344;Saurashtra;saurachtra;Saurashtra;2007-07-02
|
||||
Sgnw;095;SignWriting;SignÉcriture, SignWriting;;2006-10-10
|
||||
Shaw;281;Shavian (Shaw);shavien (Shaw);Shavian;2004-05-01
|
||||
Shrd;319;Sharada, Śāradā;charada, shard;Sharada;2012-02-06
|
||||
Sidd;302;Siddham, Siddhaṃ, Siddhamātṛkā;siddham;;2013-10-12
|
||||
Sind;318;Khudawadi, Sindhi;khoudawadî, sindhî;;2010-12-21
|
||||
Sinh;348;Sinhala;singhalais;Sinhala;2004-05-01
|
||||
Sora;398;Sora Sompeng;sora sompeng;Sora_Sompeng;2012-02-06
|
||||
Sund;362;Sundanese;sundanais;Sundanese;2007-07-02
|
||||
Sylo;316;Syloti Nagri;sylotî nâgrî;Syloti_Nagri;2006-06-21
|
||||
Syrc;135;Syriac;syriaque;Syriac;2004-05-01
|
||||
Syre;138;Syriac (Estrangelo variant);syriaque (variante estranghélo);;2004-05-01
|
||||
Syrj;137;Syriac (Western variant);syriaque (variante occidentale);;2004-05-01
|
||||
Syrn;136;Syriac (Eastern variant);syriaque (variante orientale);;2004-05-01
|
||||
Tagb;373;Tagbanwa;tagbanoua;Tagbanwa;2004-05-01
|
||||
Takr;321;Takri, Ṭākrī, Ṭāṅkrī;tâkrî;Takri;2012-02-06
|
||||
Tale;353;Tai Le;taï-le;Tai_Le;2004-10-25
|
||||
Talu;354;New Tai Lue;nouveau taï-lue;New_Tai_Lue;2006-06-21
|
||||
Taml;346;Tamil;tamoul;Tamil;2004-05-01
|
||||
Tang;520;Tangut;tangoute;;2010-12-21
|
||||
Tavt;359;Tai Viet;taï viêt;Tai_Viet;2009-06-01
|
||||
Telu;340;Telugu;télougou;Telugu;2004-05-01
|
||||
Teng;290;Tengwar;tengwar;;2004-05-01
|
||||
Tfng;120;Tifinagh (Berber);tifinagh (berbère);Tifinagh;2006-06-21
|
||||
Tglg;370;Tagalog (Baybayin, Alibata);tagal (baybayin, alibata);Tagalog;2009-02-23
|
||||
Thaa;170;Thaana;thâna;Thaana;2004-05-01
|
||||
Thai;352;Thai;thaï;Thai;2004-05-01
|
||||
Tibt;330;Tibetan;tibétain;Tibetan;2004-05-01
|
||||
Tirh;326;Tirhuta;tirhouta;;2011-12-09
|
||||
Ugar;040;Ugaritic;ougaritique;Ugaritic;2004-05-01
|
||||
Vaii;470;Vai;vaï;Vai;2007-07-02
|
||||
Visp;280;Visible Speech;parole visible;;2004-05-01
|
||||
Wara;262;Warang Citi (Varang Kshiti);warang citi;;2009-11-11
|
||||
Wole;480;Woleai;woléaï;;2010-12-21
|
||||
Xpeo;030;Old Persian;cunéiforme persépolitain;Old_Persian;2006-06-21
|
||||
Xsux;020;Cuneiform, Sumero-Akkadian;cunéiforme suméro-akkadien;Cuneiform;2006-10-10
|
||||
Yiii;460;Yi;yi;Yi;2004-05-01
|
||||
Zinh;994;Code for inherited script;codet pour écriture héritée;Inherited;2009-02-23
|
||||
Zmth;995;Mathematical notation;notation mathématique;;2007-11-26
|
||||
Zsym;996;Symbols;symboles;;2007-11-26
|
||||
Zxxx;997;Code for unwritten documents;codet pour les documents non écrits;;2011-06-21
|
||||
Zyyy;998;Code for undetermined script;codet pour écriture indéterminée;Common;2004-05-29
|
||||
Zzzz;999;Code for uncoded script;codet pour écriture non codée;Unknown;2006-10-10
|
||||
474
lib/babelfish/data/opensubtitles_languages.txt
Executable file
474
lib/babelfish/data/opensubtitles_languages.txt
Executable file
@@ -0,0 +1,474 @@
|
||||
IdSubLanguage ISO639 LanguageName UploadEnabled WebEnabled
|
||||
aar aa Afar, afar 0 0
|
||||
abk ab Abkhazian 0 0
|
||||
ace Achinese 0 0
|
||||
ach Acoli 0 0
|
||||
ada Adangme 0 0
|
||||
ady adyghé 0 0
|
||||
afa Afro-Asiatic (Other) 0 0
|
||||
afh Afrihili 0 0
|
||||
afr af Afrikaans 1 0
|
||||
ain Ainu 0 0
|
||||
aka ak Akan 0 0
|
||||
akk Akkadian 0 0
|
||||
alb sq Albanian 1 1
|
||||
ale Aleut 0 0
|
||||
alg Algonquian languages 0 0
|
||||
alt Southern Altai 0 0
|
||||
amh am Amharic 0 0
|
||||
ang English, Old (ca.450-1100) 0 0
|
||||
apa Apache languages 0 0
|
||||
ara ar Arabic 1 1
|
||||
arc Aramaic 0 0
|
||||
arg an Aragonese 0 0
|
||||
arm hy Armenian 1 0
|
||||
arn Araucanian 0 0
|
||||
arp Arapaho 0 0
|
||||
art Artificial (Other) 0 0
|
||||
arw Arawak 0 0
|
||||
asm as Assamese 0 0
|
||||
ast Asturian, Bable 0 0
|
||||
ath Athapascan languages 0 0
|
||||
aus Australian languages 0 0
|
||||
ava av Avaric 0 0
|
||||
ave ae Avestan 0 0
|
||||
awa Awadhi 0 0
|
||||
aym ay Aymara 0 0
|
||||
aze az Azerbaijani 0 0
|
||||
bad Banda 0 0
|
||||
bai Bamileke languages 0 0
|
||||
bak ba Bashkir 0 0
|
||||
bal Baluchi 0 0
|
||||
bam bm Bambara 0 0
|
||||
ban Balinese 0 0
|
||||
baq eu Basque 1 1
|
||||
bas Basa 0 0
|
||||
bat Baltic (Other) 0 0
|
||||
bej Beja 0 0
|
||||
bel be Belarusian 0 0
|
||||
bem Bemba 0 0
|
||||
ben bn Bengali 1 0
|
||||
ber Berber (Other) 0 0
|
||||
bho Bhojpuri 0 0
|
||||
bih bh Bihari 0 0
|
||||
bik Bikol 0 0
|
||||
bin Bini 0 0
|
||||
bis bi Bislama 0 0
|
||||
bla Siksika 0 0
|
||||
bnt Bantu (Other) 0 0
|
||||
bos bs Bosnian 1 0
|
||||
bra Braj 0 0
|
||||
bre br Breton 1 0
|
||||
btk Batak (Indonesia) 0 0
|
||||
bua Buriat 0 0
|
||||
bug Buginese 0 0
|
||||
bul bg Bulgarian 1 1
|
||||
bur my Burmese 1 0
|
||||
byn Blin 0 0
|
||||
cad Caddo 0 0
|
||||
cai Central American Indian (Other) 0 0
|
||||
car Carib 0 0
|
||||
cat ca Catalan 1 1
|
||||
cau Caucasian (Other) 0 0
|
||||
ceb Cebuano 0 0
|
||||
cel Celtic (Other) 0 0
|
||||
cha ch Chamorro 0 0
|
||||
chb Chibcha 0 0
|
||||
che ce Chechen 0 0
|
||||
chg Chagatai 0 0
|
||||
chi zh Chinese 1 1
|
||||
chk Chuukese 0 0
|
||||
chm Mari 0 0
|
||||
chn Chinook jargon 0 0
|
||||
cho Choctaw 0 0
|
||||
chp Chipewyan 0 0
|
||||
chr Cherokee 0 0
|
||||
chu cu Church Slavic 0 0
|
||||
chv cv Chuvash 0 0
|
||||
chy Cheyenne 0 0
|
||||
cmc Chamic languages 0 0
|
||||
cop Coptic 0 0
|
||||
cor kw Cornish 0 0
|
||||
cos co Corsican 0 0
|
||||
cpe Creoles and pidgins, English based (Other) 0 0
|
||||
cpf Creoles and pidgins, French-based (Other) 0 0
|
||||
cpp Creoles and pidgins, Portuguese-based (Other) 0 0
|
||||
cre cr Cree 0 0
|
||||
crh Crimean Tatar 0 0
|
||||
crp Creoles and pidgins (Other) 0 0
|
||||
csb Kashubian 0 0
|
||||
cus Cushitic (Other)' couchitiques, autres langues 0 0
|
||||
cze cs Czech 1 1
|
||||
dak Dakota 0 0
|
||||
dan da Danish 1 1
|
||||
dar Dargwa 0 0
|
||||
day Dayak 0 0
|
||||
del Delaware 0 0
|
||||
den Slave (Athapascan) 0 0
|
||||
dgr Dogrib 0 0
|
||||
din Dinka 0 0
|
||||
div dv Divehi 0 0
|
||||
doi Dogri 0 0
|
||||
dra Dravidian (Other) 0 0
|
||||
dua Duala 0 0
|
||||
dum Dutch, Middle (ca.1050-1350) 0 0
|
||||
dut nl Dutch 1 1
|
||||
dyu Dyula 0 0
|
||||
dzo dz Dzongkha 0 0
|
||||
efi Efik 0 0
|
||||
egy Egyptian (Ancient) 0 0
|
||||
eka Ekajuk 0 0
|
||||
elx Elamite 0 0
|
||||
eng en English 1 1
|
||||
enm English, Middle (1100-1500) 0 0
|
||||
epo eo Esperanto 1 0
|
||||
est et Estonian 1 1
|
||||
ewe ee Ewe 0 0
|
||||
ewo Ewondo 0 0
|
||||
fan Fang 0 0
|
||||
fao fo Faroese 0 0
|
||||
fat Fanti 0 0
|
||||
fij fj Fijian 0 0
|
||||
fil Filipino 0 0
|
||||
fin fi Finnish 1 1
|
||||
fiu Finno-Ugrian (Other) 0 0
|
||||
fon Fon 0 0
|
||||
fre fr French 1 1
|
||||
frm French, Middle (ca.1400-1600) 0 0
|
||||
fro French, Old (842-ca.1400) 0 0
|
||||
fry fy Frisian 0 0
|
||||
ful ff Fulah 0 0
|
||||
fur Friulian 0 0
|
||||
gaa Ga 0 0
|
||||
gay Gayo 0 0
|
||||
gba Gbaya 0 0
|
||||
gem Germanic (Other) 0 0
|
||||
geo ka Georgian 1 1
|
||||
ger de German 1 1
|
||||
gez Geez 0 0
|
||||
gil Gilbertese 0 0
|
||||
gla gd Gaelic 0 0
|
||||
gle ga Irish 0 0
|
||||
glg gl Galician 1 1
|
||||
glv gv Manx 0 0
|
||||
gmh German, Middle High (ca.1050-1500) 0 0
|
||||
goh German, Old High (ca.750-1050) 0 0
|
||||
gon Gondi 0 0
|
||||
gor Gorontalo 0 0
|
||||
got Gothic 0 0
|
||||
grb Grebo 0 0
|
||||
grc Greek, Ancient (to 1453) 0 0
|
||||
ell el Greek 1 1
|
||||
grn gn Guarani 0 0
|
||||
guj gu Gujarati 0 0
|
||||
gwi Gwich´in 0 0
|
||||
hai Haida 0 0
|
||||
hat ht Haitian 0 0
|
||||
hau ha Hausa 0 0
|
||||
haw Hawaiian 0 0
|
||||
heb he Hebrew 1 1
|
||||
her hz Herero 0 0
|
||||
hil Hiligaynon 0 0
|
||||
him Himachali 0 0
|
||||
hin hi Hindi 1 1
|
||||
hit Hittite 0 0
|
||||
hmn Hmong 0 0
|
||||
hmo ho Hiri Motu 0 0
|
||||
hrv hr Croatian 1 1
|
||||
hun hu Hungarian 1 1
|
||||
hup Hupa 0 0
|
||||
iba Iban 0 0
|
||||
ibo ig Igbo 0 0
|
||||
ice is Icelandic 1 1
|
||||
ido io Ido 0 0
|
||||
iii ii Sichuan Yi 0 0
|
||||
ijo Ijo 0 0
|
||||
iku iu Inuktitut 0 0
|
||||
ile ie Interlingue 0 0
|
||||
ilo Iloko 0 0
|
||||
ina ia Interlingua (International Auxiliary Language Asso 0 0
|
||||
inc Indic (Other) 0 0
|
||||
ind id Indonesian 1 1
|
||||
ine Indo-European (Other) 0 0
|
||||
inh Ingush 0 0
|
||||
ipk ik Inupiaq 0 0
|
||||
ira Iranian (Other) 0 0
|
||||
iro Iroquoian languages 0 0
|
||||
ita it Italian 1 1
|
||||
jav jv Javanese 0 0
|
||||
jpn ja Japanese 1 1
|
||||
jpr Judeo-Persian 0 0
|
||||
jrb Judeo-Arabic 0 0
|
||||
kaa Kara-Kalpak 0 0
|
||||
kab Kabyle 0 0
|
||||
kac Kachin 0 0
|
||||
kal kl Kalaallisut 0 0
|
||||
kam Kamba 0 0
|
||||
kan kn Kannada 0 0
|
||||
kar Karen 0 0
|
||||
kas ks Kashmiri 0 0
|
||||
kau kr Kanuri 0 0
|
||||
kaw Kawi 0 0
|
||||
kaz kk Kazakh 1 0
|
||||
kbd Kabardian 0 0
|
||||
kha Khasi 0 0
|
||||
khi Khoisan (Other) 0 0
|
||||
khm km Khmer 1 1
|
||||
kho Khotanese 0 0
|
||||
kik ki Kikuyu 0 0
|
||||
kin rw Kinyarwanda 0 0
|
||||
kir ky Kirghiz 0 0
|
||||
kmb Kimbundu 0 0
|
||||
kok Konkani 0 0
|
||||
kom kv Komi 0 0
|
||||
kon kg Kongo 0 0
|
||||
kor ko Korean 1 1
|
||||
kos Kosraean 0 0
|
||||
kpe Kpelle 0 0
|
||||
krc Karachay-Balkar 0 0
|
||||
kro Kru 0 0
|
||||
kru Kurukh 0 0
|
||||
kua kj Kuanyama 0 0
|
||||
kum Kumyk 0 0
|
||||
kur ku Kurdish 0 0
|
||||
kut Kutenai 0 0
|
||||
lad Ladino 0 0
|
||||
lah Lahnda 0 0
|
||||
lam Lamba 0 0
|
||||
lao lo Lao 0 0
|
||||
lat la Latin 0 0
|
||||
lav lv Latvian 1 0
|
||||
lez Lezghian 0 0
|
||||
lim li Limburgan 0 0
|
||||
lin ln Lingala 0 0
|
||||
lit lt Lithuanian 1 0
|
||||
lol Mongo 0 0
|
||||
loz Lozi 0 0
|
||||
ltz lb Luxembourgish 1 0
|
||||
lua Luba-Lulua 0 0
|
||||
lub lu Luba-Katanga 0 0
|
||||
lug lg Ganda 0 0
|
||||
lui Luiseno 0 0
|
||||
lun Lunda 0 0
|
||||
luo Luo (Kenya and Tanzania) 0 0
|
||||
lus lushai 0 0
|
||||
mac mk Macedonian 1 1
|
||||
mad Madurese 0 0
|
||||
mag Magahi 0 0
|
||||
mah mh Marshallese 0 0
|
||||
mai Maithili 0 0
|
||||
mak Makasar 0 0
|
||||
mal ml Malayalam 1 0
|
||||
man Mandingo 0 0
|
||||
mao mi Maori 0 0
|
||||
map Austronesian (Other) 0 0
|
||||
mar mr Marathi 0 0
|
||||
mas Masai 0 0
|
||||
may ms Malay 1 1
|
||||
mdf Moksha 0 0
|
||||
mdr Mandar 0 0
|
||||
men Mende 0 0
|
||||
mga Irish, Middle (900-1200) 0 0
|
||||
mic Mi'kmaq 0 0
|
||||
min Minangkabau 0 0
|
||||
mis Miscellaneous languages 0 0
|
||||
mkh Mon-Khmer (Other) 0 0
|
||||
mlg mg Malagasy 0 0
|
||||
mlt mt Maltese 0 0
|
||||
mnc Manchu 0 0
|
||||
mni Manipuri 0 0
|
||||
mno Manobo languages 0 0
|
||||
moh Mohawk 0 0
|
||||
mol mo Moldavian 0 0
|
||||
mon mn Mongolian 1 0
|
||||
mos Mossi 0 0
|
||||
mwl Mirandese 0 0
|
||||
mul Multiple languages 0 0
|
||||
mun Munda languages 0 0
|
||||
mus Creek 0 0
|
||||
mwr Marwari 0 0
|
||||
myn Mayan languages 0 0
|
||||
myv Erzya 0 0
|
||||
nah Nahuatl 0 0
|
||||
nai North American Indian 0 0
|
||||
nap Neapolitan 0 0
|
||||
nau na Nauru 0 0
|
||||
nav nv Navajo 0 0
|
||||
nbl nr Ndebele, South 0 0
|
||||
nde nd Ndebele, North 0 0
|
||||
ndo ng Ndonga 0 0
|
||||
nds Low German 0 0
|
||||
nep ne Nepali 0 0
|
||||
new Nepal Bhasa 0 0
|
||||
nia Nias 0 0
|
||||
nic Niger-Kordofanian (Other) 0 0
|
||||
niu Niuean 0 0
|
||||
nno nn Norwegian Nynorsk 0 0
|
||||
nob nb Norwegian Bokmal 0 0
|
||||
nog Nogai 0 0
|
||||
non Norse, Old 0 0
|
||||
nor no Norwegian 1 1
|
||||
nso Northern Sotho 0 0
|
||||
nub Nubian languages 0 0
|
||||
nwc Classical Newari 0 0
|
||||
nya ny Chichewa 0 0
|
||||
nym Nyamwezi 0 0
|
||||
nyn Nyankole 0 0
|
||||
nyo Nyoro 0 0
|
||||
nzi Nzima 0 0
|
||||
oci oc Occitan 1 1
|
||||
oji oj Ojibwa 0 0
|
||||
ori or Oriya 0 0
|
||||
orm om Oromo 0 0
|
||||
osa Osage 0 0
|
||||
oss os Ossetian 0 0
|
||||
ota Turkish, Ottoman (1500-1928) 0 0
|
||||
oto Otomian languages 0 0
|
||||
paa Papuan (Other) 0 0
|
||||
pag Pangasinan 0 0
|
||||
pal Pahlavi 0 0
|
||||
pam Pampanga 0 0
|
||||
pan pa Panjabi 0 0
|
||||
pap Papiamento 0 0
|
||||
pau Palauan 0 0
|
||||
peo Persian, Old (ca.600-400 B.C.) 0 0
|
||||
per fa Persian 1 1
|
||||
phi Philippine (Other) 0 0
|
||||
phn Phoenician 0 0
|
||||
pli pi Pali 0 0
|
||||
pol pl Polish 1 1
|
||||
pon Pohnpeian 0 0
|
||||
por pt Portuguese 1 1
|
||||
pra Prakrit languages 0 0
|
||||
pro Provençal, Old (to 1500) 0 0
|
||||
pus ps Pushto 0 0
|
||||
que qu Quechua 0 0
|
||||
raj Rajasthani 0 0
|
||||
rap Rapanui 0 0
|
||||
rar Rarotongan 0 0
|
||||
roa Romance (Other) 0 0
|
||||
roh rm Raeto-Romance 0 0
|
||||
rom Romany 0 0
|
||||
run rn Rundi 0 0
|
||||
rup Aromanian 0 0
|
||||
rus ru Russian 1 1
|
||||
sad Sandawe 0 0
|
||||
sag sg Sango 0 0
|
||||
sah Yakut 0 0
|
||||
sai South American Indian (Other) 0 0
|
||||
sal Salishan languages 0 0
|
||||
sam Samaritan Aramaic 0 0
|
||||
san sa Sanskrit 0 0
|
||||
sas Sasak 0 0
|
||||
sat Santali 0 0
|
||||
scc sr Serbian 1 1
|
||||
scn Sicilian 0 0
|
||||
sco Scots 0 0
|
||||
sel Selkup 0 0
|
||||
sem Semitic (Other) 0 0
|
||||
sga Irish, Old (to 900) 0 0
|
||||
sgn Sign Languages 0 0
|
||||
shn Shan 0 0
|
||||
sid Sidamo 0 0
|
||||
sin si Sinhalese 1 1
|
||||
sio Siouan languages 0 0
|
||||
sit Sino-Tibetan (Other) 0 0
|
||||
sla Slavic (Other) 0 0
|
||||
slo sk Slovak 1 1
|
||||
slv sl Slovenian 1 1
|
||||
sma Southern Sami 0 0
|
||||
sme se Northern Sami 0 0
|
||||
smi Sami languages (Other) 0 0
|
||||
smj Lule Sami 0 0
|
||||
smn Inari Sami 0 0
|
||||
smo sm Samoan 0 0
|
||||
sms Skolt Sami 0 0
|
||||
sna sn Shona 0 0
|
||||
snd sd Sindhi 0 0
|
||||
snk Soninke 0 0
|
||||
sog Sogdian 0 0
|
||||
som so Somali 0 0
|
||||
son Songhai 0 0
|
||||
sot st Sotho, Southern 0 0
|
||||
spa es Spanish 1 1
|
||||
srd sc Sardinian 0 0
|
||||
srr Serer 0 0
|
||||
ssa Nilo-Saharan (Other) 0 0
|
||||
ssw ss Swati 0 0
|
||||
suk Sukuma 0 0
|
||||
sun su Sundanese 0 0
|
||||
sus Susu 0 0
|
||||
sux Sumerian 0 0
|
||||
swa sw Swahili 1 0
|
||||
swe sv Swedish 1 1
|
||||
syr Syriac 1 0
|
||||
tah ty Tahitian 0 0
|
||||
tai Tai (Other) 0 0
|
||||
tam ta Tamil 1 0
|
||||
tat tt Tatar 0 0
|
||||
tel te Telugu 1 0
|
||||
tem Timne 0 0
|
||||
ter Tereno 0 0
|
||||
tet Tetum 0 0
|
||||
tgk tg Tajik 0 0
|
||||
tgl tl Tagalog 1 1
|
||||
tha th Thai 1 1
|
||||
tib bo Tibetan 0 0
|
||||
tig Tigre 0 0
|
||||
tir ti Tigrinya 0 0
|
||||
tiv Tiv 0 0
|
||||
tkl Tokelau 0 0
|
||||
tlh Klingon 0 0
|
||||
tli Tlingit 0 0
|
||||
tmh Tamashek 0 0
|
||||
tog Tonga (Nyasa) 0 0
|
||||
ton to Tonga (Tonga Islands) 0 0
|
||||
tpi Tok Pisin 0 0
|
||||
tsi Tsimshian 0 0
|
||||
tsn tn Tswana 0 0
|
||||
tso ts Tsonga 0 0
|
||||
tuk tk Turkmen 0 0
|
||||
tum Tumbuka 0 0
|
||||
tup Tupi languages 0 0
|
||||
tur tr Turkish 1 1
|
||||
tut Altaic (Other) 0 0
|
||||
tvl Tuvalu 0 0
|
||||
twi tw Twi 0 0
|
||||
tyv Tuvinian 0 0
|
||||
udm Udmurt 0 0
|
||||
uga Ugaritic 0 0
|
||||
uig ug Uighur 0 0
|
||||
ukr uk Ukrainian 1 1
|
||||
umb Umbundu 0 0
|
||||
und Undetermined 0 0
|
||||
urd ur Urdu 1 0
|
||||
uzb uz Uzbek 0 0
|
||||
vai Vai 0 0
|
||||
ven ve Venda 0 0
|
||||
vie vi Vietnamese 1 1
|
||||
vol vo Volapük 0 0
|
||||
vot Votic 0 0
|
||||
wak Wakashan languages 0 0
|
||||
wal Walamo 0 0
|
||||
war Waray 0 0
|
||||
was Washo 0 0
|
||||
wel cy Welsh 0 0
|
||||
wen Sorbian languages 0 0
|
||||
wln wa Walloon 0 0
|
||||
wol wo Wolof 0 0
|
||||
xal Kalmyk 0 0
|
||||
xho xh Xhosa 0 0
|
||||
yao Yao 0 0
|
||||
yap Yapese 0 0
|
||||
yid yi Yiddish 0 0
|
||||
yor yo Yoruba 0 0
|
||||
ypk Yupik languages 0 0
|
||||
zap Zapotec 0 0
|
||||
zen Zenaga 0 0
|
||||
zha za Zhuang 0 0
|
||||
znd Zande 0 0
|
||||
zul zu Zulu 0 0
|
||||
zun Zuni 0 0
|
||||
rum ro Romanian 1 1
|
||||
pob pb Brazilian 1 1
|
||||
mne Montenegrin 1 0
|
||||
85
lib/babelfish/exceptions.py
Executable file
85
lib/babelfish/exceptions.py
Executable file
@@ -0,0 +1,85 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
class Error(Exception):
    """Base class for all exceptions in babelfish"""


class LanguageError(Error, AttributeError):
    """Base class for all language exceptions in babelfish

    Also derives from :class:`AttributeError`, so ``except AttributeError``
    handlers catch it as well.

    """


class LanguageConvertError(LanguageError):
    """Exception raised by converters when :meth:`~babelfish.converters.LanguageConverter.convert` fails

    :param string alpha3: alpha3 code that failed conversion
    :param country: country code that failed conversion, if any
    :type country: string or None
    :param script: script code that failed conversion, if any
    :type script: string or None

    """
    def __init__(self, alpha3, country=None, script=None):
        # Forward the arguments to the base initialiser so that ``args`` is
        # populated; with an empty ``args`` the exception cannot be rebuilt
        # from ``(type, args)``, which is what pickle/copy rely on.
        super(LanguageConvertError, self).__init__(alpha3, country, script)
        self.alpha3 = alpha3
        self.country = country
        self.script = script

    def __str__(self):
        # Rendered as ``alpha3[-country][-script]``
        s = self.alpha3
        if self.country is not None:
            s += '-' + self.country
        if self.script is not None:
            s += '-' + self.script
        return s


class LanguageReverseError(LanguageError):
    """Exception raised by converters when :meth:`~babelfish.converters.LanguageReverseConverter.reverse` fails

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        # Populate ``args`` so the exception can be rebuilt from them
        super(LanguageReverseError, self).__init__(code)
        self.code = code

    def __str__(self):
        return repr(self.code)


class CountryError(Error, AttributeError):
    """Base class for all country exceptions in babelfish

    Also derives from :class:`AttributeError`, so ``except AttributeError``
    handlers catch it as well.

    """


class CountryConvertError(CountryError):
    """Exception raised by converters when :meth:`~babelfish.converters.CountryConverter.convert` fails

    :param string alpha2: alpha2 code that failed conversion

    """
    def __init__(self, alpha2):
        # Populate ``args`` so the exception can be rebuilt from them
        super(CountryConvertError, self).__init__(alpha2)
        self.alpha2 = alpha2

    def __str__(self):
        return self.alpha2


class CountryReverseError(CountryError):
    """Exception raised by converters when :meth:`~babelfish.converters.CountryReverseConverter.reverse` fails

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        # Populate ``args`` so the exception can be rebuilt from them
        super(CountryReverseError, self).__init__(code)
        self.code = code

    def __str__(self):
        return repr(self.code)
|
||||
186
lib/babelfish/language.py
Executable file
186
lib/babelfish/language.py
Executable file
@@ -0,0 +1,186 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
import os
|
||||
# from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from .converters import ConverterManager
|
||||
from .country import Country
|
||||
from .exceptions import LanguageConvertError
|
||||
from .script import Script
|
||||
from . import basestr
|
||||
|
||||
|
||||
#: Set of the ``alpha3`` value of every row loaded from ``data/iso-639-3.tab``
LANGUAGES = set()
#: List of :class:`IsoLanguage` rows, in file order
LANGUAGE_MATRIX = []

#: The namedtuple used in the :data:`LANGUAGE_MATRIX`
IsoLanguage = namedtuple('IsoLanguage', ['alpha3', 'alpha3b', 'alpha3t', 'alpha2', 'scope', 'type', 'name', 'comment'])

# Open in binary mode: every line is decoded explicitly below, and only bytes
# objects have ``.decode`` on Python 3 (text mode yields ``str`` there and the
# call fails with AttributeError). The ``with`` block closes the file even if
# a row cannot be parsed.
with open(os.path.join(os.path.dirname(__file__), 'data/iso-639-3.tab'), 'rb') as f:
    f.readline()  # discard the first line (presumably the column header row)
    for line in f:
        # One tab-separated row per line; the field count must match IsoLanguage
        iso_language = IsoLanguage(*line.decode('utf-8').split('\t'))
        LANGUAGES.add(iso_language.alpha3)
        LANGUAGE_MATRIX.append(iso_language)
|
||||
|
||||
|
||||
class LanguageConverterManager(ConverterManager):
    """:class:`~babelfish.converters.ConverterManager` specialised for language converters"""

    # Entry point group name handed to the base manager
    entry_point = 'babelfish.language_converters'

    # Built-in converters, one ``name = module:ClassName`` spec per entry
    internal_converters = [
        'alpha2 = babelfish.converters.alpha2:Alpha2Converter',
        'alpha3b = babelfish.converters.alpha3b:Alpha3BConverter',
        'alpha3t = babelfish.converters.alpha3t:Alpha3TConverter',
        'name = babelfish.converters.name:NameConverter',
        'scope = babelfish.converters.scope:ScopeConverter',
        'type = babelfish.converters.type:LanguageTypeConverter',
        'opensubtitles = babelfish.converters.opensubtitles:OpenSubtitlesConverter',
    ]


#: Module-wide manager instance used by :class:`Language` to look up converters by name
language_converters = LanguageConverterManager()
|
||||
|
||||
|
||||
class LanguageMeta(type):
    """Metaclass of :class:`Language`

    Turns a failed class-attribute lookup such as ``Language.frommycode`` into
    a call to ``Language.fromcode`` with ``converter='mycode'``.

    """
    def __getattr__(cls, name):
        prefix = 'from'
        if not name.startswith(prefix):
            # Not a ``from<converter>`` shortcut: defer to the regular lookup,
            # which raises AttributeError for unknown names.
            return type.__getattribute__(cls, name)
        # Everything after the prefix is used as the converter name
        return partial(cls.fromcode, converter=name[len(prefix):])
|
||||
|
||||
|
||||
class Language(LanguageMeta(str('LanguageBase'), (object,), {})):
|
||||
"""A human language
|
||||
|
||||
A human language is composed of a language part following the ISO-639
|
||||
standard and can be country-specific when a :class:`~babelfish.country.Country`
|
||||
is specified.
|
||||
|
||||
The :class:`Language` is extensible with custom converters (see :ref:`custom_converters`)
|
||||
|
||||
:param string language: the language as a 3-letter ISO-639-3 code
|
||||
:param country: the country (if any) as a 2-letter ISO-3166 code or :class:`~babelfish.country.Country` instance
|
||||
:type country: string or :class:`~babelfish.country.Country` or None
|
||||
:param script: the script (if any) as a 4-letter ISO-15924 code or :class:`~babelfish.script.Script` instance
|
||||
:type script: string or :class:`~babelfish.script.Script` or None
|
||||
:param unknown: the unknown language as a three-letters ISO-639-3 code to use as fallback
|
||||
:type unknown: string or None
|
||||
:raise: ValueError if the language could not be recognized and `unknown` is ``None``
|
||||
|
||||
"""
|
||||
def __init__(self, language, country=None, script=None, unknown=None):
|
||||
if unknown is not None and language not in LANGUAGES:
|
||||
language = unknown
|
||||
if language not in LANGUAGES:
|
||||
raise ValueError('%r is not a valid language' % language)
|
||||
self.alpha3 = language
|
||||
self.country = None
|
||||
if isinstance(country, Country):
|
||||
self.country = country
|
||||
elif country is None:
|
||||
self.country = None
|
||||
else:
|
||||
self.country = Country(country)
|
||||
self.script = None
|
||||
if isinstance(script, Script):
|
||||
self.script = script
|
||||
elif script is None:
|
||||
self.script = None
|
||||
else:
|
||||
self.script = Script(script)
|
||||
|
||||
@classmethod
|
||||
def fromcode(cls, code, converter):
|
||||
"""Create a :class:`Language` by its `code` using `converter` to
|
||||
:meth:`~babelfish.converters.LanguageReverseConverter.reverse` it
|
||||
|
||||
:param string code: the code to reverse
|
||||
:param string converter: name of the :class:`~babelfish.converters.LanguageReverseConverter` to use
|
||||
:return: the corresponding :class:`Language` instance
|
||||
:rtype: :class:`Language`
|
||||
|
||||
"""
|
||||
return cls(*language_converters[converter].reverse(code))
|
||||
|
||||
@classmethod
|
||||
def fromietf(cls, ietf):
|
||||
"""Create a :class:`Language` by from an IETF language code
|
||||
|
||||
:param string ietf: the ietf code
|
||||
:return: the corresponding :class:`Language` instance
|
||||
:rtype: :class:`Language`
|
||||
|
||||
"""
|
||||
subtags = ietf.split('-')
|
||||
language_subtag = subtags.pop(0).lower()
|
||||
if len(language_subtag) == 2:
|
||||
language = cls.fromalpha2(language_subtag)
|
||||
else:
|
||||
language = cls(language_subtag)
|
||||
while subtags:
|
||||
subtag = subtags.pop(0)
|
||||
if len(subtag) == 2:
|
||||
language.country = Country(subtag.upper())
|
||||
else:
|
||||
language.script = Script(subtag.capitalize())
|
||||
if language.script is not None:
|
||||
if subtags:
|
||||
raise ValueError('Wrong IETF format. Unmatched subtags: %r' % subtags)
|
||||
break
|
||||
return language
|
||||
|
||||
def __getstate__(self):
    """Return the pickling state as an ``(alpha3, country, script)`` tuple."""
    state = (self.alpha3, self.country, self.script)
    return state
|
||||
|
||||
def __setstate__(self, state):
    """Restore ``alpha3``, ``country`` and ``script`` from a 3-item `state`."""
    alpha3, country, script = state
    self.alpha3 = alpha3
    self.country = country
    self.script = script
|
||||
|
||||
def __getattr__(self, name):
    """Resolve a missing attribute through the converter registered as `name`.

    The converter receives the alpha3 code, the country's ``alpha2`` (or
    None) and the script's ``code`` (or None).

    :raise AttributeError: when the lookup raises :class:`KeyError`
    """
    language_code = self.alpha3
    country_code = None if self.country is None else self.country.alpha2
    script_code = None if self.script is None else self.script.code
    try:
        return language_converters[name].convert(language_code, country_code, script_code)
    except KeyError:
        # Keep the normal attribute protocol: unknown converter name means
        # "no such attribute".
        raise AttributeError(name)
|
||||
|
||||
def __hash__(self):
    """Hash the language by its string form."""
    text = str(self)
    return hash(text)
|
||||
|
||||
def __eq__(self, other):
    """Compare with a string (by string form) or another :class:`Language`.

    Two languages are equal when their alpha3 code, country and script are
    all equal; any other type compares unequal.
    """
    if isinstance(other, basestr):
        return str(self) == other
    if isinstance(other, Language):
        return (self.alpha3 == other.alpha3 and
                self.country == other.country and
                self.script == other.script)
    return False
|
||||
|
||||
def __ne__(self, other):
    """Return the negation of ``self == other``."""
    is_equal = self == other
    return not is_equal
|
||||
|
||||
def __bool__(self):
    """A language is truthy unless its alpha3 code is ``'und'``."""
    code = self.alpha3
    return code != 'und'


# Python 2 spelling of the truthiness hook.
__nonzero__ = __bool__
|
||||
|
||||
def __repr__(self):
    """Return ``<Language [...]>`` around the string form of the language."""
    template = '<Language [%s]>'
    return template % self
|
||||
|
||||
def __str__(self):
    """Return the language code, followed by ``-country`` and ``-script``.

    The alpha2 code is preferred; when converting to it raises
    :class:`LanguageConvertError` the alpha3 code is used instead.
    """
    try:
        text = self.alpha2
    except LanguageConvertError:
        text = self.alpha3
    for part in (self.country, self.script):
        if part is not None:
            text += '-' + str(part)
    return text
|
||||
78
lib/babelfish/script.py
Executable file
78
lib/babelfish/script.py
Executable file
@@ -0,0 +1,78 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
||||
# Use of this source code is governed by the 3-clause BSD license
|
||||
# that can be found in the LICENSE file.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
from collections import namedtuple
|
||||
# from pkg_resources import resource_stream # @UnresolvedImport
|
||||
from . import basestr
|
||||
|
||||
#: Script code to script name mapping
SCRIPTS = {}

#: List of scripts in the ISO-15924 as namedtuple of code, number, name, french_name, pva and date
SCRIPT_MATRIX = []

#: The namedtuple used in the :data:`SCRIPT_MATRIX`
IsoScript = namedtuple('IsoScript', ['code', 'number', 'name', 'french_name', 'pva', 'date'])

# Populate SCRIPTS and SCRIPT_MATRIX from the bundled ISO-15924 table.
# The file is read as bytes so the explicit UTF-8 decode below works the same
# on Python 2 and Python 3 (a text-mode file yields ``str`` on Python 3, which
# has no ``decode``). The ``with`` block closes the handle even when a
# malformed line makes ``_make`` raise.
with open(os.path.join(os.path.dirname(__file__), 'data/iso15924-utf8-20131012.txt'), 'rb') as f:
    f.readline()  # the first line is discarded unread
    for l in f:
        l = l.decode('utf-8').strip()
        # Blank lines and '#' comment lines carry no record.
        if not l or l.startswith('#'):
            continue
        script = IsoScript._make(l.split(';'))
        SCRIPT_MATRIX.append(script)
        SCRIPTS[script.code] = script.name
|
||||
|
||||
|
||||
class Script(object):
    """A human writing system

    A script is represented by a 4-letter code from the ISO-15924 standard

    :param string script: 4-letter ISO-15924 script code
    :raise ValueError: if `script` is not a key of :data:`SCRIPTS`

    """
    def __init__(self, script):
        if script in SCRIPTS:
            #: ISO-15924 4-letter script code
            self.code = script
        else:
            raise ValueError('%r is not a valid script' % script)

    @property
    def name(self):
        """English name of the script"""
        code = self.code
        return SCRIPTS[code]

    def __getstate__(self):
        # The code alone identifies the script.
        return self.code

    def __setstate__(self, state):
        self.code = state

    def __hash__(self):
        code = self.code
        return hash(code)

    def __eq__(self, other):
        # Strings compare against the raw code; other scripts by code;
        # everything else is unequal.
        if isinstance(other, basestr):
            return self.code == other
        if isinstance(other, Script):
            return self.code == other.code
        return False

    def __ne__(self, other):
        is_equal = self == other
        return not is_equal

    def __repr__(self):
        template = '<Script [%s]>'
        return template % self

    def __str__(self):
        return self.code
|
||||
9
lib/dateutil/__init__.py
Normal file
9
lib/dateutil/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""
|
||||
Copyright (c) 2003-2010 Gustavo Niemeyer <gustavo@niemeyer.net>
|
||||
|
||||
This module offers extensions to the standard python 2.3+
|
||||
datetime module.
|
||||
"""
|
||||
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
|
||||
__license__ = "PSF License"
|
||||
__version__ = "1.5.0.1"
|
||||
92
lib/dateutil/easter.py
Normal file
92
lib/dateutil/easter.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""
|
||||
Copyright (c) 2003-2007 Gustavo Niemeyer <gustavo@niemeyer.net>
|
||||
|
||||
This module offers extensions to the standard python 2.3+
|
||||
datetime module.
|
||||
"""
|
||||
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
|
||||
__license__ = "PSF License"
|
||||
|
||||
import datetime
|
||||
|
||||
__all__ = ["easter", "EASTER_JULIAN", "EASTER_ORTHODOX", "EASTER_WESTERN"]
|
||||
|
||||
EASTER_JULIAN = 1
EASTER_ORTHODOX = 2
EASTER_WESTERN = 3


def easter(year, method=EASTER_WESTERN):
    """
    Return the date of Easter for `year` as a :class:`datetime.date`.

    This method was ported from the work done by GM Arts,
    on top of the algorithm by Claus Tondering, which was
    based in part on the algorithm of Ouding (1940), as
    quoted in "Explanatory Supplement to the Astronomical
    Almanac", P. Kenneth Seidelmann, editor.

    This algorithm implements three different easter
    calculation methods:

    1 - Original calculation in Julian calendar, valid in
        dates after 326 AD
    2 - Original method, with date converted to Gregorian
        calendar, valid in years 1583 to 4099
    3 - Revised method, in Gregorian calendar, valid in
        years 1583 to 4099 as well

    These methods are represented by the constants:

    EASTER_JULIAN   = 1
    EASTER_ORTHODOX = 2
    EASTER_WESTERN  = 3

    The default method is method 3.

    More about the algorithm may be found at:

    http://users.chariot.net.au/~gmarts/eastalg.htm

    and

    http://www.tondering.dk/claus/calendar.html

    :param year: the year to compute Easter for
    :param method: one of the three constants above
    :raise ValueError: if `method` is not 1, 2 or 3
    """

    if not (1 <= method <= 3):
        # Call form of ``raise``: valid on both Python 2 and Python 3.
        raise ValueError("invalid method")

    # g - Golden year - 1
    # c - Century
    # h - (23 - Epact) mod 30
    # i - Number of days from March 21 to Paschal Full Moon
    # j - Weekday for PFM (0=Sunday, etc)
    # p - Number of days from March 21 to Sunday on or before PFM
    #     (-6 to 28 methods 1 & 3, to 56 for method 2)
    # e - Extra days to add for method 2 (converting Julian
    #     date to Gregorian date)

    y = year
    g = y % 19
    e = 0
    if method < 3:
        # Old method
        i = (19*g+15) % 30
        j = (y+y//4+i) % 7
        if method == 2:
            # Extra dates to convert Julian to Gregorian date
            e = 10
            if y > 1600:
                e = e+y//100-16-(y//100-16)//4
    else:
        # New method
        c = y//100
        h = (c-c//4-(8*c+13)//25+19*g+15) % 30
        i = h-(h//28)*(1-(h//28)*(29//(h+1))*((21-g)//11))
        j = (y+y//4+i+2-c+c//4) % 7

    # p can be from -6 to 56 corresponding to dates 22 March to 23 May
    # (later dates apply to method 2, although 23 May never actually occurs)
    p = i-j+e
    d = 1+(p+27+(p+6)//40) % 31
    m = 3+(p+26)//30
    return datetime.date(int(y), int(m), int(d))
|
||||
|
||||
886
lib/dateutil/parser.py
Normal file
886
lib/dateutil/parser.py
Normal file
@@ -0,0 +1,886 @@
|
||||
# -*- coding:iso-8859-1 -*-
|
||||
"""
|
||||
Copyright (c) 2003-2007 Gustavo Niemeyer <gustavo@niemeyer.net>
|
||||
|
||||
This module offers extensions to the standard python 2.3+
|
||||
datetime module.
|
||||
"""
|
||||
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
|
||||
__license__ = "PSF License"
|
||||
|
||||
import datetime
|
||||
import string
|
||||
import time
|
||||
import sys
|
||||
import os
|
||||
|
||||
try:
|
||||
from cStringIO import StringIO
|
||||
except ImportError:
|
||||
from StringIO import StringIO
|
||||
|
||||
import relativedelta
|
||||
import tz
|
||||
|
||||
|
||||
__all__ = ["parse", "parserinfo"]
|
||||
|
||||
|
||||
# Some pointers:
|
||||
#
|
||||
# http://www.cl.cam.ac.uk/~mgk25/iso-time.html
|
||||
# http://www.iso.ch/iso/en/prods-services/popstds/datesandtime.html
|
||||
# http://www.w3.org/TR/NOTE-datetime
|
||||
# http://ringmaster.arc.nasa.gov/tools/time_formats.html
|
||||
# http://search.cpan.org/author/MUIR/Time-modules-2003.0211/lib/Time/ParseDate.pm
|
||||
# http://stein.cshl.org/jade/distrib/docs/java.text.SimpleDateFormat.html
|
||||
|
||||
|
||||
class _timelex(object):
    """Tokenizer that splits a date/time string into words, numbers and
    single separator characters.

    `instream` is either a string or an object with a ``read(n)`` method.
    Runs of whitespace characters are each emitted as a single ``' '`` token
    and NUL characters read from the stream are skipped.
    """

    # NOTE(review): the scraped literal contained U+FFFD replacement markers
    # (rendered as the text "<EFBFBD>") where the ISO-8859-1 source had
    # accented letters, which also turned '<' and '>' into word characters.
    # Reconstructed as 0xDF-0xFF without the division sign followed by
    # 0xC0-0xDE without the multiplication sign -- confirm against upstream.
    _LATIN1_LETTERS = (
        ''.join([chr(c) for c in range(0xdf, 0x100) if c != 0xf7]) +
        ''.join([chr(c) for c in range(0xc0, 0xdf) if c != 0xd7]))

    def __init__(self, instream):
        # ``basestring`` only exists on Python 2; fall back to ``str``.
        try:
            text_types = basestring
        except NameError:
            text_types = str
        if isinstance(instream, text_types):
            instream = StringIO(instream)
        self.instream = instream
        self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ_'
                          + self._LATIN1_LETTERS)
        self.numchars = '0123456789'
        self.whitespace = ' \t\r\n'
        self.charstack = []   # characters read but pushed back
        self.tokenstack = []  # tokens already split off and waiting
        self.eof = False

    def get_token(self):
        """Return the next token, or None once the input is exhausted.

        The lexer state is one of: None (start), ``'a'`` (word), ``'0'``
        (number), ``'a.'`` / ``'0.'`` (word or number that contains dots).
        """
        if self.tokenstack:
            return self.tokenstack.pop(0)
        seenletters = False
        token = None
        state = None
        wordchars = self.wordchars
        numchars = self.numchars
        whitespace = self.whitespace
        while not self.eof:
            if self.charstack:
                nextchar = self.charstack.pop(0)
            else:
                nextchar = self.instream.read(1)
                while nextchar == '\x00':
                    nextchar = self.instream.read(1)
            if not nextchar:
                self.eof = True
                break
            elif not state:
                token = nextchar
                if nextchar in wordchars:
                    state = 'a'
                elif nextchar in numchars:
                    state = '0'
                elif nextchar in whitespace:
                    token = ' '
                    break  # emit token
                else:
                    break  # emit token
            elif state == 'a':
                seenletters = True
                if nextchar in wordchars:
                    token += nextchar
                elif nextchar == '.':
                    token += nextchar
                    state = 'a.'
                else:
                    self.charstack.append(nextchar)
                    break  # emit token
            elif state == '0':
                if nextchar in numchars:
                    token += nextchar
                elif nextchar == '.':
                    token += nextchar
                    state = '0.'
                else:
                    self.charstack.append(nextchar)
                    break  # emit token
            elif state == 'a.':
                seenletters = True
                if nextchar == '.' or nextchar in wordchars:
                    token += nextchar
                elif nextchar in numchars and token[-1] == '.':
                    token += nextchar
                    state = '0.'
                else:
                    self.charstack.append(nextchar)
                    break  # emit token
            elif state == '0.':
                if nextchar == '.' or nextchar in numchars:
                    token += nextchar
                elif nextchar in wordchars and token[-1] == '.':
                    token += nextchar
                    state = 'a.'
                else:
                    self.charstack.append(nextchar)
                    break  # emit token
        # A dotted token that is not a plain decimal number (it has letters,
        # more than one dot, or a trailing dot) is split on the dots; the
        # pieces after the first are queued for the following calls.
        if (state in ('a.', '0.') and
                (seenletters or token.count('.') > 1 or token[-1] == '.')):
            l = token.split('.')
            token = l[0]
            for tok in l[1:]:
                self.tokenstack.append('.')
                if tok:
                    self.tokenstack.append(tok)
        return token

    def __iter__(self):
        return self

    def next(self):
        """Iterator protocol: return the next token or raise StopIteration."""
        token = self.get_token()
        if token is None:
            raise StopIteration
        return token

    # Python 3 looks up ``__next__``; without it ``list(cls(s))`` fails.
    __next__ = next

    @classmethod
    def split(cls, s):
        """Return the list of all tokens of `s`."""
        return list(cls(s))
|
||||
|
||||
|
||||
class _resultbase(object):
    """Base class for parse results whose fields are listed in ``__slots__``.

    Subclasses declare ``__slots__``; every slot is initialised to None.
    """

    def __init__(self):
        for attr in self.__slots__:
            setattr(self, attr, None)

    def _repr(self, classname):
        """Format as ``classname(attr=value, ...)``, skipping None fields."""
        # ``repr(value)`` replaces the Python 2-only backtick syntax.
        fields = ["%s=%s" % (attr, repr(getattr(self, attr)))
                  for attr in self.__slots__
                  if getattr(self, attr) is not None]
        return "%s(%s)" % (classname, ", ".join(fields))

    def __repr__(self):
        return self._repr(self.__class__.__name__)
|
||||
|
||||
|
||||
class parserinfo(object):
    """Vocabulary used by the date parser: separators, weekday and month
    names, hour/minute/second markers, am/pm markers and UTC aliases.

    Each class-level list is converted at construction time into a dict
    mapping the lower-cased name to its index in the list.

    :param dayfirst: stored as ``self.dayfirst``
    :param yearfirst: stored as ``self.yearfirst``
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z"]
    PERTAIN = ["of"]
    TZOFFSET = {}

    def __init__(self, dayfirst=False, yearfirst=False):
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Current year and the first year of its century, used by
        # convertyear() to expand two-digit years.
        self._year = time.localtime().tm_year
        self._century = self._year//100*100

    def _convert(self, lst):
        """Map every lower-cased name in `lst` to the index of its entry.

        An entry is either a single string or a tuple of aliases.
        """
        index_by_name = {}
        for index, entry in enumerate(lst):
            aliases = entry if isinstance(entry, tuple) else (entry,)
            for alias in aliases:
                index_by_name[alias.lower()] = index
        return index_by_name

    def jump(self, name):
        """True if `name` is a token the parser may skip."""
        return name.lower() in self._jump

    def weekday(self, name):
        """Index (0 = Monday) of the weekday `name`, or None."""
        if len(name) < 3:
            return None
        return self._weekdays.get(name.lower())

    def month(self, name):
        """Month number (1 = January) of `name`, or None."""
        if len(name) < 3:
            return None
        index = self._months.get(name.lower())
        return None if index is None else index+1

    def hms(self, name):
        """0, 1 or 2 for an hour, minute or second marker, else None."""
        return self._hms.get(name.lower())

    def ampm(self, name):
        """0 for an am marker, 1 for a pm marker, else None."""
        return self._ampm.get(name.lower())

    def pertain(self, name):
        return name.lower() in self._pertain

    def utczone(self, name):
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """Return 0 for a key of the UTC table, else ``TZOFFSET.get(name)``."""
        # NOTE(review): unlike utczone(), `name` is not lower-cased here while
        # the table keys are, so "UTC" falls through to TZOFFSET -- confirm
        # whether that is intended.
        return 0 if name in self._utczone else self.TZOFFSET.get(name)

    def convertyear(self, year):
        """Expand a year below 100 to the matching year within 50 years of
        the current one; other years are returned unchanged."""
        if year >= 100:
            return year
        year += self._century
        if abs(year-self._year) >= 50:
            year += 100 if year < self._year else -100
        return year

    def validate(self, res):
        """Normalise the year and UTC fields of `res` in place; return True."""
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year)
        zero_offset_unnamed = res.tzoffset == 0 and not res.tzname
        if zero_offset_unnamed or res.tzname == 'Z':
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            res.tzoffset = 0
        return True
|
||||
|
||||
|
||||
class parser(object):
    """Parse free-form date/time strings.

    :param info: a :class:`parserinfo` supplying the vocabulary; a default
        instance is created when none is given.
    """

    def __init__(self, info=None):
        self.info = info or parserinfo()

    def parse(self, timestr, default=None,
              ignoretz=False, tzinfos=None,
              **kwargs):
        """Parse `timestr` into a :class:`datetime.datetime`.

        :param timestr: the text to parse
        :param default: datetime whose fields are used for everything the
            string does not specify; defaults to today at midnight
        :param ignoretz: when true, no tzinfo is attached to the result
        :param tzinfos: a callable ``(tzname, tzoffset)`` or a mapping of
            zone names; each must yield a tzinfo, a tz string or an int offset
        :param kwargs: forwarded to :meth:`_parse` (dayfirst, yearfirst, fuzzy)
        :raise ValueError: if the string cannot be parsed or `tzinfos`
            yields an unsupported value
        """
        if not default:
            default = datetime.datetime.now().replace(hour=0, minute=0,
                                                      second=0, microsecond=0)
        res = self._parse(timestr, **kwargs)
        if res is None:
            raise ValueError("unknown string format")
        repl = {}
        for attr in ["year", "month", "day", "hour",
                     "minute", "second", "microsecond"]:
            value = getattr(res, attr)
            if value is not None:
                repl[attr] = value
        ret = default.replace(**repl)
        if res.weekday is not None and not res.day:
            # Only a weekday was given: move to that weekday.
            ret = ret+relativedelta.relativedelta(weekday=res.weekday)
        if not ignoretz:
            if callable(tzinfos) or tzinfos and res.tzname in tzinfos:
                if callable(tzinfos):
                    tzdata = tzinfos(res.tzname, res.tzoffset)
                else:
                    tzdata = tzinfos.get(res.tzname)
                # ``basestring`` only exists on Python 2.
                try:
                    text_types = basestring
                except NameError:
                    text_types = str
                if isinstance(tzdata, datetime.tzinfo):
                    tzinfo = tzdata
                elif isinstance(tzdata, text_types):
                    tzinfo = tz.tzstr(tzdata)
                elif isinstance(tzdata, int):
                    tzinfo = tz.tzoffset(res.tzname, tzdata)
                else:
                    raise ValueError("offset must be tzinfo subclass, "
                                     "tz string, or int offset")
                ret = ret.replace(tzinfo=tzinfo)
            elif res.tzname and res.tzname in time.tzname:
                ret = ret.replace(tzinfo=tz.tzlocal())
            elif res.tzoffset == 0:
                ret = ret.replace(tzinfo=tz.tzutc())
            elif res.tzoffset:
                ret = ret.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
        return ret

    class _result(_resultbase):
        __slots__ = ["year", "month", "day", "weekday",
                     "hour", "minute", "second", "microsecond",
                     "tzname", "tzoffset"]

    def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False):
        """Tokenise `timestr` and fill a :class:`_result`.

        :param dayfirst: overrides ``info.dayfirst`` when not None
        :param yearfirst: overrides ``info.yearfirst`` when not None
        :param fuzzy: when true, unrecognised tokens are skipped instead of
            making the parse fail
        :return: the populated result, or None when the string is rejected
        """
        info = self.info
        if dayfirst is None:
            dayfirst = info.dayfirst
        if yearfirst is None:
            yearfirst = info.yearfirst
        res = self._result()
        l = _timelex.split(timestr)
        try:

            # year/month/day list
            ymd = []

            # Index of the month string in ymd
            mstridx = -1

            len_l = len(l)
            i = 0
            while i < len_l:

                # Check if it's a number
                try:
                    value_repr = l[i]
                    value = float(value_repr)
                except ValueError:
                    value = None

                if value is not None:
                    # Token is a number
                    len_li = len(l[i])
                    i += 1
                    if (len(ymd) == 3 and len_li in (2, 4)
                            and (i >= len_l or (l[i] != ':' and
                                                info.hms(l[i]) is None))):
                        # 19990101T23[59]
                        s = l[i-1]
                        res.hour = int(s[:2])
                        if len_li == 4:
                            res.minute = int(s[2:])
                    elif len_li == 6 or (len_li > 6 and l[i-1].find('.') == 6):
                        # YYMMDD or HHMMSS[.ss]
                        s = l[i-1]
                        if not ymd and l[i-1].find('.') == -1:
                            ymd.append(info.convertyear(int(s[:2])))
                            ymd.append(int(s[2:4]))
                            ymd.append(int(s[4:]))
                        else:
                            # 19990101T235959[.59]
                            res.hour = int(s[:2])
                            res.minute = int(s[2:4])
                            res.second, res.microsecond = _parsems(s[4:])
                    elif len_li == 8:
                        # YYYYMMDD
                        s = l[i-1]
                        ymd.append(int(s[:4]))
                        ymd.append(int(s[4:6]))
                        ymd.append(int(s[6:]))
                    elif len_li in (12, 14):
                        # YYYYMMDDhhmm[ss]
                        s = l[i-1]
                        ymd.append(int(s[:4]))
                        ymd.append(int(s[4:6]))
                        ymd.append(int(s[6:8]))
                        res.hour = int(s[8:10])
                        res.minute = int(s[10:12])
                        if len_li == 14:
                            res.second = int(s[12:])
                    elif ((i < len_l and info.hms(l[i]) is not None) or
                          (i+1 < len_l and l[i] == ' ' and
                           info.hms(l[i+1]) is not None)):
                        # HH[ ]h or MM[ ]m or SS[.ss][ ]s
                        if l[i] == ' ':
                            i += 1
                        idx = info.hms(l[i])
                        while True:
                            if idx == 0:
                                res.hour = int(value)
                                if value % 1:
                                    res.minute = int(60*(value % 1))
                            elif idx == 1:
                                res.minute = int(value)
                                if value % 1:
                                    res.second = int(60*(value % 1))
                            elif idx == 2:
                                res.second, res.microsecond = \
                                    _parsems(value_repr)
                            i += 1
                            if i >= len_l or idx == 2:
                                break
                            # 12h00
                            try:
                                value_repr = l[i]
                                value = float(value_repr)
                            except ValueError:
                                break
                            else:
                                i += 1
                                idx += 1
                                if i < len_l:
                                    newidx = info.hms(l[i])
                                    if newidx is not None:
                                        idx = newidx
                    elif i+1 < len_l and l[i] == ':':
                        # HH:MM[:SS[.ss]]
                        res.hour = int(value)
                        i += 1
                        value = float(l[i])
                        res.minute = int(value)
                        if value % 1:
                            res.second = int(60*(value % 1))
                        i += 1
                        if i < len_l and l[i] == ':':
                            res.second, res.microsecond = _parsems(l[i+1])
                            i += 2
                    elif i < len_l and l[i] in ('-', '/', '.'):
                        sep = l[i]
                        ymd.append(int(value))
                        i += 1
                        if i < len_l and not info.jump(l[i]):
                            try:
                                # 01-01[-01]
                                ymd.append(int(l[i]))
                            except ValueError:
                                # 01-Jan[-01]
                                value = info.month(l[i])
                                if value is not None:
                                    # Only one month name is allowed.
                                    if mstridx != -1:
                                        return None
                                    ymd.append(value)
                                    mstridx = len(ymd)-1
                                else:
                                    return None
                            i += 1
                            if i < len_l and l[i] == sep:
                                # We have three members
                                i += 1
                                value = info.month(l[i])
                                if value is not None:
                                    # The original asserted mstridx == -1
                                    # *after* assigning it, so a month name
                                    # in third position always failed; the
                                    # check now precedes the assignment.
                                    if mstridx != -1:
                                        return None
                                    ymd.append(value)
                                    mstridx = len(ymd)-1
                                else:
                                    ymd.append(int(l[i]))
                                i += 1
                    elif i >= len_l or info.jump(l[i]):
                        if i+1 < len_l and info.ampm(l[i+1]) is not None:
                            # 12 am
                            res.hour = int(value)
                            if res.hour < 12 and info.ampm(l[i+1]) == 1:
                                res.hour += 12
                            elif res.hour == 12 and info.ampm(l[i+1]) == 0:
                                res.hour = 0
                            i += 1
                        else:
                            # Year, month or day
                            ymd.append(int(value))
                        i += 1
                    elif info.ampm(l[i]) is not None:
                        # 12am
                        res.hour = int(value)
                        if res.hour < 12 and info.ampm(l[i]) == 1:
                            res.hour += 12
                        elif res.hour == 12 and info.ampm(l[i]) == 0:
                            res.hour = 0
                        i += 1
                    elif not fuzzy:
                        return None
                    else:
                        i += 1
                    continue

                # Check weekday
                value = info.weekday(l[i])
                if value is not None:
                    res.weekday = value
                    i += 1
                    continue

                # Check month name
                value = info.month(l[i])
                if value is not None:
                    # Only one month name is allowed.
                    if mstridx != -1:
                        return None
                    ymd.append(value)
                    mstridx = len(ymd)-1
                    i += 1
                    if i < len_l:
                        if l[i] in ('-', '/'):
                            # Jan-01[-99]
                            sep = l[i]
                            i += 1
                            ymd.append(int(l[i]))
                            i += 1
                            if i < len_l and l[i] == sep:
                                # Jan-01-99
                                i += 1
                                ymd.append(int(l[i]))
                                i += 1
                        elif (i+3 < len_l and l[i] == l[i+2] == ' '
                              and info.pertain(l[i+1])):
                            # Jan of 01
                            # In this case, 01 is clearly year
                            try:
                                value = int(l[i+3])
                            except ValueError:
                                # Wrong guess
                                pass
                            else:
                                # Convert it here to become unambiguous
                                ymd.append(info.convertyear(value))
                            i += 4
                    continue

                # Check am/pm
                value = info.ampm(l[i])
                if value is not None:
                    if res.hour is None:
                        # No hour to adjust. A pm marker used to reach
                        # ``res.hour += 12`` with None (uncaught TypeError);
                        # reject it unless parsing fuzzily.
                        if value == 1 and not fuzzy:
                            return None
                    elif value == 1 and res.hour < 12:
                        res.hour += 12
                    elif value == 0 and res.hour == 12:
                        res.hour = 0
                    i += 1
                    continue

                # Check for a timezone name
                if (res.hour is not None and len(l[i]) <= 5 and
                        res.tzname is None and res.tzoffset is None and
                        not [x for x in l[i] if x not in string.ascii_uppercase]):
                    res.tzname = l[i]
                    res.tzoffset = info.tzoffset(res.tzname)
                    i += 1

                    # Check for something like GMT+3, or BRST+3. Notice
                    # that it doesn't mean "I am 3 hours after GMT", but
                    # "my time +3 is GMT". If found, we reverse the
                    # logic so that timezone parsing code will get it
                    # right.
                    if i < len_l and l[i] in ('+', '-'):
                        l[i] = ('+', '-')[l[i] == '+']
                        res.tzoffset = None
                        if info.utczone(res.tzname):
                            # With something like GMT+3, the timezone
                            # is *not* GMT.
                            res.tzname = None

                    continue

                # Check for a numbered timezone
                if res.hour is not None and l[i] in ('+', '-'):
                    signal = (-1, 1)[l[i] == '+']
                    i += 1
                    len_li = len(l[i])
                    if len_li == 4:
                        # -0300
                        res.tzoffset = int(l[i][:2])*3600+int(l[i][2:])*60
                    elif i+1 < len_l and l[i+1] == ':':
                        # -03:00
                        res.tzoffset = int(l[i])*3600+int(l[i+2])*60
                        i += 2
                    elif len_li <= 2:
                        # -[0]3
                        res.tzoffset = int(l[i][:2])*3600
                    else:
                        return None
                    i += 1
                    res.tzoffset *= signal

                    # Look for a timezone name between parenthesis
                    if (i+3 < len_l and
                            info.jump(l[i]) and l[i+1] == '(' and l[i+3] == ')' and
                            3 <= len(l[i+2]) <= 5 and
                            not [x for x in l[i+2]
                                 if x not in string.ascii_uppercase]):
                        # -0300 (BRST)
                        res.tzname = l[i+2]
                        i += 4
                    continue

                # Check jumps
                if not (info.jump(l[i]) or fuzzy):
                    return None

                i += 1

            # Process year/month/day
            len_ymd = len(ymd)
            if len_ymd > 3:
                # More than three members!?
                return None
            elif len_ymd == 1 or (mstridx != -1 and len_ymd == 2):
                # One member, or two members with a month string
                if mstridx != -1:
                    res.month = ymd[mstridx]
                    del ymd[mstridx]
                if len_ymd > 1 or mstridx == -1:
                    if ymd[0] > 31:
                        res.year = ymd[0]
                    else:
                        res.day = ymd[0]
            elif len_ymd == 2:
                # Two members with numbers
                if ymd[0] > 31:
                    # 99-01
                    res.year, res.month = ymd
                elif ymd[1] > 31:
                    # 01-99
                    res.month, res.year = ymd
                elif dayfirst and ymd[1] <= 12:
                    # 13-01
                    res.day, res.month = ymd
                else:
                    # 01-13
                    res.month, res.day = ymd
            if len_ymd == 3:
                # Three members
                if mstridx == 0:
                    res.month, res.day, res.year = ymd
                elif mstridx == 1:
                    if ymd[0] > 31 or (yearfirst and ymd[2] <= 31):
                        # 99-Jan-01
                        res.year, res.month, res.day = ymd
                    else:
                        # 01-Jan-01
                        # Give precedence to day-first, since
                        # two-digit years is usually hand-written.
                        res.day, res.month, res.year = ymd
                elif mstridx == 2:
                    # Month name in last position
                    if ymd[1] > 31:
                        # 01-99-Jan
                        res.day, res.year, res.month = ymd
                    else:
                        # 99-01-Jan
                        res.year, res.day, res.month = ymd
                else:
                    if ymd[0] > 31 or \
                       (yearfirst and ymd[1] <= 12 and ymd[2] <= 31):
                        # 99-01-01
                        res.year, res.month, res.day = ymd
                    elif ymd[0] > 12 or (dayfirst and ymd[1] <= 12):
                        # 13-01-01
                        res.day, res.month, res.year = ymd
                    else:
                        # 01-13-01
                        res.month, res.day, res.year = ymd

        except (IndexError, ValueError, AssertionError):
            # Running off the token list or a failed int()/float()
            # conversion means the string does not match a known layout.
            return None

        if not info.validate(res):
            return None
        return res
|
||||
|
||||
DEFAULTPARSER = parser()


def parse(timestr, parserinfo=None, **kwargs):
    """Parse `timestr` with a :class:`parser`.

    A fresh parser is built around `parserinfo` when one is given; otherwise
    the shared ``DEFAULTPARSER`` is used. Extra keyword arguments are passed
    through to :meth:`parser.parse`.
    """
    chosen = parser(parserinfo) if parserinfo else DEFAULTPARSER
    return chosen.parse(timestr, **kwargs)
|
||||
|
||||
|
||||
class _tzparser(object):
    """Parser for TZ-variable style strings such as ``BRST+3[BRDT[+2]]``,
    optionally followed by comma-separated daylight-saving rules."""

    class _result(_resultbase):

        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """Parse `tzstr`; return a :class:`_result`, or None if it is invalid.

        NOTE(review): the scraped original had lost its indentation, so it is
        ambiguous whether the offset branch was nested under ``if j != i``.
        Either way a token with no preceding abbreviation misbehaved (``i``
        never advanced, or ``offattr`` was read unbound). This version stops
        scanning there; it matches both readings for every input on which
        they agree. Validation uses explicit checks instead of ``assert`` so
        it also works under ``python -O``.
        """
        res = self._result()
        l = _timelex.split(tzstr)
        try:

            len_l = len(l)

            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                j = i
                while j < len_l and not [x for x in l[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j == i:
                    # No abbreviation at this position: nothing for an
                    # offset to attach to, so leave the name/offset scan.
                    break
                if not res.stdabbr:
                    offattr = "stdoffset"
                    res.stdabbr = "".join(l[i:j])
                else:
                    offattr = "dstoffset"
                    res.dstabbr = "".join(l[i:j])
                i = j
                if (i < len_l and
                        (l[i] in ('+', '-') or l[i][0] in "0123456789")):
                    if l[i] in ('+', '-'):
                        # Yes, that's right.  See the TZ variable
                        # documentation.
                        signal = (1, -1)[l[i] == '+']
                        i += 1
                    else:
                        signal = -1
                    len_li = len(l[i])
                    if len_li == 4:
                        # -0300
                        setattr(res, offattr,
                                (int(l[i][:2])*3600+int(l[i][2:])*60)*signal)
                    elif i+1 < len_l and l[i+1] == ':':
                        # -03:00
                        setattr(res, offattr,
                                (int(l[i])*3600+int(l[i+2])*60)*signal)
                        i += 2
                    elif len_li <= 2:
                        # -[0]3
                        setattr(res, offattr,
                                int(l[i][:2])*3600*signal)
                    else:
                        return None
                    i += 1
                    if res.dstabbr:
                        break
                else:
                    break

            if i < len_l:
                # ';' is accepted as an alternative to ','.
                for j in range(i, len_l):
                    if l[j] == ';':
                        l[j] = ','

                if l[i] != ',':
                    return None

                i += 1

            if i >= len_l:
                pass
            elif (8 <= l.count(',') <= 9 and
                  not [y for x in l[i:] if x != ','
                       for y in x if y not in "0123456789"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for x in (res.start, res.end):
                    x.month = int(l[i])
                    i += 2
                    if l[i] == '-':
                        value = int(l[i+1])*-1
                        i += 1
                    else:
                        value = int(l[i])
                    i += 2
                    if value:
                        x.week = value
                        x.weekday = (int(l[i])-1) % 7
                    else:
                        x.day = int(l[i])
                    i += 2
                    x.time = int(l[i])
                    i += 2
                if i < len_l:
                    if l[i] in ('-', '+'):
                        signal = (-1, 1)[l[i] == "+"]
                        i += 1
                    else:
                        signal = 1
                    res.dstoffset = (res.stdoffset+int(l[i]))*signal
            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
                  not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
                                                     '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if l[i] == 'J':
                        # non-leap year day (1 based)
                        i += 1
                        x.jyday = int(l[i])
                    elif l[i] == 'M':
                        # month[-.]week[-.]weekday
                        i += 1
                        x.month = int(l[i])
                        i += 1
                        if l[i] not in ('-', '.'):
                            return None
                        i += 1
                        x.week = int(l[i])
                        if x.week == 5:
                            x.week = -1
                        i += 1
                        if l[i] not in ('-', '.'):
                            return None
                        i += 1
                        x.weekday = (int(l[i])-1) % 7
                    else:
                        # year day (zero based)
                        x.yday = int(l[i])+1

                    i += 1

                    if i < len_l and l[i] == '/':
                        i += 1
                        # start time
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            x.time = (int(l[i][:2])*3600+int(l[i][2:])*60)
                        elif i+1 < len_l and l[i+1] == ':':
                            # -03:00
                            x.time = int(l[i])*3600+int(l[i+2])*60
                            i += 2
                            if i+1 < len_l and l[i+1] == ':':
                                i += 2
                                x.time += int(l[i])
                        elif len_li <= 2:
                            # -[0]3
                            x.time = (int(l[i][:2])*3600)
                        else:
                            return None
                        i += 1

                    if not (i == len_l or l[i] == ','):
                        return None

                    i += 1

                if i < len_l:
                    return None

        except (IndexError, ValueError, AssertionError):
            return None

        return res
|
||||
|
||||
|
||||
DEFAULTTZPARSER = _tzparser()


def _parsetz(tzstr):
    """Parse the TZ-style string `tzstr` with the shared ``DEFAULTTZPARSER``."""
    shared_parser = DEFAULTTZPARSER
    return shared_parser.parse(tzstr)
|
||||
|
||||
|
||||
def _parsems(value):
|
||||
"""Parse a I[.F] seconds value into (seconds, microseconds)."""
|
||||
if "." not in value:
|
||||
return int(value), 0
|
||||
else:
|
||||
i, f = value.split(".")
|
||||
return int(i), int(f.ljust(6, "0")[:6])
|
||||
|
||||
|
||||
# vim:ts=4:sw=4:et
|
||||
432
lib/dateutil/relativedelta.py
Normal file
432
lib/dateutil/relativedelta.py
Normal file
@@ -0,0 +1,432 @@
|
||||
"""
|
||||
Copyright (c) 2003-2010 Gustavo Niemeyer <gustavo@niemeyer.net>
|
||||
|
||||
This module offers extensions to the standard python 2.3+
|
||||
datetime module.
|
||||
"""
|
||||
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
|
||||
__license__ = "PSF License"
|
||||
|
||||
import datetime
|
||||
import calendar
|
||||
|
||||
__all__ = ["relativedelta", "MO", "TU", "WE", "TH", "FR", "SA", "SU"]
|
||||
|
||||
class weekday(object):
    """Weekday marker with an optional occurrence count.

    ``weekday`` is the index into (MO, TU, WE, TH, FR, SA, SU) and ``n`` is
    the optional "Nth occurrence" value (``None`` when unspecified).
    """
    __slots__ = ["weekday", "n"]

    def __init__(self, weekday, n=None):
        self.weekday = weekday
        self.n = n

    def __call__(self, n):
        """Return a marker for the same weekday with occurrence count *n*.

        The instance itself is returned when *n* already matches.
        """
        if n == self.n:
            return self
        else:
            return self.__class__(self.weekday, n)

    def __eq__(self, other):
        """Compare by ``weekday`` and ``n``; objects lacking them are unequal."""
        try:
            if self.weekday != other.weekday or self.n != other.n:
                return False
        except AttributeError:
            return False
        return True

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; without this, two equal
        # markers would still compare as "not equal".
        return not self.__eq__(other)

    def __hash__(self):
        # Defining __eq__ removes the default hash on Python 3; hash on the
        # same fields that __eq__ compares so equal markers hash alike.
        return hash((self.weekday, self.n))

    def __repr__(self):
        s = ("MO", "TU", "WE", "TH", "FR", "SA", "SU")[self.weekday]
        if not self.n:
            return s
        else:
            return "%s(%+d)" % (s, self.n)


MO, TU, WE, TH, FR, SA, SU = weekdays = tuple([weekday(x) for x in range(7)])
|
||||
|
||||
class relativedelta(object):
    """
    The relativedelta type is based on the specification of the excellent
    work done by M.-A. Lemburg in his mx.DateTime extension. However,
    notice that this type does *NOT* implement the same algorithm as
    his work. Do *NOT* expect it to behave like mx.DateTime's counterpart.

    There are two different ways to build a relativedelta instance. The
    first one is passing it two date/datetime classes:

        relativedelta(datetime1, datetime2)

    And the other way is to use the following keyword arguments:

        year, month, day, hour, minute, second, microsecond:
            Absolute information.

        years, months, weeks, days, hours, minutes, seconds, microseconds:
            Relative information, may be negative.

        weekday:
            One of the weekday instances (MO, TU, etc). These instances may
            receive a parameter N, specifying the Nth weekday, which could
            be positive or negative (like MO(+1) or MO(-2). Not specifying
            it is the same as specifying +1. You can also use an integer,
            where 0=MO.

        leapdays:
            Will add given days to the date found, if year is a leap
            year, and the date found is post 28 of february.

        yearday, nlyearday:
            Set the yearday or the non-leap year day (jump leap days).
            These are converted to day/month/leapdays information.

    Here is the behavior of operations with relativedelta:

    1) Calculate the absolute year, using the 'year' argument, or the
       original datetime year, if the argument is not present.

    2) Add the relative 'years' argument to the absolute year.

    3) Do steps 1 and 2 for month/months.

    4) Calculate the absolute day, using the 'day' argument, or the
       original datetime day, if the argument is not present. Then,
       subtract from the day until it fits in the year and month
       found after their operations.

    5) Add the relative 'days' argument to the absolute day. Notice
       that the 'weeks' argument is multiplied by 7 and added to
       'days'.

    6) Do steps 1 and 2 for hour/hours, minute/minutes, second/seconds,
       microsecond/microseconds.

    7) If the 'weekday' argument is present, calculate the weekday,
       with the given (wday, nth) tuple. wday is the index of the
       weekday (0-6, 0=Mon), and nth is the number of weeks to add
       forward or backward, depending on its signal. Notice that if
       the calculated date is already Monday, for example, using
       (0, 1) or (0, -1) won't change the day.
    """

    def __init__(self, dt1=None, dt2=None,
                 years=0, months=0, days=0, leapdays=0, weeks=0,
                 hours=0, minutes=0, seconds=0, microseconds=0,
                 year=None, month=None, day=None, weekday=None,
                 yearday=None, nlyearday=None,
                 hour=None, minute=None, second=None, microsecond=None):
        """Build either the difference dt1 - dt2 or a delta from keywords.

        Raises TypeError when dt1/dt2 are given but are not date/datetime
        instances, and ValueError for a year day beyond 366.
        """
        if dt1 and dt2:
            if not isinstance(dt1, datetime.date) or \
               not isinstance(dt2, datetime.date):
                raise TypeError("relativedelta only diffs datetime/date")
            if type(dt1) is not type(dt2):
                # Promote the plain date to a datetime so both operands can
                # be compared and subtracted.
                if not isinstance(dt1, datetime.datetime):
                    dt1 = datetime.datetime.fromordinal(dt1.toordinal())
                elif not isinstance(dt2, datetime.datetime):
                    dt2 = datetime.datetime.fromordinal(dt2.toordinal())
            self.years = 0
            self.months = 0
            self.days = 0
            self.leapdays = 0
            self.hours = 0
            self.minutes = 0
            self.seconds = 0
            self.microseconds = 0
            self.year = None
            self.month = None
            self.day = None
            self.weekday = None
            self.hour = None
            self.minute = None
            self.second = None
            self.microsecond = None
            self._has_time = 0

            # Start from the raw month difference, then step it one month
            # at a time until dt2 + delta no longer overshoots dt1.
            months = (dt1.year*12+dt1.month)-(dt2.year*12+dt2.month)
            self._set_months(months)
            dtm = self.__radd__(dt2)
            if dt1 < dt2:
                while dt1 > dtm:
                    months += 1
                    self._set_months(months)
                    dtm = self.__radd__(dt2)
            else:
                while dt1 < dtm:
                    months -= 1
                    self._set_months(months)
                    dtm = self.__radd__(dt2)
            # Whatever remains is expressed in seconds/microseconds and is
            # normalized into larger units by _fix() below.
            delta = dt1 - dtm
            self.seconds = delta.seconds+delta.days*86400
            self.microseconds = delta.microseconds
        else:
            self.years = years
            self.months = months
            self.days = days+weeks*7
            self.leapdays = leapdays
            self.hours = hours
            self.minutes = minutes
            self.seconds = seconds
            self.microseconds = microseconds
            self.year = year
            self.month = month
            self.day = day
            self.hour = hour
            self.minute = minute
            self.second = second
            self.microsecond = microsecond

            if type(weekday) is int:
                self.weekday = weekdays[weekday]
            else:
                self.weekday = weekday

            yday = 0
            if nlyearday:
                yday = nlyearday
            elif yearday:
                yday = yearday
                if yearday > 59:
                    self.leapdays = -1
            if yday:
                # Cumulative day counts at the end of each month.
                ydayidx = [31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 366]
                for idx, ydays in enumerate(ydayidx):
                    if yday <= ydays:
                        self.month = idx+1
                        if idx == 0:
                            self.day = yday
                        else:
                            self.day = yday-ydayidx[idx-1]
                        break
                else:
                    raise ValueError("invalid year day (%d)" % yday)

        self._fix()

    @staticmethod
    def _first_set(preferred, fallback):
        """Return *preferred* unless it is None, in which case *fallback*."""
        return fallback if preferred is None else preferred

    def _fix(self):
        """Carry overflowing relative units upward and refresh _has_time."""
        if abs(self.microseconds) > 999999:
            s = self.microseconds//abs(self.microseconds)
            div, mod = divmod(self.microseconds*s, 1000000)
            self.microseconds = mod*s
            self.seconds += div*s
        if abs(self.seconds) > 59:
            s = self.seconds//abs(self.seconds)
            div, mod = divmod(self.seconds*s, 60)
            self.seconds = mod*s
            self.minutes += div*s
        if abs(self.minutes) > 59:
            s = self.minutes//abs(self.minutes)
            div, mod = divmod(self.minutes*s, 60)
            self.minutes = mod*s
            self.hours += div*s
        if abs(self.hours) > 23:
            s = self.hours//abs(self.hours)
            div, mod = divmod(self.hours*s, 24)
            self.hours = mod*s
            self.days += div*s
        if abs(self.months) > 11:
            s = self.months//abs(self.months)
            div, mod = divmod(self.months*s, 12)
            self.months = mod*s
            self.years += div*s
        if (self.hours or self.minutes or self.seconds or self.microseconds or
            self.hour is not None or self.minute is not None or
            self.second is not None or self.microsecond is not None):
            self._has_time = 1
        else:
            self._has_time = 0

    def _set_months(self, months):
        """Set months, replacing (not adding to) years with the overflow."""
        self.months = months
        if abs(self.months) > 11:
            s = self.months//abs(self.months)
            div, mod = divmod(self.months*s, 12)
            self.months = mod*s
            self.years = div*s
        else:
            self.years = 0

    def __radd__(self, other):
        """Apply this delta to the date/datetime *other* and return the result.

        Raises TypeError when *other* is not a date or datetime.
        """
        if not isinstance(other, datetime.date):
            raise TypeError("unsupported type for add operation")
        elif self._has_time and not isinstance(other, datetime.datetime):
            # A delta with a time component needs a datetime to act on.
            other = datetime.datetime.fromordinal(other.toordinal())
        year = (self.year or other.year)+self.years
        month = self.month or other.month
        if self.months:
            assert 1 <= abs(self.months) <= 12
            month += self.months
            if month > 12:
                year += 1
                month -= 12
            elif month < 1:
                year -= 1
                month += 12
        # Clamp the day to the length of the target month.
        day = min(calendar.monthrange(year, month)[1],
                  self.day or other.day)
        repl = {"year": year, "month": month, "day": day}
        for attr in ["hour", "minute", "second", "microsecond"]:
            value = getattr(self, attr)
            if value is not None:
                repl[attr] = value
        days = self.days
        if self.leapdays and month > 2 and calendar.isleap(year):
            days += self.leapdays
        ret = (other.replace(**repl)
               + datetime.timedelta(days=days,
                                    hours=self.hours,
                                    minutes=self.minutes,
                                    seconds=self.seconds,
                                    microseconds=self.microseconds))
        if self.weekday:
            weekday, nth = self.weekday.weekday, self.weekday.n or 1
            jumpdays = (abs(nth)-1)*7
            if nth > 0:
                jumpdays += (7-ret.weekday()+weekday) % 7
            else:
                jumpdays += (ret.weekday()-weekday) % 7
                jumpdays *= -1
            ret += datetime.timedelta(days=jumpdays)
        return ret

    def __rsub__(self, other):
        """Return *other* minus this delta (date - relativedelta)."""
        return self.__neg__().__radd__(other)

    def __add__(self, other):
        """Return the sum of two relativedeltas.

        Relative fields are added; for absolute fields the value from
        *other* wins when it is set, otherwise this instance's value is kept.
        Raises TypeError when *other* is not a relativedelta.
        """
        if not isinstance(other, relativedelta):
            raise TypeError("unsupported type for add operation")
        pick = self._first_set
        return relativedelta(years=other.years+self.years,
                             months=other.months+self.months,
                             days=other.days+self.days,
                             hours=other.hours+self.hours,
                             minutes=other.minutes+self.minutes,
                             seconds=other.seconds+self.seconds,
                             microseconds=other.microseconds+self.microseconds,
                             leapdays=other.leapdays or self.leapdays,
                             # "is None" tests (not "or") so that a valid
                             # absolute 0 such as hour=0 is not discarded.
                             year=pick(other.year, self.year),
                             month=pick(other.month, self.month),
                             day=pick(other.day, self.day),
                             weekday=pick(other.weekday, self.weekday),
                             hour=pick(other.hour, self.hour),
                             minute=pick(other.minute, self.minute),
                             second=pick(other.second, self.second),
                             # Was other.second: the wrong field was copied.
                             microsecond=pick(other.microsecond,
                                              self.microsecond))

    def __sub__(self, other):
        """Return this delta minus *other*.

        Relative fields are subtracted as ``self - other``; absolute fields
        follow the same precedence as in addition.
        Raises TypeError when *other* is not a relativedelta.
        """
        if not isinstance(other, relativedelta):
            raise TypeError("unsupported type for sub operation")
        pick = self._first_set
        # The operands used to be swapped, so a - b produced b - a.
        return relativedelta(years=self.years-other.years,
                             months=self.months-other.months,
                             days=self.days-other.days,
                             hours=self.hours-other.hours,
                             minutes=self.minutes-other.minutes,
                             seconds=self.seconds-other.seconds,
                             microseconds=self.microseconds-other.microseconds,
                             leapdays=other.leapdays or self.leapdays,
                             year=pick(other.year, self.year),
                             month=pick(other.month, self.month),
                             day=pick(other.day, self.day),
                             weekday=pick(other.weekday, self.weekday),
                             hour=pick(other.hour, self.hour),
                             minute=pick(other.minute, self.minute),
                             second=pick(other.second, self.second),
                             microsecond=pick(other.microsecond,
                                              self.microsecond))

    def __neg__(self):
        """Return a copy with every relative field negated."""
        return relativedelta(years=-self.years,
                             months=-self.months,
                             days=-self.days,
                             hours=-self.hours,
                             minutes=-self.minutes,
                             seconds=-self.seconds,
                             microseconds=-self.microseconds,
                             leapdays=self.leapdays,
                             year=self.year,
                             month=self.month,
                             day=self.day,
                             weekday=self.weekday,
                             hour=self.hour,
                             minute=self.minute,
                             second=self.second,
                             microsecond=self.microsecond)

    def __bool__(self):
        """Return False only when no relative or absolute field is set."""
        return not (not self.years and
                    not self.months and
                    not self.days and
                    not self.hours and
                    not self.minutes and
                    not self.seconds and
                    not self.microseconds and
                    not self.leapdays and
                    self.year is None and
                    self.month is None and
                    self.day is None and
                    self.weekday is None and
                    self.hour is None and
                    self.minute is None and
                    self.second is None and
                    self.microsecond is None)

    # Python 2 looks up __nonzero__ for truth testing, Python 3 __bool__.
    __nonzero__ = __bool__

    def __mul__(self, other):
        """Return a copy with every relative field scaled by float(other)."""
        f = float(other)
        return relativedelta(years=int(round(self.years*f)),
                             months=int(round(self.months*f)),
                             days=int(round(self.days*f)),
                             hours=int(round(self.hours*f)),
                             minutes=int(round(self.minutes*f)),
                             seconds=int(round(self.seconds*f)),
                             # Rounded like the other fields; it used to be
                             # left as a float.
                             microseconds=int(round(self.microseconds*f)),
                             leapdays=self.leapdays,
                             year=self.year,
                             month=self.month,
                             day=self.day,
                             weekday=self.weekday,
                             hour=self.hour,
                             minute=self.minute,
                             second=self.second,
                             microsecond=self.microsecond)

    def __eq__(self, other):
        """Compare field by field; an unset weekday count equals a count of 1."""
        if not isinstance(other, relativedelta):
            return False
        if self.weekday or other.weekday:
            if not self.weekday or not other.weekday:
                return False
            if self.weekday.weekday != other.weekday.weekday:
                return False
            n1, n2 = self.weekday.n, other.weekday.n
            if n1 != n2 and not ((not n1 or n1 == 1) and (not n2 or n2 == 1)):
                return False
        return (self.years == other.years and
                self.months == other.months and
                self.days == other.days and
                self.hours == other.hours and
                self.minutes == other.minutes and
                self.seconds == other.seconds and
                # Relative microseconds were previously left out, so deltas
                # differing only there compared equal.
                self.microseconds == other.microseconds and
                self.leapdays == other.leapdays and
                self.year == other.year and
                self.month == other.month and
                self.day == other.day and
                self.hour == other.hour and
                self.minute == other.minute and
                self.second == other.second and
                self.microsecond == other.microsecond)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __div__(self, other):
        """Return a copy with every relative field divided by float(other)."""
        return self.__mul__(1/float(other))

    # Python 3 dispatches the "/" operator to __truediv__.
    __truediv__ = __div__

    def __repr__(self):
        l = []
        for attr in ["years", "months", "days", "leapdays",
                     "hours", "minutes", "seconds", "microseconds"]:
            value = getattr(self, attr)
            if value:
                l.append("%s=%+d" % (attr, value))
        for attr in ["year", "month", "day", "weekday",
                     "hour", "minute", "second", "microsecond"]:
            value = getattr(self, attr)
            if value is not None:
                l.append("%s=%s" % (attr, repr(value)))
        return "%s(%s)" % (self.__class__.__name__, ", ".join(l))
|
||||
|
||||
# vim:ts=4:sw=4:et
|
||||
1108
lib/dateutil/rrule.py
Normal file
1108
lib/dateutil/rrule.py
Normal file
File diff suppressed because it is too large
Load Diff
958
lib/dateutil/tz.py
Normal file
958
lib/dateutil/tz.py
Normal file
@@ -0,0 +1,958 @@
|
||||
"""
|
||||
Copyright (c) 2003-2007 Gustavo Niemeyer <gustavo@niemeyer.net>
|
||||
|
||||
This module offers extensions to the standard python 2.3+
|
||||
datetime module.
|
||||
"""
|
||||
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
|
||||
__license__ = "PSF License"
|
||||
|
||||
import datetime
|
||||
import struct
|
||||
import time
|
||||
import sys
|
||||
import os
|
||||
|
||||
relativedelta = None
|
||||
parser = None
|
||||
rrule = None
|
||||
|
||||
__all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange",
|
||||
"tzstr", "tzical", "tzwin", "tzwinlocal", "gettz"]
|
||||
|
||||
try:
|
||||
from dateutil.tzwin import tzwin, tzwinlocal
|
||||
except (ImportError, OSError):
|
||||
tzwin, tzwinlocal = None, None
|
||||
|
||||
ZERO = datetime.timedelta(0)
|
||||
EPOCHORDINAL = datetime.datetime.utcfromtimestamp(0).toordinal()
|
||||
|
||||
class tzutc(datetime.tzinfo):
    """tzinfo whose offset and DST are both ZERO and whose name is "UTC"."""

    def utcoffset(self, dt):
        return ZERO

    def dst(self, dt):
        return ZERO

    def tzname(self, dt):
        return "UTC"

    def __eq__(self, other):
        # Equal to any other tzutc, and to a tzoffset whose offset is zero.
        if isinstance(other, tzutc):
            return True
        return isinstance(other, tzoffset) and other._offset == ZERO

    def __ne__(self, other):
        is_equal = self.__eq__(other)
        return not is_equal

    def __repr__(self):
        class_name = self.__class__.__name__
        return "%s()" % class_name

    __reduce__ = object.__reduce__
|
||||
|
||||
class tzoffset(datetime.tzinfo):
    """tzinfo with a fixed name and a fixed UTC offset given in seconds."""

    def __init__(self, name, offset):
        """Store *name* and convert *offset* (seconds) to a timedelta."""
        self._name = name
        self._offset = datetime.timedelta(seconds=offset)

    def utcoffset(self, dt):
        return self._offset

    def dst(self, dt):
        return ZERO

    def tzname(self, dt):
        return self._name

    def __eq__(self, other):
        # Only the offset is compared; the name is ignored.
        return (isinstance(other, tzoffset) and
                self._offset == other._offset)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        # repr() instead of backticks, which are a syntax error on Python 3.
        return "%s(%s, %s)" % (self.__class__.__name__,
                               repr(self._name),
                               self._offset.days*86400+self._offset.seconds)

    __reduce__ = object.__reduce__
|
||||
|
||||
class tzlocal(datetime.tzinfo):
    """tzinfo driven by the ``time`` module's local time zone settings."""

    # Offsets are computed once, at class creation, from the time module.
    _std_offset = datetime.timedelta(seconds=-time.timezone)
    if time.daylight:
        _dst_offset = datetime.timedelta(seconds=-time.altzone)
    else:
        _dst_offset = _std_offset

    def utcoffset(self, dt):
        if self._isdst(dt):
            return self._dst_offset
        else:
            return self._std_offset

    def dst(self, dt):
        if self._isdst(dt):
            return self._dst_offset-self._std_offset
        else:
            return ZERO

    def tzname(self, dt):
        return time.tzname[self._isdst(dt)]

    def _isdst(self, dt):
        """Return the tm_isdst flag that time.localtime reports for *dt*."""
        # We can't use mktime here. It is unstable when deciding if
        # the hour near to a change is DST or not.
        #
        # timestamp = time.mktime((dt.year, dt.month, dt.day, dt.hour,
        #                         dt.minute, dt.second, dt.weekday(), 0, -1))
        # return time.localtime(timestamp).tm_isdst
        #
        # The code above yields the following result:
        #
        #>>> import tz, datetime
        #>>> t = tz.tzlocal()
        #>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
        #'BRDT'
        #>>> datetime.datetime(2003,2,16,0,tzinfo=t).tzname()
        #'BRST'
        #>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
        #'BRST'
        #>>> datetime.datetime(2003,2,15,22,tzinfo=t).tzname()
        #'BRDT'
        #>>> datetime.datetime(2003,2,15,23,tzinfo=t).tzname()
        #'BRDT'
        #
        # Here is a more stable implementation:
        #
        timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400
                     + dt.hour * 3600
                     + dt.minute * 60
                     + dt.second)
        return time.localtime(timestamp+time.timezone).tm_isdst

    def __eq__(self, other):
        if not isinstance(other, tzlocal):
            return False
        # An unreachable "return True" used to follow this statement.
        return (self._std_offset == other._std_offset and
                self._dst_offset == other._dst_offset)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "%s()" % self.__class__.__name__

    __reduce__ = object.__reduce__
|
||||
|
||||
class _ttinfo(object):
|
||||
__slots__ = ["offset", "delta", "isdst", "abbr", "isstd", "isgmt"]
|
||||
|
||||
def __init__(self):
|
||||
for attr in self.__slots__:
|
||||
setattr(self, attr, None)
|
||||
|
||||
def __repr__(self):
|
||||
l = []
|
||||
for attr in self.__slots__:
|
||||
value = getattr(self, attr)
|
||||
if value is not None:
|
||||
l.append("%s=%s" % (attr, `value`))
|
||||
return "%s(%s)" % (self.__class__.__name__, ", ".join(l))
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, _ttinfo):
|
||||
return False
|
||||
return (self.offset == other.offset and
|
||||
self.delta == other.delta and
|
||||
self.isdst == other.isdst and
|
||||
self.abbr == other.abbr and
|
||||
self.isstd == other.isstd and
|
||||
self.isgmt == other.isgmt)
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self.__eq__(other)
|
||||
|
||||
def __getstate__(self):
|
||||
state = {}
|
||||
for name in self.__slots__:
|
||||
state[name] = getattr(self, name, None)
|
||||
return state
|
||||
|
||||
def __setstate__(self, state):
|
||||
for name in self.__slots__:
|
||||
if name in state:
|
||||
setattr(self, name, state[name])
|
||||
|
||||
class tzfile(datetime.tzinfo):
    """tzinfo built from a compiled "TZif" time zone information file.

    The constructor accepts either a path or an already open file object.
    A file object must yield bytes (binary mode) on Python 3.
    """

    # http://www.twinsun.com/tz/tz-link.htm
    # ftp://elsie.nci.nih.gov/pub/tz*.tar.gz

    def __init__(self, fileobj):
        """Load the zone data from *fileobj* (a path or an open file).

        Raises ValueError when the data does not start with the TZif magic.
        """
        try:
            string_types = basestring  # Python 2: covers str and unicode
        except NameError:
            string_types = str  # Python 3 has no basestring

        opened_here = False
        if isinstance(fileobj, string_types):
            self._filename = fileobj
            # Binary mode: the content is packed binary data, and text mode
            # would try to decode it on Python 3.
            fileobj = open(fileobj, "rb")
            opened_here = True
        elif hasattr(fileobj, "name"):
            self._filename = fileobj.name
        else:
            self._filename = repr(fileobj)

        try:
            self._read_tzfile(fileobj)
        finally:
            # Only close what this constructor opened; a caller-supplied
            # file object stays under the caller's control.
            if opened_here:
                fileobj.close()

    def _read_tzfile(self, fileobj):
        """Parse the TZif data from *fileobj* into this instance's tables."""
        # From tzfile(5):
        #
        # The time zone information files used by tzset(3)
        # begin with the magic characters "TZif" to identify
        # them as time zone information files, followed by
        # sixteen bytes reserved for future use, followed by
        # six four-byte values of type long, written in a
        # ``standard'' byte order (the high-order byte
        # of the value is written first).

        if fileobj.read(4) != b"TZif":
            raise ValueError("magic not found")

        fileobj.read(16)

        (
            # The number of UTC/local indicators stored in the file.
            ttisgmtcnt,

            # The number of standard/wall indicators stored in the file.
            ttisstdcnt,

            # The number of leap seconds for which data is
            # stored in the file.
            leapcnt,

            # The number of "transition times" for which data
            # is stored in the file.
            timecnt,

            # The number of "local time types" for which data
            # is stored in the file (must not be zero).
            typecnt,

            # The number of characters of "time zone
            # abbreviation strings" stored in the file.
            charcnt,

        ) = struct.unpack(">6l", fileobj.read(24))

        # The above header is followed by tzh_timecnt four-byte
        # values of type long, sorted in ascending order.
        # These values are written in ``standard'' byte order.
        # Each is used as a transition time (as returned by
        # time(2)) at which the rules for computing local time
        # change.

        if timecnt:
            self._trans_list = struct.unpack(">%dl" % timecnt,
                                             fileobj.read(timecnt*4))
        else:
            self._trans_list = []

        # Next come tzh_timecnt one-byte values of type unsigned
        # char; each one tells which of the different types of
        # ``local time'' types described in the file is associated
        # with the same-indexed transition time. These values
        # serve as indices into an array of ttinfo structures that
        # appears next in the file.

        if timecnt:
            self._trans_idx = struct.unpack(">%dB" % timecnt,
                                            fileobj.read(timecnt))
        else:
            self._trans_idx = []

        # Each ttinfo structure is written as a four-byte value
        # for tt_gmtoff of type long, in a standard byte
        # order, followed by a one-byte value for tt_isdst
        # and a one-byte value for tt_abbrind. In each
        # structure, tt_gmtoff gives the number of
        # seconds to be added to UTC, tt_isdst tells whether
        # tm_isdst should be set by localtime(3), and
        # tt_abbrind serves as an index into the array of
        # time zone abbreviation characters that follow the
        # ttinfo structure(s) in the file.

        ttinfo = []

        for i in range(typecnt):
            ttinfo.append(struct.unpack(">lbb", fileobj.read(6)))

        abbr = fileobj.read(charcnt)
        if not isinstance(abbr, str):
            # Python 3 reads bytes; the abbreviations are used as text.
            abbr = abbr.decode()

        # Then there are tzh_leapcnt pairs of four-byte
        # values, written in standard byte order; the
        # first value of each pair gives the time (as
        # returned by time(2)) at which a leap second
        # occurs; the second gives the total number of
        # leap seconds to be applied after the given time.
        # The pairs of values are sorted in ascending order
        # by time.

        # Not used, for now: the data is still read (and unpacked) so the
        # file position advances to the indicators that follow.
        if leapcnt:
            struct.unpack(">%dl" % (leapcnt*2),
                          fileobj.read(leapcnt*8))

        # Then there are tzh_ttisstdcnt standard/wall
        # indicators, each stored as a one-byte value;
        # they tell whether the transition times associated
        # with local time types were specified as standard
        # time or wall clock time, and are used when
        # a time zone file is used in handling POSIX-style
        # time zone environment variables.

        if ttisstdcnt:
            isstd = struct.unpack(">%db" % ttisstdcnt,
                                  fileobj.read(ttisstdcnt))

        # Finally, there are tzh_ttisgmtcnt UTC/local
        # indicators, each stored as a one-byte value;
        # they tell whether the transition times associated
        # with local time types were specified as UTC or
        # local time, and are used when a time zone file
        # is used in handling POSIX-style time zone envi-
        # ronment variables.

        if ttisgmtcnt:
            isgmt = struct.unpack(">%db" % ttisgmtcnt,
                                  fileobj.read(ttisgmtcnt))

        # ** Everything has been read **

        # Build ttinfo list
        self._ttinfo_list = []
        for i in range(typecnt):
            gmtoff, isdst, abbrind = ttinfo[i]
            # Round to full-minutes if that's not the case. Python's
            # datetime doesn't accept sub-minute timezones. Check
            # http://python.org/sf/1447945 for some information.
            gmtoff = (gmtoff+30)//60*60
            tti = _ttinfo()
            tti.offset = gmtoff
            tti.delta = datetime.timedelta(seconds=gmtoff)
            tti.isdst = isdst
            tti.abbr = abbr[abbrind:abbr.find('\x00', abbrind)]
            # The count checks keep isstd/isgmt from being read when the
            # file stored no indicator for this index.
            tti.isstd = (ttisstdcnt > i and isstd[i] != 0)
            tti.isgmt = (ttisgmtcnt > i and isgmt[i] != 0)
            self._ttinfo_list.append(tti)

        # Replace ttinfo indexes for ttinfo objects.
        trans_idx = []
        for idx in self._trans_idx:
            trans_idx.append(self._ttinfo_list[idx])
        self._trans_idx = tuple(trans_idx)

        # Set standard, dst, and before ttinfos. before will be
        # used when a given time is before any transitions,
        # and will be set to the first non-dst ttinfo, or to
        # the first dst, if all of them are dst.
        self._ttinfo_std = None
        self._ttinfo_dst = None
        self._ttinfo_before = None
        if self._ttinfo_list:
            if not self._trans_list:
                self._ttinfo_std = self._ttinfo_first = self._ttinfo_list[0]
            else:
                # Walk the transitions newest-first to pick the most recent
                # standard and dst entries.
                for i in range(timecnt-1, -1, -1):
                    tti = self._trans_idx[i]
                    if not self._ttinfo_std and not tti.isdst:
                        self._ttinfo_std = tti
                    elif not self._ttinfo_dst and tti.isdst:
                        self._ttinfo_dst = tti
                    if self._ttinfo_std and self._ttinfo_dst:
                        break
                else:
                    if self._ttinfo_dst and not self._ttinfo_std:
                        self._ttinfo_std = self._ttinfo_dst

                for tti in self._ttinfo_list:
                    if not tti.isdst:
                        self._ttinfo_before = tti
                        break
                else:
                    self._ttinfo_before = self._ttinfo_list[0]

        # Now fix transition times to become relative to wall time.
        #
        # I'm not sure about this. In my tests, the tz source file
        # is setup to wall time, and in the binary file isstd and
        # isgmt are off, so it should be in wall time. OTOH, it's
        # always in gmt time. Let me know if you have comments
        # about this.
        laststdoffset = 0
        self._trans_list = list(self._trans_list)
        for i in range(len(self._trans_list)):
            tti = self._trans_idx[i]
            if not tti.isdst:
                # This is std time.
                self._trans_list[i] += tti.offset
                laststdoffset = tti.offset
            else:
                # This is dst time. Convert to std.
                self._trans_list[i] += laststdoffset
        self._trans_list = tuple(self._trans_list)

    def _find_ttinfo(self, dt, laststd=0):
        """Return the ttinfo in effect at *dt*.

        With *laststd* set, return the latest non-dst ttinfo at or before
        that point instead.
        """
        timestamp = ((dt.toordinal() - EPOCHORDINAL) * 86400
                     + dt.hour * 3600
                     + dt.minute * 60
                     + dt.second)
        idx = 0
        for trans in self._trans_list:
            if timestamp < trans:
                break
            idx += 1
        else:
            # At or past the last transition (or there are none).
            return self._ttinfo_std
        if idx == 0:
            return self._ttinfo_before
        if laststd:
            while idx > 0:
                tti = self._trans_idx[idx-1]
                if not tti.isdst:
                    return tti
                idx -= 1
            return self._ttinfo_std
        else:
            return self._trans_idx[idx-1]

    def utcoffset(self, dt):
        if not self._ttinfo_std:
            return ZERO
        return self._find_ttinfo(dt).delta

    def dst(self, dt):
        if not self._ttinfo_dst:
            return ZERO
        tti = self._find_ttinfo(dt)
        if not tti.isdst:
            return ZERO

        # The documentation says that utcoffset()-dst() must
        # be constant for every dt.
        return tti.delta-self._find_ttinfo(dt, laststd=1).delta

        # An alternative for that would be:
        #
        # return self._ttinfo_dst.offset-self._ttinfo_std.offset
        #
        # However, this class stores historical changes in the
        # dst offset, so I believe that this wouldn't be the right
        # way to implement this.

    def tzname(self, dt):
        if not self._ttinfo_std:
            return None
        return self._find_ttinfo(dt).abbr

    def __eq__(self, other):
        if not isinstance(other, tzfile):
            return False
        return (self._trans_list == other._trans_list and
                self._trans_idx == other._trans_idx and
                self._ttinfo_list == other._ttinfo_list)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, repr(self._filename))

    def __reduce__(self):
        """Pickle as (class, filename); needs the source file to still exist.

        Raises ValueError when the recorded filename is not an existing file.
        """
        if not os.path.isfile(self._filename):
            raise ValueError("Unpickable %s class" % self.__class__.__name__)
        return (self.__class__, (self._filename,))
|
||||
|
||||
class tzrange(datetime.tzinfo):
    """tzinfo with a standard and an optional DST period per year.

    The DST period is bounded by two deltas that are added to January 1st
    of the year being queried.
    """

    def __init__(self, stdabbr, stdoffset=None,
                 dstabbr=None, dstoffset=None,
                 start=None, end=None):
        # relativedelta is imported lazily, on first use, and cached in the
        # module-level global of the same name.
        global relativedelta
        if not relativedelta:
            from dateutil import relativedelta

        self._std_abbr = stdabbr
        self._dst_abbr = dstabbr

        if stdoffset is None:
            self._std_offset = ZERO
        else:
            self._std_offset = datetime.timedelta(seconds=stdoffset)

        if dstoffset is not None:
            self._dst_offset = datetime.timedelta(seconds=dstoffset)
        elif dstabbr and stdoffset is not None:
            # No explicit DST offset: one hour ahead of standard time.
            one_hour = datetime.timedelta(hours=+1)
            self._dst_offset = self._std_offset+one_hour
        else:
            self._dst_offset = ZERO

        # Default DST bounds apply only when a DST abbreviation was given
        # and the caller left the corresponding bound as None.
        if dstabbr and start is None:
            first_sunday = relativedelta.SU(+1)
            self._start_delta = relativedelta.relativedelta(
                hours=+2, month=4, day=1, weekday=first_sunday)
        else:
            self._start_delta = start

        if dstabbr and end is None:
            last_sunday = relativedelta.SU(-1)
            self._end_delta = relativedelta.relativedelta(
                hours=+1, month=10, day=31, weekday=last_sunday)
        else:
            self._end_delta = end

    def utcoffset(self, dt):
        in_dst = self._isdst(dt)
        return self._dst_offset if in_dst else self._std_offset

    def dst(self, dt):
        if not self._isdst(dt):
            return ZERO
        return self._dst_offset-self._std_offset

    def tzname(self, dt):
        in_dst = self._isdst(dt)
        return self._dst_abbr if in_dst else self._std_abbr

    def _isdst(self, dt):
        """Return whether *dt* falls inside the DST period of its year."""
        if not self._start_delta:
            return False
        new_year = datetime.datetime(dt.year, 1, 1)
        dst_begin = new_year+self._start_delta
        dst_finish = new_year+self._end_delta
        naive = dt.replace(tzinfo=None)
        if dst_begin < dst_finish:
            return naive >= dst_begin and naive < dst_finish
        # The period wraps around the end of the year.
        return naive >= dst_begin or naive < dst_finish

    def __eq__(self, other):
        if not isinstance(other, tzrange):
            return False
        return (self._std_abbr == other._std_abbr and
                self._dst_abbr == other._dst_abbr and
                self._std_offset == other._std_offset and
                self._dst_offset == other._dst_offset and
                self._start_delta == other._start_delta and
                self._end_delta == other._end_delta)

    def __ne__(self, other):
        is_equal = self.__eq__(other)
        return not is_equal

    def __repr__(self):
        class_name = self.__class__.__name__
        return "%s(...)" % class_name

    __reduce__ = object.__reduce__
|
||||
|
||||
class tzstr(tzrange):
    """``tzrange`` built from a timezone string handed to ``parser._parsetz``.

    The DST boundaries are derived from the parsed start/end rules by
    ``_delta``; without a DST abbreviation no DST window is set.
    """

    def __init__(self, s):
        # parser is imported lazily into the module namespace.
        global parser
        if not parser:
            from dateutil import parser
        self._s = s

        res = parser._parsetz(s)
        if res is None:
            # Call-style raise: valid on Python 2 and Python 3.
            raise ValueError("unknown string format")

        # Here we break the compatibility with the TZ variable handling.
        # GMT-3 actually *means* the timezone -3.
        if res.stdabbr in ("GMT", "UTC"):
            res.stdoffset *= -1

        # We must initialize it first, since _delta() needs
        # _std_offset and _dst_offset set. Use False in start/end
        # to avoid building it two times.
        tzrange.__init__(self, res.stdabbr, res.stdoffset,
                         res.dstabbr, res.dstoffset,
                         start=False, end=False)

        if not res.dstabbr:
            self._start_delta = None
            self._end_delta = None
        else:
            self._start_delta = self._delta(res.start)
            if self._start_delta:
                self._end_delta = self._delta(res.end, isend=1)

    def _delta(self, x, isend=0):
        """Translate a parsed start/end rule ``x`` into a relativedelta.

        :param x: object exposing ``month``, ``weekday``, ``week``, ``day``,
            ``yday``, ``jyday`` and ``time`` attributes.
        :param isend: truthy when building the DST end rule.
        """
        kwargs = {}
        if x.month is not None:
            kwargs["month"] = x.month
            if x.weekday is not None:
                kwargs["weekday"] = relativedelta.weekday(x.weekday, x.week)
                if x.week > 0:
                    kwargs["day"] = 1
                else:
                    kwargs["day"] = 31
            elif x.day:
                kwargs["day"] = x.day
        elif x.yday is not None:
            kwargs["yearday"] = x.yday
        elif x.jyday is not None:
            kwargs["nlyearday"] = x.jyday
        if not kwargs:
            # Default is to start on first sunday of april, and end
            # on last sunday of october.
            if not isend:
                kwargs["month"] = 4
                kwargs["day"] = 1
                kwargs["weekday"] = relativedelta.SU(+1)
            else:
                kwargs["month"] = 10
                kwargs["day"] = 31
                kwargs["weekday"] = relativedelta.SU(-1)
        if x.time is not None:
            kwargs["seconds"] = x.time
        else:
            # Default is 2AM.
            kwargs["seconds"] = 7200
        if isend:
            # Convert to standard time, to follow the documented way
            # of working with the extra hour. See the documentation
            # of the tzinfo class.
            delta = self._dst_offset - self._std_offset
            kwargs["seconds"] -= delta.seconds + delta.days * 86400
        return relativedelta.relativedelta(**kwargs)

    def __repr__(self):
        # repr() replaces the Python 2-only backtick syntax.
        return "%s(%s)" % (self.__class__.__name__, repr(self._s))
|
||||
|
||||
class _tzicalvtzcomp:
    """Plain record describing one component of a VTIMEZONE definition.

    ``tzoffsetfrom`` and ``tzoffsetto`` are given in seconds and stored as
    timedeltas; ``tzoffsetdiff`` is ``tzoffsetto - tzoffsetfrom``.
    """

    def __init__(self, tzoffsetfrom, tzoffsetto, isdst,
                 tzname=None, rrule=None):
        offset_before = datetime.timedelta(seconds=tzoffsetfrom)
        offset_after = datetime.timedelta(seconds=tzoffsetto)
        self.tzoffsetfrom = offset_before
        self.tzoffsetto = offset_after
        self.tzoffsetdiff = offset_after - offset_before
        self.isdst = isdst
        self.tzname = tzname
        self.rrule = rrule
|
||||
|
||||
class _tzicalvtz(datetime.tzinfo):
    """tzinfo backed by a list of VTIMEZONE components.

    The component in effect for a datetime is the one whose recurrence rule
    has the latest onset at or before that datetime. The ten most recent
    lookups are cached.
    """

    def __init__(self, tzid, comps=None):
        self._tzid = tzid
        # A fresh list per instance; a literal [] default would be shared
        # between every instance created without components.
        self._comps = [] if comps is None else comps
        self._cachedate = []
        self._cachecomp = []

    def _find_comp(self, dt):
        """Return the component in effect at ``dt`` (tzinfo is ignored)."""
        if len(self._comps) == 1:
            return self._comps[0]
        dt = dt.replace(tzinfo=None)
        try:
            return self._cachecomp[self._cachedate.index(dt)]
        except ValueError:
            pass
        lastcomp = None
        lastcompdt = None
        for comp in self._comps:
            if not comp.isdst:
                # Handle the extra hour in DST -> STD
                compdt = comp.rrule.before(dt - comp.tzoffsetdiff, inc=True)
            else:
                compdt = comp.rrule.before(dt, inc=True)
            if compdt and (not lastcompdt or lastcompdt < compdt):
                lastcompdt = compdt
                lastcomp = comp
        if not lastcomp:
            # RFC says nothing about what to do when a given
            # time is before the first onset date. We'll look for the
            # first standard component, or the first component, if
            # none is found.
            for comp in self._comps:
                if not comp.isdst:
                    lastcomp = comp
                    break
            else:
                # Fall back to the first component of the list; the loop
                # variable is a single component and cannot be subscripted.
                lastcomp = self._comps[0]
        self._cachedate.insert(0, dt)
        self._cachecomp.insert(0, lastcomp)
        if len(self._cachedate) > 10:
            self._cachedate.pop()
            self._cachecomp.pop()
        return lastcomp

    def utcoffset(self, dt):
        return self._find_comp(dt).tzoffsetto

    def dst(self, dt):
        comp = self._find_comp(dt)
        if comp.isdst:
            return comp.tzoffsetdiff
        else:
            return ZERO

    def tzname(self, dt):
        return self._find_comp(dt).tzname

    def __repr__(self):
        # repr() replaces the Python 2-only backtick syntax.
        return "<tzicalvtz %s>" % repr(self._tzid)

    __reduce__ = object.__reduce__
|
||||
|
||||
class tzical:
    """Reader for ``BEGIN:VTIMEZONE`` ... ``END:VTIMEZONE`` definitions.

    ``fileobj`` is either a path (string) or an object with a ``read()``
    method. Every timezone found is stored under its TZID and can be
    retrieved with ``get``.
    """

    def __init__(self, fileobj):
        # rrule is imported lazily into the module namespace.
        global rrule
        if not rrule:
            from dateutil import rrule

        try:
            string_types = basestring  # Python 2
        except NameError:
            string_types = str  # Python 3

        opened = None
        if isinstance(fileobj, string_types):
            self._s = fileobj
            fileobj = opened = open(fileobj)
        elif hasattr(fileobj, "name"):
            self._s = fileobj.name
        else:
            self._s = repr(fileobj)

        self._vtz = {}

        try:
            self._parse_rfc(fileobj.read())
        finally:
            # Only close what this constructor opened itself.
            if opened is not None:
                opened.close()

    def keys(self):
        """Return the list of TZIDs that were parsed."""
        return list(self._vtz.keys())

    def get(self, tzid=None):
        """Return the timezone registered as ``tzid`` (None if unknown).

        With ``tzid=None`` the only defined timezone is returned.

        :raises ValueError: ``tzid`` is None and zero or several timezones
            are defined.
        """
        if tzid is None:
            # list() keeps the indexing below valid on Python 3 dict views.
            keys = list(self._vtz.keys())
            if len(keys) == 0:
                raise ValueError("no timezones defined")
            elif len(keys) > 1:
                raise ValueError("more than one timezone available")
            tzid = keys[0]
        return self._vtz.get(tzid)

    def _parse_offset(self, s):
        """Convert ``[+-]HHMM`` or ``[+-]HHMMSS`` into signed seconds.

        :raises ValueError: empty input or unsupported length.
        """
        s = s.strip()
        if not s:
            raise ValueError("empty offset")
        if s[0] in ('+', '-'):
            signal = (-1, +1)[s[0] == '+']
            s = s[1:]
        else:
            signal = +1
        if len(s) == 4:
            return (int(s[:2]) * 3600 + int(s[2:]) * 60) * signal
        elif len(s) == 6:
            return (int(s[:2]) * 3600 + int(s[2:4]) * 60 + int(s[4:])) * signal
        else:
            raise ValueError("invalid offset: " + s)

    def _parse_rfc(self, s):
        """Parse the text ``s`` and fill ``self._vtz``.

        :raises ValueError: empty input, unknown component or property,
            or a missing mandatory property.
        """
        lines = s.splitlines()
        if not lines:
            raise ValueError("empty string")

        # Unfold
        i = 0
        while i < len(lines):
            line = lines[i].rstrip()
            if not line:
                del lines[i]
            elif i > 0 and line[0] in (" ", "\t"):
                lines[i-1] += line[1:]
                del lines[i]
            else:
                i += 1

        tzid = None
        comps = []
        invtz = False
        comptype = None
        for line in lines:
            if not line:
                continue
            name, value = line.split(':', 1)
            parms = name.split(';')
            if not parms:
                raise ValueError("empty property name")
            name = parms[0].upper()
            parms = parms[1:]
            if invtz:
                if name == "BEGIN":
                    if value in ("STANDARD", "DAYLIGHT"):
                        # Process component
                        pass
                    else:
                        raise ValueError("unknown component: " + value)
                    comptype = value
                    founddtstart = False
                    tzoffsetfrom = None
                    tzoffsetto = None
                    rrulelines = []
                    tzname = None
                elif name == "END":
                    if value == "VTIMEZONE":
                        if comptype:
                            raise ValueError(
                                "component not closed: " + comptype)
                        if not tzid:
                            raise ValueError(
                                "mandatory TZID not found")
                        if not comps:
                            raise ValueError(
                                "at least one component is needed")
                        # Process vtimezone
                        self._vtz[tzid] = _tzicalvtz(tzid, comps)
                        invtz = False
                    elif value == comptype:
                        if not founddtstart:
                            raise ValueError(
                                "mandatory DTSTART not found")
                        if tzoffsetfrom is None:
                            raise ValueError(
                                "mandatory TZOFFSETFROM not found")
                        if tzoffsetto is None:
                            # Name the property that is actually missing.
                            raise ValueError(
                                "mandatory TZOFFSETTO not found")
                        # Process component
                        rr = None
                        if rrulelines:
                            rr = rrule.rrulestr("\n".join(rrulelines),
                                                compatible=True,
                                                ignoretz=True,
                                                cache=True)
                        comp = _tzicalvtzcomp(tzoffsetfrom, tzoffsetto,
                                              (comptype == "DAYLIGHT"),
                                              tzname, rr)
                        comps.append(comp)
                        comptype = None
                    else:
                        raise ValueError(
                            "invalid component end: " + value)
                elif comptype:
                    if name == "DTSTART":
                        rrulelines.append(line)
                        founddtstart = True
                    elif name in ("RRULE", "RDATE", "EXRULE", "EXDATE"):
                        rrulelines.append(line)
                    elif name == "TZOFFSETFROM":
                        if parms:
                            raise ValueError(
                                "unsupported %s parm: %s " % (name, parms[0]))
                        tzoffsetfrom = self._parse_offset(value)
                    elif name == "TZOFFSETTO":
                        if parms:
                            raise ValueError(
                                "unsupported TZOFFSETTO parm: " + parms[0])
                        tzoffsetto = self._parse_offset(value)
                    elif name == "TZNAME":
                        if parms:
                            raise ValueError(
                                "unsupported TZNAME parm: " + parms[0])
                        tzname = value
                    elif name == "COMMENT":
                        pass
                    elif name.upper().startswith('X-'):
                        # Ignore experimental properties.
                        pass
                    else:
                        raise ValueError("unsupported property: " + name)
                else:
                    if name == "TZID":
                        for p in parms:
                            if not p.upper().startswith('X-'):
                                raise ValueError(
                                    "unsupported TZID parm: " + p)
                        tzid = value
                    elif name in ("TZURL", "LAST-MODIFIED", "COMMENT"):
                        pass
                    elif name.upper().startswith('X-'):
                        # Ignore experimental properties.
                        pass
                    else:
                        raise ValueError("unsupported property: " + name)
            elif name == "BEGIN" and value == "VTIMEZONE":
                tzid = None
                comps = []
                invtz = True

    def __repr__(self):
        # repr() replaces the Python 2-only backtick syntax.
        return "%s(%s)" % (self.__class__.__name__, repr(self._s))
|
||||
|
||||
# Candidate tzfile names and the directories searched for them by gettz().
# Both lists are empty on win32.
if sys.platform == "win32":
    TZFILES = []
    TZPATHS = []
else:
    TZFILES = ["/etc/localtime", "localtime"]
    TZPATHS = ["/usr/share/zoneinfo", "/usr/lib/zoneinfo", "/etc/zoneinfo"]
|
||||
|
||||
def gettz(name=None):
    """Return a tzinfo object for ``name``, or None when nothing matches.

    Without a name, the ``TZ`` environment variable is consulted. If the
    name is then still None or ":", the files in ``TZFILES`` are tried and
    ``tzlocal()`` is the fallback. Otherwise the name is tried, in order, as
    an absolute tzfile path, a file below ``TZPATHS``, a ``tzwin`` name
    (when available), a ``dateutil.zoneinfo`` entry, a ``tzstr``
    specification (only if it contains a digit), and finally "GMT"/"UTC" or
    one of ``time.tzname``.
    """
    tz = None
    if not name:
        try:
            name = os.environ["TZ"]
        except KeyError:
            pass
    if name is None or name == ":":
        for filepath in TZFILES:
            if not os.path.isabs(filepath):
                filename = filepath
                for path in TZPATHS:
                    filepath = os.path.join(path, filename)
                    if os.path.isfile(filepath):
                        break
                else:
                    continue
            if os.path.isfile(filepath):
                try:
                    tz = tzfile(filepath)
                    break
                except (IOError, OSError, ValueError):
                    pass
        else:
            tz = tzlocal()
    else:
        if name.startswith(":"):
            # Drop the leading ":" marker. Slicing with [:-1] removed the
            # last character of the name and kept the colon.
            name = name[1:]
        if os.path.isabs(name):
            if os.path.isfile(name):
                tz = tzfile(name)
            else:
                tz = None
        else:
            for path in TZPATHS:
                filepath = os.path.join(path, name)
                if not os.path.isfile(filepath):
                    filepath = filepath.replace(' ', '_')
                    if not os.path.isfile(filepath):
                        continue
                try:
                    tz = tzfile(filepath)
                    break
                except (IOError, OSError, ValueError):
                    pass
            else:
                tz = None
                if tzwin:
                    try:
                        tz = tzwin(name)
                    except OSError:
                        pass
                if not tz:
                    # Local import: inside this function the name gettz
                    # refers to the zoneinfo lookup from here on.
                    from dateutil.zoneinfo import gettz
                    tz = gettz(name)
                if not tz:
                    for c in name:
                        # name must have at least one offset to be a tzstr
                        if c in "0123456789":
                            try:
                                tz = tzstr(name)
                            except ValueError:
                                pass
                            break
                    else:
                        if name in ("GMT", "UTC"):
                            tz = tzutc()
                        elif name in time.tzname:
                            tz = tzlocal()
    return tz
|
||||
|
||||
# vim:ts=4:sw=4:et
|
||||
180
lib/dateutil/tzwin.py
Normal file
180
lib/dateutil/tzwin.py
Normal file
@@ -0,0 +1,180 @@
|
||||
# This code was originally contributed by Jeffrey Harris.
|
||||
import datetime
|
||||
import struct
|
||||
import _winreg
|
||||
|
||||
__author__ = "Jeffrey Harris & Gustavo Niemeyer <gustavo@niemeyer.net>"
|
||||
|
||||
__all__ = ["tzwin", "tzwinlocal"]
|
||||
|
||||
ONEWEEK = datetime.timedelta(7)
|
||||
|
||||
TZKEYNAMENT = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones"
|
||||
TZKEYNAME9X = r"SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones"
|
||||
TZLOCALKEYNAME = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation"
|
||||
|
||||
def _settzkeyname():
    """Set the global ``TZKEYNAME`` to the time-zones registry key in use.

    ``TZKEYNAMENT`` is chosen when it can be opened under
    HKEY_LOCAL_MACHINE; otherwise ``TZKEYNAME9X`` is used.
    """
    global TZKEYNAME
    handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
    try:
        try:
            _winreg.OpenKey(handle, TZKEYNAMENT).Close()
            TZKEYNAME = TZKEYNAMENT
        except WindowsError:
            TZKEYNAME = TZKEYNAME9X
    finally:
        # Release the registry connection even on an unexpected error.
        handle.Close()
|
||||
|
||||
_settzkeyname()
|
||||
|
||||
class tzwinbase(datetime.tzinfo):
    """tzinfo class based on win32's timezones available in the registry."""

    def utcoffset(self, dt):
        """Return the DST or standard offset (stored in minutes) as a timedelta."""
        minutes = self._dstoffset if self._isdst(dt) else self._stdoffset
        return datetime.timedelta(minutes=minutes)

    def dst(self, dt):
        """Return the DST adjustment at ``dt``; a zero timedelta outside DST."""
        if not self._isdst(dt):
            return datetime.timedelta(0)
        return datetime.timedelta(minutes=self._dstoffset - self._stdoffset)

    def tzname(self, dt):
        """Return the DST or the standard name, depending on ``dt``."""
        return self._dstname if self._isdst(dt) else self._stdname

    @staticmethod
    def list():
        """Return a list of all time zones known to the system."""
        handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
        tzkey = _winreg.OpenKey(handle, TZKEYNAME)
        subkey_count = _winreg.QueryInfoKey(tzkey)[0]
        result = [_winreg.EnumKey(tzkey, index)
                  for index in range(subkey_count)]
        tzkey.Close()
        handle.Close()
        return result

    def display(self):
        """Return the stored display name."""
        return self._display

    def _isdst(self, dt):
        """Tell whether the naive value of ``dt`` lies in the DST period of its year."""
        dston = picknthweekday(dt.year, self._dstmonth, self._dstdayofweek,
                               self._dsthour, self._dstminute,
                               self._dstweeknumber)
        dstoff = picknthweekday(dt.year, self._stdmonth, self._stddayofweek,
                                self._stdhour, self._stdminute,
                                self._stdweeknumber)
        naive = dt.replace(tzinfo=None)
        if dston < dstoff:
            return dston <= naive < dstoff
        return not dstoff <= naive < dston
|
||||
|
||||
|
||||
class tzwin(tzwinbase):
    """Time zone loaded from the registry subkey ``TZKEYNAME\\<name>``."""

    def __init__(self, name):
        self._name = name

        handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
        try:
            # Raw string: "\%" is not a valid escape sequence.
            tzkey = _winreg.OpenKey(handle, r"%s\%s" % (TZKEYNAME, name))
            try:
                keydict = valuestodict(tzkey)
            finally:
                tzkey.Close()
        finally:
            # Release the registry connection even if the lookup failed.
            handle.Close()

        self._stdname = keydict["Std"].encode("iso-8859-1")
        self._dstname = keydict["Dlt"].encode("iso-8859-1")

        self._display = keydict["Display"]

        # See http://www.jsiinc.com/SUBA/tip0300/rh0398.htm
        tup = struct.unpack("=3l16h", keydict["TZI"])
        self._stdoffset = -tup[0]-tup[1]          # Bias + StandardBias * -1
        self._dstoffset = self._stdoffset-tup[2]  # + DaylightBias * -1

        (self._stdmonth,
         self._stddayofweek,   # Sunday = 0
         self._stdweeknumber,  # Last = 5
         self._stdhour,
         self._stdminute) = tup[4:9]

        (self._dstmonth,
         self._dstdayofweek,   # Sunday = 0
         self._dstweeknumber,  # Last = 5
         self._dsthour,
         self._dstminute) = tup[12:17]

    def __repr__(self):
        return "tzwin(%s)" % repr(self._name)

    def __reduce__(self):
        return (self.__class__, (self._name,))
|
||||
|
||||
|
||||
class tzwinlocal(tzwinbase):
    """Local time zone read from the ``TZLOCALKEYNAME`` registry key."""

    def __init__(self):

        handle = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
        try:
            tzlocalkey = _winreg.OpenKey(handle, TZLOCALKEYNAME)
            try:
                keydict = valuestodict(tzlocalkey)
            finally:
                tzlocalkey.Close()

            self._stdname = keydict["StandardName"].encode("iso-8859-1")
            self._dstname = keydict["DaylightName"].encode("iso-8859-1")

            try:
                # Raw string: "\%" is not a valid escape sequence.
                tzkey = _winreg.OpenKey(
                    handle, r"%s\%s" % (TZKEYNAME, self._stdname))
                try:
                    _keydict = valuestodict(tzkey)
                    self._display = _keydict["Display"]
                finally:
                    tzkey.Close()
            except OSError:
                # No matching entry below TZKEYNAME: no display name.
                self._display = None
        finally:
            # Release the registry connection even if a lookup failed.
            handle.Close()

        self._stdoffset = -keydict["Bias"]-keydict["StandardBias"]
        self._dstoffset = self._stdoffset-keydict["DaylightBias"]

        # See http://www.jsiinc.com/SUBA/tip0300/rh0398.htm
        tup = struct.unpack("=8h", keydict["StandardStart"])

        (self._stdmonth,
         self._stddayofweek,   # Sunday = 0
         self._stdweeknumber,  # Last = 5
         self._stdhour,
         self._stdminute) = tup[1:6]

        tup = struct.unpack("=8h", keydict["DaylightStart"])

        (self._dstmonth,
         self._dstdayofweek,   # Sunday = 0
         self._dstweeknumber,  # Last = 5
         self._dsthour,
         self._dstminute) = tup[1:6]

    def __reduce__(self):
        return (self.__class__, ())
|
||||
|
||||
def picknthweekday(year, month, dayofweek, hour, minute, whichweek):
    """Return the ``whichweek``-th ``dayofweek`` of ``month`` as a datetime.

    dayofweek == 0 means Sunday, whichweek 5 means last instance.

    ``whichweek`` counts occurrences starting at 1. If that occurrence falls
    outside the month (asking for a fifth one in a month that only has
    four), the previous occurrence is returned.
    """
    one_week = datetime.timedelta(weeks=1)
    first = datetime.datetime(year, month, 1, hour, minute)
    # First occurrence of the weekday in the month. isoweekday() is 1-7 with
    # Sunday == 7, so the modulo also handles dayofweek == 0.
    weekdayone = first.replace(day=((dayofweek - first.isoweekday()) % 7 + 1))
    # Occurrence N lies N-1 weeks after the first one.
    candidate = weekdayone + (whichweek - 1) * one_week
    if candidate.month != month:
        candidate -= one_week
    return candidate
|
||||
|
||||
def valuestodict(key):
    """Convert a registry key's values to a dictionary."""
    values = {}
    value_count = _winreg.QueryInfoKey(key)[1]
    for index in range(value_count):
        entry = _winreg.EnumValue(key, index)
        # entry[0] is used as the dictionary key, entry[1] as its value.
        values[entry[0]] = entry[1]
    return values
|
||||
85
lib/dateutil/zoneinfo/__init__.py
Normal file
85
lib/dateutil/zoneinfo/__init__.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""
|
||||
Copyright (c) 2003-2005 Gustavo Niemeyer <gustavo@niemeyer.net>
|
||||
|
||||
This module offers extensions to the standard python 2.3+
|
||||
datetime module.
|
||||
"""
|
||||
from dateutil.tz import tzfile
|
||||
from tarfile import TarFile
|
||||
import os
|
||||
|
||||
__author__ = "Gustavo Niemeyer <gustavo@niemeyer.net>"
|
||||
__license__ = "PSF License"
|
||||
|
||||
__all__ = ["setcachesize", "gettz", "rebuild"]
|
||||
|
||||
CACHE = {}
|
||||
|
||||
class tzfile(tzfile):
    """Subclass of the imported ``tzfile`` whose pickles are rebuilt via ``gettz``."""

    def __reduce__(self):
        lookup_args = (self._filename,)
        return (gettz, lookup_args)
|
||||
|
||||
def getzoneinfofile():
    """Return the path of the bundled zoneinfo archive, or None.

    Entries of this module's directory are scanned in reverse sorted order;
    the first one starting with "zoneinfo" and containing ".tar." wins.
    """
    module_dir = os.path.dirname(__file__)
    candidates = sorted(os.listdir(os.path.join(module_dir)), reverse=True)
    for candidate in candidates:
        if candidate.startswith("zoneinfo") and ".tar." in candidate:
            return os.path.join(os.path.dirname(__file__), candidate)
    return None
|
||||
|
||||
def buildcache():
    """Fill ``CACHE`` with a ``tzfile`` for every file or hard link in the archive.

    Does nothing when ``getzoneinfofile()`` finds no archive.
    """
    global CACHE
    archive_path = getzoneinfofile()
    if not archive_path:
        return
    archive = TarFile.open(archive_path)
    try:
        for member in archive.getmembers():
            if not (member.islnk() or member.isfile()):
                continue
            CACHE[member.name] = tzfile(archive.extractfile(member))
    finally:
        archive.close()
|
||||
|
||||
buildcache()
|
||||
|
||||
del getzoneinfofile
|
||||
del buildcache
|
||||
|
||||
def setcachesize(_):
    """Accept a cache size and ignore it.

    The cache is now eagerly initialized at import time, so there is no
    point in controlling its size.
    """
    return None
|
||||
|
||||
def gettz(name):
    """Return the ``CACHE`` entry for ``name``, or None when there is none."""
    zone = CACHE.get(name)
    return zone
|
||||
|
||||
def rebuild(filename, tag=None, format="gz"):
    """Rebuild the bundled zoneinfo archive from a tz source tarball.

    Every member of ``filename`` except ``*.sh``, ``*.tab`` and
    ``leapseconds`` is extracted to a temporary directory and compiled with
    the external ``zic`` tool. Existing ``zoneinfo*.tar.*`` files in the
    module directory are then removed and a new archive is written there.

    :param filename: path of the source tarball.
    :param tag: optional suffix; the archive is named
        ``zoneinfo-<tag>.tar.<format>``, or ``zoneinfo.tar.<format>``
        without a tag.
    :param format: compression suffix passed to ``TarFile.open``.
    """
    import tempfile, shutil, subprocess
    tmpdir = tempfile.mkdtemp()
    zonedir = os.path.join(tmpdir, "zoneinfo")
    moduledir = os.path.dirname(__file__)
    # An absent tag must not leak the text "None" into the file name.
    suffix = "-" + tag if tag else ""
    targetname = "zoneinfo%s.tar.%s" % (suffix, format)
    try:
        tf = TarFile.open(filename)
        try:
            for name in tf.getnames():
                if not (name.endswith(".sh") or
                        name.endswith(".tab") or
                        name == "leapseconds"):
                    # NOTE(review): members are extracted as named in the
                    # archive; only use this with a trusted tarball.
                    tf.extract(name, tmpdir)
                    filepath = os.path.join(tmpdir, name)
                    # Argument list instead of a shell string: paths with
                    # spaces or shell metacharacters are passed verbatim.
                    # The exit status is ignored, as it was with os.system.
                    subprocess.call(["zic", "-d", zonedir, filepath])
        finally:
            tf.close()
        target = os.path.join(moduledir, targetname)
        for entry in os.listdir(moduledir):
            if entry.startswith("zoneinfo") and ".tar." in entry:
                os.unlink(os.path.join(moduledir, entry))
        tf = TarFile.open(target, "w:%s" % format)
        try:
            for entry in os.listdir(zonedir):
                entrypath = os.path.join(zonedir, entry)
                tf.add(entrypath, entry)
        finally:
            tf.close()
    finally:
        shutil.rmtree(tmpdir)
|
||||
BIN
lib/dateutil/zoneinfo/zoneinfo-2012c.tar.gz
Normal file
BIN
lib/dateutil/zoneinfo/zoneinfo-2012c.tar.gz
Normal file
Binary file not shown.
14
lib/guessit/__init__.py
Normal file
14
lib/guessit/__init__.py
Normal file
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Extracts as much information as possible from a video file.
|
||||
"""
|
||||
from . import monkeypatch as _monkeypatch
|
||||
|
||||
from .api import guessit, GuessItApi
|
||||
from .options import ConfigurationException
|
||||
from .rules.common.quantity import Size
|
||||
|
||||
from .__version__ import __version__
|
||||
|
||||
_monkeypatch.monkeypatch_rebulk()
|
||||
180
lib/guessit/__main__.py
Normal file
180
lib/guessit/__main__.py
Normal file
@@ -0,0 +1,180 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Entry point module
|
||||
"""
|
||||
# pragma: no cover
|
||||
from __future__ import print_function
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
import six
|
||||
from rebulk.__version__ import __version__ as __rebulk_version__
|
||||
|
||||
from guessit import api
|
||||
from guessit.__version__ import __version__
|
||||
from guessit.jsonutils import GuessitEncoder
|
||||
from guessit.options import argument_parser, parse_options, load_config, merge_options
|
||||
|
||||
|
||||
try:
|
||||
from collections import OrderedDict
|
||||
except ImportError: # pragma: no-cover
|
||||
from ordereddict import OrderedDict # pylint:disable=import-error
|
||||
|
||||
|
||||
def guess_filename(filename, options):
    """
    Guess a single filename using given options and print the result.

    Depending on the options, the guess is printed as one property value
    ('show_property'), as JSON ('json'), as YAML ('yaml'), or as indented JSON
    preceded by a 'For:' line.

    :param filename: filename to parse
    :type filename: str
    :param options:
    :type options: dict
    :return: None
    """
    machine_output = options.get('yaml') or options.get('json') or options.get('show_property')
    if not machine_output:
        print('For:', filename)

    guess = api.guessit(filename, options)

    if options.get('show_property'):
        print(guess.get(options.get('show_property'), ''))
        return

    if options.get('json'):
        print(json.dumps(guess, cls=GuessitEncoder, ensure_ascii=False))
        return

    if not options.get('yaml'):
        print('GuessIt found:', json.dumps(guess, cls=GuessitEncoder, indent=4, ensure_ascii=False))
        return

    import yaml
    from guessit import yamlutils

    ystr = yaml.dump({filename: OrderedDict(guess)}, Dumper=yamlutils.CustomDumper, default_flow_style=False,
                     allow_unicode=True)
    for index, yline in enumerate(ystr.splitlines()):
        if index == 0:
            # First line: printed with a "? " prefix and without its last character.
            print("? " + yline[:-1])
        elif index == 1:
            # Second line: its first character is replaced by ":".
            print(":" + yline[1:])
        else:
            print(yline)
|
||||
|
||||
|
||||
def display_properties(options):
    """
    Display properties

    Prints the properties returned by ``api.properties(options)`` as JSON,
    YAML or a plain listing. Possible values are included only when the
    'values' option is set; otherwise only the property names are shown.

    :param options:
    :type options: dict
    """
    properties = api.properties(options)

    if options.get('json'):
        payload = properties if options.get('values') else list(properties.keys())
        print(json.dumps(payload, cls=GuessitEncoder, ensure_ascii=False))
        return

    if options.get('yaml'):
        import yaml
        from guessit import yamlutils
        payload = properties if options.get('values') else list(properties.keys())
        print(yaml.dump(payload, Dumper=yamlutils.CustomDumper, default_flow_style=False,
                        allow_unicode=True))
        return

    print('GuessIt properties:')

    for property_name in list(sorted(properties.keys())):
        property_values = properties.get(property_name)
        print(2 * ' ' + '[+] %s' % (property_name,))
        if not (property_values and options.get('values')):
            continue
        for property_value in property_values:
            print(4 * ' ' + '[!] %s' % (property_value,))
|
||||
|
||||
|
||||
def fix_argv_encoding():
    """
    Fix encoding of sys.argv on windows Python 2

    Each ``sys.argv`` entry is decoded in place with the locale's preferred
    encoding. Nothing happens on other platforms or Python versions.
    """
    if not (six.PY2 and os.name == 'nt'):
        return

    # pragma: no cover
    # see http://bugs.python.org/issue2128
    import locale

    for position, raw_arg in enumerate(sys.argv):
        sys.argv[position] = raw_arg.decode(locale.getpreferredencoding())
|
||||
|
||||
|
||||
def main(args=None):  # pylint:disable=too-many-branches
    """
    Main function for entry point

    Parses the options, merges them with the loaded configuration, then
    prints the version banner, the properties and/or the guess for every
    given filename. Help is printed when none of those actions took place.

    :param args: arguments handed to ``parse_options``; when None,
        ``parse_options`` is called without arguments
    """
    fix_argv_encoding()

    options = parse_options() if args is None else parse_options(args)  # pragma: no cover

    config = load_config(options)
    options = merge_options(config, options)

    if options.get('verbose'):
        logging.basicConfig(stream=sys.stdout, format='%(message)s')
        logging.getLogger().setLevel(logging.DEBUG)

    help_required = True

    if options.get('version'):
        separator = '+-------------------------------------------------------+'
        print(separator)
        print('+ GuessIt ' + __version__ + (28 - len(__version__)) * ' ' + '+')
        print(separator)
        print('+ Rebulk ' + __rebulk_version__ + (29 - len(__rebulk_version__)) * ' ' + '+')
        print(separator)
        print('| Please report any bug or feature request at |')
        print('| https://github.com/guessit-io/guessit/issues. |')
        print(separator)
        help_required = False

    if options.get('yaml'):
        try:
            import yaml  # pylint:disable=unused-variable,unused-import
        except ImportError:  # pragma: no cover
            del options['yaml']
            print('PyYAML is not installed. \'--yaml\' option will be ignored ...', file=sys.stderr)

    if options.get('properties') or options.get('values'):
        display_properties(options)
        help_required = False

    filenames = []
    if options.get('filename'):
        filenames.extend(options.get('filename'))
    if options.get('input_file'):
        # Python 2's built-in open() has no encoding parameter.
        if six.PY2:
            input_file = open(options.get('input_file'), 'r')
        else:
            input_file = open(options.get('input_file'), 'r', encoding='utf-8')
        try:
            filenames.extend([line.strip() for line in input_file.readlines()])
        finally:
            input_file.close()

    # Drop empty entries (e.g. blank lines of the input file).
    filenames = [name for name in filenames if name]

    for filename in filenames:
        help_required = False
        guess_filename(filename, options)

    if help_required:  # pragma: no cover
        argument_parser.print_help()
|
||||
|
||||
|
||||
if __name__ == '__main__': # pragma: no cover
|
||||
main()
|
||||
7
lib/guessit/__version__.py
Normal file
7
lib/guessit/__version__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Version module
|
||||
"""
|
||||
# pragma: no cover
|
||||
__version__ = '3.1.2.dev0'
|
||||
263
lib/guessit/api.py
Normal file
263
lib/guessit/api.py
Normal file
@@ -0,0 +1,263 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
API functions that can be used by external software
|
||||
"""
|
||||
|
||||
try:
|
||||
from collections import OrderedDict
|
||||
except ImportError: # pragma: no-cover
|
||||
from ordereddict import OrderedDict # pylint:disable=import-error
|
||||
|
||||
import os
|
||||
import traceback
|
||||
|
||||
import six
|
||||
from rebulk.introspector import introspect
|
||||
|
||||
from .__version__ import __version__
|
||||
from .options import parse_options, load_config, merge_options
|
||||
from .rules import rebulk_builder
|
||||
|
||||
|
||||
class GuessitException(Exception):
    """
    Exception raised when guessit fails to perform a guess because of an internal error.

    The message is a report holding the guessit version, the input string,
    the options and the output of ``traceback.format_exc()``. The original
    ``string`` and ``options`` are kept as attributes.
    """

    def __init__(self, string, options):
        report = ("An internal error has occured in guessit.\n"
                  "===================== Guessit Exception Report =====================\n"
                  "version=%s\n"
                  "string=%s\n"
                  "options=%s\n"
                  "--------------------------------------------------------------------\n"
                  "%s"
                  "--------------------------------------------------------------------\n"
                  "Please report at "
                  "https://github.com/guessit-io/guessit/issues.\n"
                  "====================================================================")
        details = (__version__, str(string), str(options), traceback.format_exc())
        super(GuessitException, self).__init__(report % details)

        self.string = string
        self.options = options
|
||||
|
||||
|
||||
def configure(options=None, rules_builder=rebulk_builder, force=False):
    """
    Load configuration files and initialize rebulk rules if required.

    Delegates to ``default_api.configure``.

    :param options:
    :type options: dict
    :param rules_builder:
    :type rules_builder:
    :param force:
    :type force: bool
    :return: None
    """
    forwarded = {'rules_builder': rules_builder, 'force': force}
    default_api.configure(options, **forwarded)
|
||||
|
||||
|
||||
def guessit(string, options=None):
    """
    Retrieves all matches from string as a dict

    Delegates to ``default_api.guessit``.

    :param string: the filename or release name
    :type string: str
    :param options:
    :type options: str|dict
    :return: whatever ``default_api.guessit`` returns
    """
    matches = default_api.guessit(string, options)
    return matches
|
||||
|
||||
|
||||
def properties(options=None):
    """
    Retrieves all properties with possible values that can be guessed

    Delegates to ``default_api.properties``.

    :param options:
    :type options: str|dict
    :return: whatever ``default_api.properties`` returns
    """
    available = default_api.properties(options)
    return available
|
||||
|
||||
|
||||
def suggested_expected(titles, options=None):
    """
    Return a list of suggested titles to be used as `expected_title` based on the list of titles

    Delegates to ``default_api.suggested_expected``.

    :param titles: the filename or release name
    :type titles: list|set|dict
    :param options:
    :type options: str|dict
    :return:
    :rtype: list of str
    """
    suggestions = default_api.suggested_expected(titles, options)
    return suggestions
|
||||
|
||||
|
||||
class GuessItApi(object):
    """
    An api class that can be configured with custom Rebulk configuration.

    The loaded configuration and the Rebulk object built from it are cached on
    the instance and only rebuilt when the relevant options change.
    """

    def __init__(self):
        """Default constructor."""
        self.rebulk = None  # object returned by the rules builder
        self.config = None  # configuration returned by the last configure() call
        self.load_config_options = None  # options used for the last load_config() call
        self.advanced_config = None  # advanced configuration self.rebulk was built from

    @classmethod
    def _fix_encoding(cls, value):
        """
        Normalize the string type of ``value`` for the running interpreter.

        Lists and dicts (keys and values) are processed recursively. On
        Python 2, text is encoded to UTF-8 bytes; on Python 3, bytes are
        decoded as ASCII. Any other value is returned unchanged.

        :param value: value to normalize
        :return: the normalized value
        """
        if isinstance(value, list):
            return [cls._fix_encoding(item) for item in value]
        if isinstance(value, dict):
            return {cls._fix_encoding(k): cls._fix_encoding(v) for k, v in value.items()}
        if six.PY2 and isinstance(value, six.text_type):
            return value.encode('utf-8')
        if six.PY3 and isinstance(value, six.binary_type):
            return value.decode('ascii')
        return value

    @classmethod
    def _has_same_properties(cls, dic1, dic2, values):
        """
        Tell whether both dicts hold the same value for every key in ``values``.

        Missing keys compare as ``None``.

        :param dic1: first dict
        :param dic2: second dict
        :param values: keys to compare
        :return: True when no compared key differs
        :rtype: bool
        """
        return not any(dic1.get(value) != dic2.get(value) for value in values)

    def configure(self, options=None, rules_builder=rebulk_builder, force=False, sanitize_options=True):
        """
        Load configuration files and initialize rebulk rules if required.

        The configuration is reloaded when none is cached, when ``force`` is
        set, or when any of the ``config``, ``no_user_config`` or
        ``no_default_config`` options differs from the previous load. The
        Rebulk object is rebuilt when ``force`` is set, when none is cached, or
        when the merged advanced configuration changed.

        :param options: options controlling which configuration gets loaded
        :type options: str|dict
        :param rules_builder: callable invoked with the advanced configuration to build the Rebulk object
        :type rules_builder:
        :param force: reload the configuration and rebuild the rules unconditionally
        :type force: bool
        :param sanitize_options: parse and encoding-normalize ``options`` first;
            callers that already did so pass False
        :type sanitize_options: bool
        :return: the loaded configuration
        :rtype: dict
        """
        if sanitize_options:
            options = parse_options(options, True)
            options = self._fix_encoding(options)

        must_reload = (
            self.config is None
            or self.load_config_options is None
            or force
            or not self._has_same_properties(self.load_config_options,
                                             options,
                                             ['config', 'no_user_config', 'no_default_config'])
        )
        if must_reload:
            config = load_config(options)
            config = self._fix_encoding(config)
            self.load_config_options = options
        else:
            config = self.config

        advanced_config = merge_options(config.get('advanced_config'), options.get('advanced_config'))

        should_build_rebulk = force or not self.rebulk or not self.advanced_config or \
            self.advanced_config != advanced_config

        if should_build_rebulk:
            self.advanced_config = advanced_config
            self.rebulk = rules_builder(advanced_config)

        self.config = config
        return self.config

    def guessit(self, string, options=None):  # pylint: disable=too-many-branches
        """
        Retrieves all matches from string as a dict

        :param string: the filename or release name
        :type string: str|Path
        :param options: options merged over the loaded configuration
        :type options: str|dict
        :return: the matches converted with ``to_dict``
        :rtype:
        :raises GuessitException: when any ``Exception`` is raised while parsing
        """
        try:
            from pathlib import Path
            if isinstance(string, Path):
                try:
                    # Handle path-like object
                    string = os.fspath(string)
                except AttributeError:
                    # os.fspath is not available on this interpreter
                    string = str(string)
        except ImportError:
            pass

        try:
            options = parse_options(options, True)
            options = self._fix_encoding(options)
            config = self.configure(options, sanitize_options=False)
            options = merge_options(config, options)
            result_decode = False
            result_encode = False

            if six.PY2:
                if isinstance(string, six.text_type):
                    string = string.encode("utf-8")
                    result_decode = True
                elif isinstance(string, six.binary_type):
                    string = six.binary_type(string)
            if six.PY3:
                if isinstance(string, six.binary_type):
                    string = string.decode('ascii')
                    result_encode = True
                elif isinstance(string, six.text_type):
                    string = six.text_type(string)

            matches = self.rebulk.matches(string, options)
            # Give string values back in the same type (text/bytes) the caller passed in.
            if result_decode:
                for match in matches:
                    if isinstance(match.value, six.binary_type):
                        match.value = match.value.decode("utf-8")
            if result_encode:
                for match in matches:
                    if isinstance(match.value, six.text_type):
                        match.value = match.value.encode("ascii")
            return matches.to_dict(options.get('advanced', False), options.get('single_value', False),
                                   options.get('enforce_list', False))
        except Exception:
            # Only wrap genuine errors: KeyboardInterrupt and SystemExit must
            # propagate untouched instead of being reported as a guessit bug.
            raise GuessitException(string, options)

    def properties(self, options=None):
        """
        Grab properties and values that can be generated.

        Property names and their values are both sorted by text
        representation. When the Rebulk object defines
        ``customize_properties``, the result is passed through it.

        :param options: options merged over the loaded configuration
        :type options: str|dict
        :return: ordered mapping of property name to sorted list of values
        :rtype: OrderedDict
        """
        options = parse_options(options, True)
        options = self._fix_encoding(options)
        config = self.configure(options, sanitize_options=False)
        options = merge_options(config, options)
        unordered = introspect(self.rebulk, options).properties
        ordered = OrderedDict()
        for k in sorted(unordered.keys(), key=six.text_type):
            ordered[k] = sorted(unordered[k], key=six.text_type)
        if hasattr(self.rebulk, 'customize_properties'):
            ordered = self.rebulk.customize_properties(ordered)
        return ordered

    def suggested_expected(self, titles, options=None):
        """
        Return a list of suggested titles to be used as `expected_title` based on the list of titles

        A title is suggested unless guessing it yields exactly two entries,
        one of which is ``title``.

        :param titles: the filename or release name
        :type titles: list|set|dict
        :param options: options forwarded to ``guessit`` for each title
        :type options: str|dict
        :return: the suggested titles, in iteration order
        :rtype: list of str
        """
        suggested = []
        for title in titles:
            guess = self.guessit(title, options)
            if len(guess) != 2 or 'title' not in guess:
                suggested.append(title)

        return suggested
|
||||
|
||||
|
||||
# Shared instance that the module-level configure/guessit/properties/suggested_expected
# functions delegate to.
default_api = GuessItApi()
|
||||
27
lib/guessit/backports.py
Normal file
27
lib/guessit/backports.py
Normal file
@@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Backports
|
||||
"""
|
||||
# pragma: no-cover
|
||||
# pylint: skip-file
|
||||
|
||||
def cmp_to_key(mycmp):
    """functools.cmp_to_key backport"""

    def _order(left, right):
        # Run the user comparison on the two wrapped objects.
        return mycmp(left.obj, right.obj)

    class KeyClass(object):
        """Wrapper whose rich comparisons are all answered by ``mycmp``."""

        def __init__(self, obj, *args):  # pylint: disable=unused-argument
            self.obj = obj

        def __lt__(self, other):
            return _order(self, other) < 0

        def __gt__(self, other):
            return _order(self, other) > 0

        def __eq__(self, other):
            return _order(self, other) == 0

        def __le__(self, other):
            return _order(self, other) <= 0

        def __ge__(self, other):
            return _order(self, other) >= 0

        def __ne__(self, other):
            return _order(self, other) != 0

    return KeyClass
|
||||
586
lib/guessit/config/options.json
Normal file
586
lib/guessit/config/options.json
Normal file
@@ -0,0 +1,586 @@
|
||||
{
|
||||
"expected_title": [
|
||||
"OSS 117",
|
||||
"This is Us"
|
||||
],
|
||||
"allowed_countries": [
|
||||
"au",
|
||||
"gb",
|
||||
"us"
|
||||
],
|
||||
"allowed_languages": [
|
||||
"ca",
|
||||
"cs",
|
||||
"de",
|
||||
"en",
|
||||
"es",
|
||||
"fr",
|
||||
"he",
|
||||
"hi",
|
||||
"hu",
|
||||
"it",
|
||||
"ja",
|
||||
"ko",
|
||||
"mul",
|
||||
"nl",
|
||||
"no",
|
||||
"pl",
|
||||
"pt",
|
||||
"ro",
|
||||
"ru",
|
||||
"sv",
|
||||
"te",
|
||||
"uk",
|
||||
"und"
|
||||
],
|
||||
"advanced_config": {
|
||||
"common_words": [
|
||||
"ca",
|
||||
"cat",
|
||||
"de",
|
||||
"he",
|
||||
"it",
|
||||
"no",
|
||||
"por",
|
||||
"rum",
|
||||
"se",
|
||||
"st",
|
||||
"sub"
|
||||
],
|
||||
"groups": {
|
||||
"starting": "([{",
|
||||
"ending": ")]}"
|
||||
},
|
||||
"audio_codec": {
|
||||
"audio_channels": {
|
||||
"1.0": [
|
||||
"1ch",
|
||||
"mono"
|
||||
],
|
||||
"2.0": [
|
||||
"2ch",
|
||||
"stereo",
|
||||
"re:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)"
|
||||
],
|
||||
"5.1": [
|
||||
"5ch",
|
||||
"6ch",
|
||||
"re:(5[\\W_][01](?:ch)?)(?=[^\\d]|$)",
|
||||
"re:(6[\\W_]0(?:ch)?)(?=[^\\d]|$)"
|
||||
],
|
||||
"7.1": [
|
||||
"7ch",
|
||||
"8ch",
|
||||
"re:(7[\\W_][01](?:ch)?)(?=[^\\d]|$)"
|
||||
]
|
||||
}
|
||||
},
|
||||
"container": {
|
||||
"subtitles": [
|
||||
"srt",
|
||||
"idx",
|
||||
"sub",
|
||||
"ssa",
|
||||
"ass"
|
||||
],
|
||||
"info": [
|
||||
"nfo"
|
||||
],
|
||||
"videos": [
|
||||
"3g2",
|
||||
"3gp",
|
||||
"3gp2",
|
||||
"asf",
|
||||
"avi",
|
||||
"divx",
|
||||
"flv",
|
||||
"iso",
|
||||
"m4v",
|
||||
"mk2",
|
||||
"mk3d",
|
||||
"mka",
|
||||
"mkv",
|
||||
"mov",
|
||||
"mp4",
|
||||
"mp4a",
|
||||
"mpeg",
|
||||
"mpg",
|
||||
"ogg",
|
||||
"ogm",
|
||||
"ogv",
|
||||
"qt",
|
||||
"ra",
|
||||
"ram",
|
||||
"rm",
|
||||
"ts",
|
||||
"vob",
|
||||
"wav",
|
||||
"webm",
|
||||
"wma",
|
||||
"wmv"
|
||||
],
|
||||
"torrent": [
|
||||
"torrent"
|
||||
],
|
||||
"nzb": [
|
||||
"nzb"
|
||||
]
|
||||
},
|
||||
"country": {
|
||||
"synonyms": {
|
||||
"ES": [
|
||||
"españa"
|
||||
],
|
||||
"GB": [
|
||||
"UK"
|
||||
],
|
||||
"BR": [
|
||||
"brazilian",
|
||||
"bra"
|
||||
],
|
||||
"CA": [
|
||||
"québec",
|
||||
"quebec",
|
||||
"qc"
|
||||
],
|
||||
"MX": [
|
||||
"Latinoamérica",
|
||||
"latin america"
|
||||
]
|
||||
}
|
||||
},
|
||||
"episodes": {
|
||||
"season_max_range": 100,
|
||||
"episode_max_range": 100,
|
||||
"max_range_gap": 1,
|
||||
"season_markers": [
|
||||
"s"
|
||||
],
|
||||
"season_ep_markers": [
|
||||
"x"
|
||||
],
|
||||
"disc_markers": [
|
||||
"d"
|
||||
],
|
||||
"episode_markers": [
|
||||
"xe",
|
||||
"ex",
|
||||
"ep",
|
||||
"e",
|
||||
"x"
|
||||
],
|
||||
"range_separators": [
|
||||
"-",
|
||||
"~",
|
||||
"to",
|
||||
"a"
|
||||
],
|
||||
"discrete_separators": [
|
||||
"+",
|
||||
"&",
|
||||
"and",
|
||||
"et"
|
||||
],
|
||||
"season_words": [
|
||||
"season",
|
||||
"saison",
|
||||
"seizoen",
|
||||
"seasons",
|
||||
"saisons",
|
||||
"tem",
|
||||
"temp",
|
||||
"temporada",
|
||||
"temporadas",
|
||||
"stagione"
|
||||
],
|
||||
"episode_words": [
|
||||
"episode",
|
||||
"episodes",
|
||||
"eps",
|
||||
"ep",
|
||||
"episodio",
|
||||
"episodios",
|
||||
"capitulo",
|
||||
"capitulos"
|
||||
],
|
||||
"of_words": [
|
||||
"of",
|
||||
"sur"
|
||||
],
|
||||
"all_words": [
|
||||
"All"
|
||||
]
|
||||
},
|
||||
"language": {
|
||||
"synonyms": {
|
||||
"ell": [
|
||||
"gr",
|
||||
"greek"
|
||||
],
|
||||
"spa": [
|
||||
"esp",
|
||||
"español",
|
||||
"espanol"
|
||||
],
|
||||
"fra": [
|
||||
"français",
|
||||
"vf",
|
||||
"vff",
|
||||
"vfi",
|
||||
"vfq"
|
||||
],
|
||||
"swe": [
|
||||
"se"
|
||||
],
|
||||
"por_BR": [
|
||||
"po",
|
||||
"pb",
|
||||
"pob",
|
||||
"ptbr",
|
||||
"br",
|
||||
"brazilian"
|
||||
],
|
||||
"deu_CH": [
|
||||
"swissgerman",
|
||||
"swiss german"
|
||||
],
|
||||
"nld_BE": [
|
||||
"flemish"
|
||||
],
|
||||
"cat": [
|
||||
"català",
|
||||
"castellano",
|
||||
"espanol castellano",
|
||||
"español castellano"
|
||||
],
|
||||
"ces": [
|
||||
"cz"
|
||||
],
|
||||
"ukr": [
|
||||
"ua"
|
||||
],
|
||||
"zho": [
|
||||
"cn"
|
||||
],
|
||||
"jpn": [
|
||||
"jp"
|
||||
],
|
||||
"hrv": [
|
||||
"scr"
|
||||
],
|
||||
"mul": [
|
||||
"multi",
|
||||
"dl"
|
||||
]
|
||||
},
|
||||
"subtitle_affixes": [
|
||||
"sub",
|
||||
"subs",
|
||||
"esub",
|
||||
"esubs",
|
||||
"subbed",
|
||||
"custom subbed",
|
||||
"custom subs",
|
||||
"custom sub",
|
||||
"customsubbed",
|
||||
"customsubs",
|
||||
"customsub",
|
||||
"soft subtitles",
|
||||
"soft subs"
|
||||
],
|
||||
"subtitle_prefixes": [
|
||||
"st",
|
||||
"vost",
|
||||
"subforced",
|
||||
"fansub",
|
||||
"hardsub",
|
||||
"legenda",
|
||||
"legendas",
|
||||
"legendado",
|
||||
"subtitulado",
|
||||
"soft",
|
||||
"subtitles"
|
||||
],
|
||||
"subtitle_suffixes": [
|
||||
"subforced",
|
||||
"fansub",
|
||||
"hardsub"
|
||||
],
|
||||
"language_affixes": [
|
||||
"dublado",
|
||||
"dubbed",
|
||||
"dub"
|
||||
],
|
||||
"language_prefixes": [
|
||||
"true"
|
||||
],
|
||||
"language_suffixes": [
|
||||
"audio"
|
||||
],
|
||||
"weak_affixes": [
|
||||
"v",
|
||||
"audio",
|
||||
"true"
|
||||
]
|
||||
},
|
||||
"part": {
|
||||
"prefixes": [
|
||||
"pt",
|
||||
"part"
|
||||
]
|
||||
},
|
||||
"release_group": {
|
||||
"forbidden_names": [
|
||||
"bonus",
|
||||
"by",
|
||||
"for",
|
||||
"par",
|
||||
"pour",
|
||||
"rip"
|
||||
],
|
||||
"ignored_seps": "[]{}()"
|
||||
},
|
||||
"screen_size": {
|
||||
"frame_rates": [
|
||||
"23.976",
|
||||
"24",
|
||||
"25",
|
||||
"29.970",
|
||||
"30",
|
||||
"48",
|
||||
"50",
|
||||
"60",
|
||||
"120"
|
||||
],
|
||||
"min_ar": 1.333,
|
||||
"max_ar": 1.898,
|
||||
"interlaced": [
|
||||
"360",
|
||||
"480",
|
||||
"576",
|
||||
"900",
|
||||
"1080"
|
||||
],
|
||||
"progressive": [
|
||||
"360",
|
||||
"480",
|
||||
"540",
|
||||
"576",
|
||||
"900",
|
||||
"1080",
|
||||
"368",
|
||||
"720",
|
||||
"1440",
|
||||
"2160",
|
||||
"4320"
|
||||
]
|
||||
},
|
||||
"website": {
|
||||
"safe_tlds": [
|
||||
"com",
|
||||
"net",
|
||||
"org"
|
||||
],
|
||||
"safe_subdomains": [
|
||||
"www"
|
||||
],
|
||||
"safe_prefixes": [
|
||||
"co",
|
||||
"com",
|
||||
"net",
|
||||
"org"
|
||||
],
|
||||
"prefixes": [
|
||||
"from"
|
||||
]
|
||||
},
|
||||
"streaming_service": {
|
||||
"A&E": [
|
||||
"AE",
|
||||
"A&E"
|
||||
],
|
||||
"ABC": "AMBC",
|
||||
"ABC Australia": "AUBC",
|
||||
"Al Jazeera English": "AJAZ",
|
||||
"AMC": "AMC",
|
||||
"Amazon Prime": [
|
||||
"AMZN",
|
||||
"Amazon",
|
||||
"re:Amazon-?Prime"
|
||||
],
|
||||
"Adult Swim": [
|
||||
"AS",
|
||||
"re:Adult-?Swim"
|
||||
],
|
||||
"America's Test Kitchen": "ATK",
|
||||
"Animal Planet": "ANPL",
|
||||
"AnimeLab": "ANLB",
|
||||
"AOL": "AOL",
|
||||
"ARD": "ARD",
|
||||
"BBC iPlayer": [
|
||||
"iP",
|
||||
"re:BBC-?iPlayer"
|
||||
],
|
||||
"BravoTV": "BRAV",
|
||||
"Canal+": "CNLP",
|
||||
"Cartoon Network": "CN",
|
||||
"CBC": "CBC",
|
||||
"CBS": "CBS",
|
||||
"CNBC": "CNBC",
|
||||
"Comedy Central": [
|
||||
"CC",
|
||||
"re:Comedy-?Central"
|
||||
],
|
||||
"Channel 4": "4OD",
|
||||
"CHRGD": "CHGD",
|
||||
"Cinemax": "CMAX",
|
||||
"Country Music Television": "CMT",
|
||||
"Comedians in Cars Getting Coffee": "CCGC",
|
||||
"Crunchy Roll": [
|
||||
"CR",
|
||||
"re:Crunchy-?Roll"
|
||||
],
|
||||
"Crackle": "CRKL",
|
||||
"CSpan": "CSPN",
|
||||
"CTV": "CTV",
|
||||
"CuriosityStream": "CUR",
|
||||
"CWSeed": "CWS",
|
||||
"Daisuki": "DSKI",
|
||||
"DC Universe": "DCU",
|
||||
"Deadhouse Films": "DHF",
|
||||
"DramaFever": [
|
||||
"DF",
|
||||
"DramaFever"
|
||||
],
|
||||
"Digiturk Diledigin Yerde": "DDY",
|
||||
"Discovery": [
|
||||
"DISC",
|
||||
"Discovery"
|
||||
],
|
||||
"Disney": [
|
||||
"DSNY",
|
||||
"Disney"
|
||||
],
|
||||
"DIY Network": "DIY",
|
||||
"Doc Club": "DOCC",
|
||||
"DPlay": "DPLY",
|
||||
"E!": "ETV",
|
||||
"ePix": "EPIX",
|
||||
"El Trece": "ETTV",
|
||||
"ESPN": "ESPN",
|
||||
"Esquire": "ESQ",
|
||||
"Family": "FAM",
|
||||
"Family Jr": "FJR",
|
||||
"Food Network": "FOOD",
|
||||
"Fox": "FOX",
|
||||
"Freeform": "FREE",
|
||||
"FYI Network": "FYI",
|
||||
"Global": "GLBL",
|
||||
"GloboSat Play": "GLOB",
|
||||
"Hallmark": "HLMK",
|
||||
"HBO Go": [
|
||||
"HBO",
|
||||
"re:HBO-?Go"
|
||||
],
|
||||
"HGTV": "HGTV",
|
||||
"History": [
|
||||
"HIST",
|
||||
"History"
|
||||
],
|
||||
"Hulu": "HULU",
|
||||
"Investigation Discovery": "ID",
|
||||
"IFC": "IFC",
|
||||
"iTunes": "iTunes",
|
||||
"ITV": "ITV",
|
||||
"Knowledge Network": "KNOW",
|
||||
"Lifetime": "LIFE",
|
||||
"Motor Trend OnDemand": "MTOD",
|
||||
"MBC": [
|
||||
"MBC",
|
||||
"MBCVOD"
|
||||
],
|
||||
"MSNBC": "MNBC",
|
||||
"MTV": "MTV",
|
||||
"National Geographic": [
|
||||
"NATG",
|
||||
"re:National-?Geographic"
|
||||
],
|
||||
"NBA TV": [
|
||||
"NBA",
|
||||
"re:NBA-?TV"
|
||||
],
|
||||
"NBC": "NBC",
|
||||
"Netflix": [
|
||||
"NF",
|
||||
"Netflix"
|
||||
],
|
||||
"NFL": "NFL",
|
||||
"NFL Now": "NFLN",
|
||||
"NHL GameCenter": "GC",
|
||||
"Nickelodeon": [
|
||||
"NICK",
|
||||
"Nickelodeon"
|
||||
],
|
||||
"Norsk Rikskringkasting": "NRK",
|
||||
"OnDemandKorea": [
|
||||
"ODK",
|
||||
"OnDemandKorea"
|
||||
],
|
||||
"PBS": "PBS",
|
||||
"PBS Kids": "PBSK",
|
||||
"Playstation Network": "PSN",
|
||||
"Pluzz": "PLUZ",
|
||||
"RTE One": "RTE",
|
||||
"SBS (AU)": "SBS",
|
||||
"SeeSo": [
|
||||
"SESO",
|
||||
"SeeSo"
|
||||
],
|
||||
"Shomi": "SHMI",
|
||||
"Spike": "SPIK",
|
||||
"Spike TV": [
|
||||
"SPKE",
|
||||
"re:Spike-?TV"
|
||||
],
|
||||
"Sportsnet": "SNET",
|
||||
"Sprout": "SPRT",
|
||||
"Stan": "STAN",
|
||||
"Starz": "STZ",
|
||||
"Sveriges Television": "SVT",
|
||||
"SwearNet": "SWER",
|
||||
"Syfy": "SYFY",
|
||||
"TBS": "TBS",
|
||||
"TFou": "TFOU",
|
||||
"The CW": [
|
||||
"CW",
|
||||
"re:The-?CW"
|
||||
],
|
||||
"TLC": "TLC",
|
||||
"TubiTV": "TUBI",
|
||||
"TV3 Ireland": "TV3",
|
||||
"TV4 Sweeden": "TV4",
|
||||
"TVING": "TVING",
|
||||
"TV Land": [
|
||||
"TVL",
|
||||
"re:TV-?Land"
|
||||
],
|
||||
"UFC": "UFC",
|
||||
"UKTV": "UKTV",
|
||||
"Univision": "UNIV",
|
||||
"USA Network": "USAN",
|
||||
"Velocity": "VLCT",
|
||||
"VH1": "VH1",
|
||||
"Viceland": "VICE",
|
||||
"Viki": "VIKI",
|
||||
"Vimeo": "VMEO",
|
||||
"VRV": "VRV",
|
||||
"W Network": "WNET",
|
||||
"WatchMe": "WME",
|
||||
"WWE Network": "WWEN",
|
||||
"Xbox Video": "XBOX",
|
||||
"Yahoo": "YHOO",
|
||||
"YouTube Red": "RED",
|
||||
"ZDF": "ZDF"
|
||||
}
|
||||
}
|
||||
}
|
||||
22
lib/guessit/jsonutils.py
Normal file
22
lib/guessit/jsonutils.py
Normal file
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
JSON Utils
|
||||
"""
|
||||
import json
|
||||
|
||||
from six import text_type
|
||||
from rebulk.match import Match
|
||||
|
||||
class GuessitEncoder(json.JSONEncoder):
    """
    JSON encoder able to serialize the objects found in a guessit response.
    """

    def default(self, o):  # pylint:disable=method-hidden
        """
        Convert an object the stock encoder cannot handle.

        ``Match`` objects are replaced by their ``advanced`` value, objects
        exposing a ``name`` attribute by that name as text, and anything else
        by its text representation.
        """
        if isinstance(o, Match):
            return o.advanced
        # Babelfish languages/countries long name
        printable = o.name if hasattr(o, 'name') else o  # pragma: no cover
        return text_type(printable)
|
||||
34
lib/guessit/monkeypatch.py
Normal file
34
lib/guessit/monkeypatch.py
Normal file
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Monkeypatch initialisation functions
|
||||
"""
|
||||
|
||||
try:
|
||||
from collections import OrderedDict
|
||||
except ImportError: # pragma: no-cover
|
||||
from ordereddict import OrderedDict # pylint:disable=import-error
|
||||
|
||||
from rebulk.match import Match
|
||||
|
||||
|
||||
def monkeypatch_rebulk():
    """Install the read-only ``advanced`` property on rebulk's ``Match`` class."""

    def match_advanced(match):
        """
        Build advanced dict from match

        The result always holds ``value``, ``start`` and ``end`` (in that
        order); ``raw`` is inserted after ``value`` only when it is truthy.

        :param match: the match the property is read from
        :return: ordered mapping describing the match
        """
        fields = [('value', match.value)]
        if match.raw:
            fields.append(('raw', match.raw))
        fields.append(('start', match.start))
        fields.append(('end', match.end))
        return OrderedDict(fields)

    Match.advanced = property(match_advanced)
|
||||
295
lib/guessit/options.py
Normal file
295
lib/guessit/options.py
Normal file
@@ -0,0 +1,295 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Options
|
||||
"""
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
import pkgutil
|
||||
import shlex
|
||||
|
||||
from argparse import ArgumentParser
|
||||
|
||||
import six
|
||||
|
||||
|
||||
def build_argument_parser():
    """
    Create the command line parser understood by guessit.

    Every option defaults to ``None`` so that unset options can be told apart
    from explicitly provided ones.

    :return: the argument parser
    :rtype: ArgumentParser
    """
    def _value(group, dest, text, *flags):
        # Option taking a single value.
        group.add_argument(*flags, dest=dest, default=None, help=text)

    def _switch(group, dest, text, *flags):
        # Boolean flag.
        group.add_argument(*flags, dest=dest, action='store_true', default=None, help=text)

    def _repeatable(group, dest, text, *flags):
        # Option that may be given several times; values are collected in a list.
        group.add_argument(*flags, dest=dest, action='append', default=None, help=text)

    parser = ArgumentParser()
    parser.add_argument(dest='filename', nargs='*', help='Filename or release name to guess')

    naming = parser.add_argument_group("Naming")
    _value(naming, 'type',
           'The suggested file type: movie, episode. If undefined, type will be guessed.',
           '-t', '--type')
    _switch(naming, 'name_only',
            'Parse files as name only, considering "/" and "\\" like other separators.',
            '-n', '--name-only')
    _switch(naming, 'date_year_first',
            'If short date is found, consider the first digits as the year.',
            '-Y', '--date-year-first')
    _switch(naming, 'date_day_first',
            'If short date is found, consider the second digits as the day.',
            '-D', '--date-day-first')
    _repeatable(naming, 'allowed_languages',
                'Allowed language (can be used multiple times)',
                '-L', '--allowed-languages')
    _repeatable(naming, 'allowed_countries',
                'Allowed country (can be used multiple times)',
                '-C', '--allowed-countries')
    _switch(naming, 'episode_prefer_number',
            'Guess "serie.213.avi" as the episode 213. Without this option, '
            'it will be guessed as season 2, episode 13',
            '-E', '--episode-prefer-number')
    _repeatable(naming, 'expected_title',
                'Expected title to parse (can be used multiple times)',
                '-T', '--expected-title')
    _repeatable(naming, 'expected_group',
                'Expected release group (can be used multiple times)',
                '-G', '--expected-group')
    _repeatable(naming, 'includes', 'List of properties to be detected', '--includes')
    _repeatable(naming, 'excludes', 'List of properties to be ignored', '--excludes')

    reading = parser.add_argument_group("Input")
    _value(reading, 'input_file',
           'Read filenames from an input text file. File should use UTF-8 charset.',
           '-f', '--input-file')

    display = parser.add_argument_group("Output")
    _switch(display, 'verbose', 'Display debug output', '-v', '--verbose')
    _value(display, 'show_property',
           'Display the value of a single property (title, series, video_codec, year, ...)',
           '-P', '--show-property')
    _switch(display, 'advanced',
            'Display advanced information for filename guesses, as json output',
            '-a', '--advanced')
    _switch(display, 'single_value',
            'Keep only first value found for each property',
            '-s', '--single-value')
    _switch(display, 'enforce_list',
            'Wrap each found value in a list even when property has a single value',
            '-l', '--enforce-list')
    _switch(display, 'json',
            'Display information for filename guesses as json output',
            '-j', '--json')
    _switch(display, 'yaml',
            'Display information for filename guesses as yaml output',
            '-y', '--yaml')

    settings = parser.add_argument_group("Configuration")
    _repeatable(settings, 'config',
                'Filepath to configuration file. Configuration file contains the same '
                'options as those from command line options, but option names have "-" characters '
                'replaced with "_". This configuration will be merged with default and user '
                'configuration files.',
                '-c', '--config')
    _switch(settings, 'no_user_config',
            'Disable user configuration. If not defined, guessit tries to read configuration files '
            'at ~/.guessit/options.(json|yml|yaml) and ~/.config/guessit/options.(json|yml|yaml)',
            '--no-user-config')
    _switch(settings, 'no_default_config',
            'Disable default configuration. This should be done only if you are providing a full '
            'configuration through user configuration or --config option. If no "advanced_config" '
            'is provided by another configuration file, it will still be loaded from default '
            'configuration.',
            '--no-default-config')

    about = parser.add_argument_group("Information")
    _switch(about, 'properties', 'Display properties that can be guessed.', '-p', '--properties')
    _switch(about, 'values', 'Display property values that can be guessed.', '-V', '--values')
    _switch(about, 'version', 'Display the guessit version.', '--version')

    return parser
|
||||
|
||||
|
||||
def parse_options(options=None, api=False):
    """
    Turn the given options into an options dict.

    A string is split shell-style and parsed by the argument parser; a dict
    is returned as is; any other non-``None`` value is handed to the argument
    parser as an argument list.

    :param options: options as a command line string, an argument list or a dict
    :type options:
    :param api: when ``options`` is None, return an empty dict instead of
        parsing the process command line
    :type api: boolean
    :return: the options
    :rtype:
    """
    if isinstance(options, six.string_types):
        return vars(argument_parser.parse_args(shlex.split(options)))
    if options is None:
        return {} if api else vars(argument_parser.parse_args())
    if isinstance(options, dict):
        return options
    return vars(argument_parser.parse_args(options))
|
||||
|
||||
|
||||
# Parser instance built once at import time and used by parse_options().
argument_parser = build_argument_parser()
|
||||
|
||||
|
||||
class ConfigurationException(Exception):
    """Raised when a configuration file cannot be loaded."""
|
||||
|
||||
|
||||
def _load_default_options():
    """Read and parse the ``config/options.json`` resource shipped with this package."""
    # Resolve the package from this module's own dotted name so the resource
    # is also found when the package is vendored under another package
    # (e.g. ``lib.guessit``); fall back to the historical top-level name.
    package = __name__.rpartition('.')[0] or 'guessit'
    return json.loads(pkgutil.get_data(package, 'config/options.json').decode('utf-8'))


def load_config(options):
    """
    Load options from configuration files, if defined and present.

    Configurations are merged in this order: the packaged defaults (unless
    ``no_default_config``), the user configuration files that exist (unless
    ``no_user_config``), then the files listed in the ``config`` option.

    :param options: options dict; reads ``no_default_config``, ``no_user_config`` and ``config``
    :type options: dict
    :return: the merged configuration, always containing ``advanced_config``
    :rtype: dict
    """
    configurations = []

    if not options.get('no_default_config'):
        configurations.append(_load_default_options())

    config_files = []

    if not options.get('no_user_config'):
        home_directory = os.path.expanduser("~")
        cwd = os.getcwd()
        yaml_supported = False
        try:
            import yaml  # pylint:disable=unused-variable,unused-import
            yaml_supported = True
        except ImportError:
            pass

        config_file_locations = get_options_file_locations(home_directory, cwd, yaml_supported)
        config_files = [f for f in config_file_locations if os.path.exists(f)]

    custom_config_files = options.get('config')
    if custom_config_files:
        config_files = config_files + custom_config_files

    for config_file in config_files:
        config_file_options = load_config_file(config_file)
        if config_file_options:
            configurations.append(config_file_options)

    config = merge_options(*configurations) if configurations else {}

    if 'advanced_config' not in config:
        # Guessit doesn't work without advanced_config, so we use default if no configuration files provides it.
        config['advanced_config'] = _load_default_options()['advanced_config']

    return config
|
||||
|
||||
|
||||
def merge_options(*options):
    """
    Merge several options dicts, left to right, into a new dict.

    The first dict is deep-copied as the starting point. Each following
    non-empty dict may carry a ``pristine`` entry: ``True`` discards
    everything merged so far, while an iterable of names discards just those
    entries. Its items are then merged with ``merge_option_value``.

    :param options: options dicts; falsy entries are skipped
    :type options:
    :return: the merged options
    :rtype: dict
    """
    result = {}
    if not options:
        return result

    base, overrides = options[0], options[1:]
    if base:
        result.update(copy.deepcopy(base))

    for layer in overrides:
        if not layer:
            continue

        pristine = layer.get('pristine')
        if pristine is True:
            result = {}
        elif pristine:
            for to_reset in pristine:
                result.pop(to_reset, None)

        for option, value in layer.items():
            merge_option_value(option, value, result)

    return result
|
||||
|
||||
|
||||
def merge_option_value(option, value, merged):
    """
    Merge a single option value into ``merged``, in place.

    ``None`` values and the ``pristine`` option are ignored. When ``merged``
    already holds a list for ``option``, the new items missing from it are
    appended; when it holds a dict, both dicts are merged with
    ``merge_options``; otherwise the value replaces the current one (lists
    are copied).

    :param option: option name
    :param value: option value to merge
    :param merged: dict receiving the merged value
    :return:
    """
    if value is None or option == 'pristine':
        return

    current = merged.get(option)
    if isinstance(current, list):
        # A scalar is ONE item to add. Iterating a string would append each of
        # its characters, and a non-iterable would raise. (Python 2 text types
        # have no __iter__, so the hasattr test covers them as well.)
        if isinstance(value, (str, bytes)) or not hasattr(value, '__iter__'):
            value = [value]
        for val in value:
            if val not in current:
                current.append(val)
    elif isinstance(current, dict):
        merged[option] = merge_options(current, value)
    elif isinstance(value, list):
        merged[option] = list(value)
    else:
        merged[option] = value
|
||||
|
||||
|
||||
def load_config_file(filepath):
    """
    Load a configuration as an options dict.

    Format of the file is given with filepath extension: ``.json`` files are
    parsed as JSON, ``.yaml``/``.yml`` files with PyYAML's safe loader. When
    the extension is neither, ``filepath`` itself is tried as inline JSON text.

    :param filepath: path of the configuration file, or inline JSON text
    :type filepath: str
    :return: the loaded options
    :rtype:
    :raises ConfigurationException: when PyYAML is needed but not installed,
        or when the input is neither a supported file nor valid JSON text
    """
    if filepath.endswith('.json'):
        with open(filepath) as config_file_data:
            return json.load(config_file_data)

    if filepath.endswith(('.yaml', '.yml')):
        try:
            import yaml
        except ImportError:  # pragma: no cover
            raise ConfigurationException('Configuration file extension is not supported. '
                                         'PyYAML should be installed to support "%s" file' % (
                                             filepath,))
        with open(filepath) as config_file_data:
            return yaml.load(config_file_data, yaml.SafeLoader)

    try:
        # Try to load input as JSON
        return json.loads(filepath)
    except (TypeError, ValueError):
        # Not JSON text either: report the unsupported input below. Catching
        # only decoding errors lets KeyboardInterrupt/SystemExit propagate.
        pass

    raise ConfigurationException('Configuration file extension is not supported for "%s" file.' % (filepath,))
|
||||
|
||||
|
||||
def get_options_file_locations(homedir, cwd, yaml_supported=False):
    """
    List every path where an options file is looked for.

    For each of ``<homedir>/.guessit/options``, ``<homedir>/.config/guessit/options``
    and ``<cwd>/guessit.options`` (in that order), one path per supported
    extension is produced: ``json``, plus ``yaml`` and ``yml`` when
    ``yaml_supported`` is set.

    :param homedir: user home directory
    :type homedir: basestring
    :param cwd: current working directory
    :type cwd: basestring
    :param yaml_supported: also include the YAML file names
    :type yaml_supported: bool
    :return: candidate file paths
    :rtype: list
    """
    extensions = ['json', 'yaml', 'yml'] if yaml_supported else ['json']
    candidates = (
        (os.path.join(homedir, '.guessit'), 'options'),
        (os.path.join(homedir, '.config', 'guessit'), 'options'),
        (cwd, 'guessit.options'),
    )
    return [os.path.join(directory, stem + '.' + extension)
            for directory, stem in candidates
            for extension in extensions]
|
||||
35
lib/guessit/reutils.py
Normal file
35
lib/guessit/reutils.py
Normal file
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Utils for re module
|
||||
"""
|
||||
|
||||
from rebulk.remodule import re
|
||||
|
||||
|
||||
def build_or_pattern(patterns, name=None, escape=False):
    """
    Build a or pattern string from a list of possible patterns

    The alternatives are joined with ``|`` inside a single group, which is a
    named group when ``name`` is given and a non-capturing group otherwise.
    With no alternative at all, the group contains ``(?!)`` so that the
    result is still a valid pattern, one that never matches.

    :param patterns: the alternatives
    :type patterns: iterable of str
    :param name: name of the resulting group, if any
    :type name: str
    :param escape: escape each alternative and wrap it in its own non-capturing group
    :type escape: bool
    :return: the pattern string
    :rtype: str
    """
    if escape:
        alternatives = ['(?:%s)' % re.escape(pattern) for pattern in patterns]
    else:
        alternatives = list(patterns)

    opening = '(?P<' + name + '>' if name else '(?:'
    # An OR over zero alternatives matches nothing; an empty lookahead
    # assertion keeps the fragment compilable instead of yielding a bare ')'.
    body = '|'.join(alternatives) if alternatives else '(?!)'
    return opening + body + ')'
|
||||
99
lib/guessit/rules/__init__.py
Normal file
99
lib/guessit/rules/__init__.py
Normal file
@@ -0,0 +1,99 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Rebulk object default builder
|
||||
"""
|
||||
from rebulk import Rebulk
|
||||
|
||||
from .markers.path import path
|
||||
from .markers.groups import groups
|
||||
|
||||
from .properties.episodes import episodes
|
||||
from .properties.container import container
|
||||
from .properties.source import source
|
||||
from .properties.video_codec import video_codec
|
||||
from .properties.audio_codec import audio_codec
|
||||
from .properties.screen_size import screen_size
|
||||
from .properties.website import website
|
||||
from .properties.date import date
|
||||
from .properties.title import title
|
||||
from .properties.episode_title import episode_title
|
||||
from .properties.language import language
|
||||
from .properties.country import country
|
||||
from .properties.release_group import release_group
|
||||
from .properties.streaming_service import streaming_service
|
||||
from .properties.other import other
|
||||
from .properties.size import size
|
||||
from .properties.bit_rate import bit_rate
|
||||
from .properties.edition import edition
|
||||
from .properties.cds import cds
|
||||
from .properties.bonus import bonus
|
||||
from .properties.film import film
|
||||
from .properties.part import part
|
||||
from .properties.crc import crc
|
||||
from .properties.mimetype import mimetype
|
||||
from .properties.type import type_
|
||||
|
||||
from .processors import processors
|
||||
|
||||
|
||||
def rebulk_builder(config):
    """
    Default builder for main Rebulk object used by api.

    Every rule factory imported by this module is called with its own section of ``config``
    (an empty dict when the section is missing) and the resulting Rebulk object is chained
    into the main one, in the order listed below.

    :param config: rules configuration, keyed by rule name
    :type config: dict
    :return: Main Rebulk object
    :rtype: Rebulk
    """
    def _config(name):
        return config.get(name, {})

    rebulk = Rebulk()

    common_words = frozenset(_config('common_words'))

    def _register(factory, key, *extra):
        rebulk.rebulk(factory(_config(key), *extra))

    # Markers.
    for factory, key in ((path, 'path'), (groups, 'groups')):
        _register(factory, key)

    # Properties registered before language/country.
    for factory, key in ((episodes, 'episodes'),
                         (container, 'container'),
                         (source, 'source'),
                         (video_codec, 'video_codec'),
                         (audio_codec, 'audio_codec'),
                         (screen_size, 'screen_size'),
                         (website, 'website'),
                         (date, 'date'),
                         (title, 'title'),
                         (episode_title, 'episode_title')):
        _register(factory, key)

    # Language and country builders additionally receive the common words set.
    for factory, key in ((language, 'language'), (country, 'country')):
        _register(factory, key, common_words)

    # Remaining properties, then processors, then mimetype/type.
    for factory, key in ((release_group, 'release_group'),
                         (streaming_service, 'streaming_service'),
                         (other, 'other'),
                         (size, 'size'),
                         (bit_rate, 'bit_rate'),
                         (edition, 'edition'),
                         (cds, 'cds'),
                         (bonus, 'bonus'),
                         (film, 'film'),
                         (part, 'part'),
                         (crc, 'crc'),
                         (processors, 'processors'),
                         (mimetype, 'mimetype'),
                         (type_, 'type')):
        _register(factory, key)

    def customize_properties(properties):
        """
        Customize default rebulk properties: the generic ``count`` entry is replaced by
        ``season_count`` and ``episode_count``, both bound to the same value.
        """
        count = properties.pop('count')

        properties['season_count'] = count
        properties['episode_count'] = count

        return properties

    rebulk.customize_properties = customize_properties

    return rebulk
|
||||
15
lib/guessit/rules/common/__init__.py
Normal file
15
lib/guessit/rules/common/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Common module
|
||||
"""
|
||||
import re
|
||||
|
||||
# Characters treated as separators between tags/words.
seps = r' [](){}+*|=-_~#/\\.,;:'

# Same separators without the bracket characters that delimit groups.
seps_no_groups = ''.join(char for char in seps if char not in '[](){}')

# Same separators without the file-system path separators (slash and backslash).
seps_no_fs = ''.join(char for char in seps if char not in '/\\')

# Separators for title.
title_seps = r'-+/\|'

# Abbreviations used by many rebulk objects: the first item is the placeholder written in
# patterns, the second one is the character class it stands for.
dash = (r'-', '[%s]' % re.escape(seps_no_fs))
alt_dash = (r'@', '[%s]' % re.escape(seps_no_fs))
|
||||
75
lib/guessit/rules/common/comparators.py
Normal file
75
lib/guessit/rules/common/comparators.py
Normal file
@@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Comparators
|
||||
"""
|
||||
try:
|
||||
from functools import cmp_to_key
|
||||
except ImportError:
|
||||
from ...backports import cmp_to_key
|
||||
|
||||
|
||||
def marker_comparator_predicate(match):
    """
    Match predicate used in comparator.

    A match is kept unless it is private, is named ``proper_count`` or ``title``, is a
    ``container`` tagged ``extension``, or is an ``other`` match whose value is ``Rip``.

    :param match: match to test
    :return: True when the match is kept
    :rtype: bool
    """
    if match.private:
        return False
    if match.name in ('proper_count', 'title'):
        return False
    if match.name == 'container' and 'extension' in match.tags:
        return False
    return not (match.name == 'other' and match.value == 'Rip')
|
||||
|
||||
|
||||
def marker_weight(matches, marker, predicate):
    """
    Compute the comparator weight of a marker.

    The weight is the number of distinct match names returned by ``matches.range`` over the
    marker span, filtered by ``predicate``.

    :param matches: matches container
    :param marker: marker whose span is inspected
    :param predicate: filter forwarded to ``matches.range``
    :return: number of distinct match names
    :rtype: int
    """
    start, end = marker.span
    distinct_names = {match.name for match in matches.range(start, end, predicate=predicate)}
    return len(distinct_names)
|
||||
|
||||
|
||||
def marker_comparator(matches, markers, predicate):
    """
    Builds a comparator that returns markers sorted from the most valuable to the less.

    Markers with the higher weight (see ``marker_weight``) come first. On equal weight, the
    marker found later in ``markers`` comes first.

    :param matches: matches container
    :type matches:
    :param markers: list of markers being compared, used to break ties by position
    :param predicate: filter forwarded to ``marker_weight``
    :return: a ``cmp``-style function taking two markers
    :rtype: callable
    """

    def comparator(marker1, marker2):
        """
        The actual comparator function.
        """
        weight2 = marker_weight(matches, marker2, predicate)
        weight1 = marker_weight(matches, marker1, predicate)

        # On a weight tie, give preference to rightmost path.
        return (weight2 - weight1) or (markers.index(marker2) - markers.index(marker1))

    return comparator
|
||||
|
||||
|
||||
def marker_sorted(markers, matches, predicate=marker_comparator_predicate):
    """
    Sort markers from matches, from the most valuable to the less.

    :param markers: markers to sort
    :type markers:
    :param matches: matches container used to weigh each marker
    :type matches:
    :param predicate: filter applied to matches when weighing a marker
    :return: a new sorted list of markers
    :rtype: list
    """
    comparator = marker_comparator(matches, markers, predicate=predicate)
    return sorted(markers, key=cmp_to_key(comparator))
|
||||
125
lib/guessit/rules/common/date.py
Normal file
125
lib/guessit/rules/common/date.py
Normal file
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Date
|
||||
"""
|
||||
from dateutil import parser
|
||||
|
||||
from rebulk.remodule import re
|
||||
|
||||
# Separator allowed between date components.
_dsep = r'[-/ \.]'
# Same, also accepting 'x'; used next to a four-digit year.
_dsep_bis = r'[-/ \.x]'

# (template, separators) pairs; group 1 of each pattern always spans the whole date text.
_date_templates = (
    # 8 digits, then 6 digits, enclosed by separators
    (r'%s((\d{8}))%s', (_dsep, _dsep)),
    (r'%s((\d{6}))%s', (_dsep, _dsep)),
    # two-digit component first / last
    (r'(?:^|[^\d])((\d{2})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])', (_dsep, _dsep)),
    (r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{2}))(?:$|[^\d])', (_dsep, _dsep)),
    # four-digit component first / last
    (r'(?:^|[^\d])((\d{4})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])', (_dsep_bis, _dsep)),
    (r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{4}))(?:$|[^\d])', (_dsep, _dsep_bis)),
    # day (with optional ordinal suffix), alphabetic month, four-digit year
    (r'(?:^|[^\d])((\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4}))(?:$|[^\d])', (_dsep, _dsep)),
)

date_regexps = [re.compile(template % separators, re.IGNORECASE)
                for template, separators in _date_templates]
|
||||
|
||||
|
||||
def valid_year(year):
    """
    Check if number is a valid year, i.e. lies in the range 1920 (included) to 2030 (excluded).

    :param year: number to check
    :type year: int
    :return: True when the number is accepted as a year
    :rtype: bool
    """
    return year >= 1920 and year < 2030
|
||||
|
||||
|
||||
def _is_int(string):
    """
    Check if the input string is an integer.

    :param string: text to check
    :type string: str
    :return: True when ``int(string)`` succeeds, False when it raises ``ValueError``
    :rtype: bool
    """
    try:
        int(string)
    except ValueError:
        return False
    return True
|
||||
|
||||
|
||||
def _guess_day_first_parameter(groups):  # pylint:disable=inconsistent-return-statements
    """
    If day_first is not defined, use some heuristic to fix it.
    It helps to solve issues with python dateutils 2.5.3 parser changes.

    The first and last groups are inspected, in this order:

    - first group starts with a long (4 digits) valid year: ``False``
    - last group ends with a long (4 digits) valid year: ``True``
    - first group starts with a short (2 digits) number greater than 31: ``False``
    - last group ends with a short (2 digits) number greater than 31: ``True``

    :param groups: match groups found for the date
    :type groups: sequence of str
    :return: day_first option guessed value, or None when no heuristic applies
    :rtype: bool
    """
    heuristics = (
        (0, lambda text: valid_year(int(text[:4])), False),
        (-1, lambda text: valid_year(int(text[-4:])), True),
        (0, lambda text: int(text[:2]) > 31, False),
        (-1, lambda text: int(text[-2:]) > 31, True),
    )

    for position, looks_like_year, day_first in heuristics:
        candidate = groups[position]
        if _is_int(candidate) and looks_like_year(candidate):
            return day_first

    return None
|
||||
|
||||
|
||||
def search_date(string, year_first=None, day_first=None):  # pylint:disable=inconsistent-return-statements
    """Looks for date patterns, and if found return the date and group span.

    Assumes there are sentinels at the beginning and end of the string that
    always allow matching a non-digit delimiting the date.

    Patterns from ``date_regexps`` are tried in order. For each pattern that matches, the
    text is handed to ``dateutil.parser.parse`` with every applicable ``dayfirst`` /
    ``yearfirst`` combination; the first parsed date whose year passes ``valid_year`` wins.
    Two-digit years are accepted by the patterns; their expansion is left to dateutil.

    >>> search_date(' This happened on 2002-04-22. ')
    (18, 28, datetime.date(2002, 4, 22))

    >>> search_date(' And this on 17-06-1998. ')
    (13, 23, datetime.date(1998, 6, 17))

    >>> search_date(' no date in here ')

    :param string: text to search
    :type string: str
    :param year_first: force the ``yearfirst`` parser option; None tries both values
    :type year_first: bool
    :param day_first: force the ``dayfirst`` parser option; None lets it be guessed for
        each candidate, falling back on trying both values
    :type day_first: bool
    :return: ``(start, end, date)`` of the first plausible date, or None
    :rtype: tuple
    """
    for date_re in date_regexps:
        search_match = date_re.search(string)
        if not search_match:
            continue

        start, end = search_match.start(1), search_match.end(1)
        groups = search_match.groups()[1:]
        match = '-'.join(groups)

        # Resolve day_first for this candidate only. The guess depends on the groups of the
        # current pattern, so it must not be carried over to the next pattern.
        candidate_day_first = day_first
        if candidate_day_first is None:
            if year_first:
                candidate_day_first = False
            else:
                candidate_day_first = _guess_day_first_parameter(groups)

        # If day_first/year_first is undefined, parse is made using both possible values.
        yearfirst_opts = [False, True] if year_first is None else [year_first]
        dayfirst_opts = [True, False] if candidate_day_first is None else [candidate_day_first]

        for dayfirst in dayfirst_opts:
            for yearfirst in yearfirst_opts:
                try:
                    date = parser.parse(match, dayfirst=dayfirst, yearfirst=yearfirst)
                except (ValueError, TypeError):  # pragma: no cover
                    # see https://bugs.launchpad.net/dateutil/+bug/1247643
                    date = None

                # check date plausibility
                if date and valid_year(date.year):  # pylint:disable=no-member
                    return start, end, date.date()  # pylint:disable=no-member

    return None
|
||||
53
lib/guessit/rules/common/expected.py
Normal file
53
lib/guessit/rules/common/expected.py
Normal file
@@ -0,0 +1,53 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Expected property factory
|
||||
"""
|
||||
import re
|
||||
|
||||
from rebulk import Rebulk
|
||||
from rebulk.utils import find_all
|
||||
|
||||
from . import dash, seps
|
||||
|
||||
|
||||
def build_expected_function(context_key):
    """
    Creates a expected property function.

    :param context_key: key of the context entry holding the expected values
    :type context_key: str
    :return: functional pattern looking for the expected values in the input string
    :rtype: callable
    """

    def expected(input_string, context):
        """
        Expected property functional pattern.

        Entries prefixed with ``re:`` are used as regular expressions (spaces are turned
        into the ``dash`` abbreviation). Other entries are searched literally and
        case-insensitively, after every separator in both the entry and the input has been
        replaced by a space.

        :param input_string: string to search in
        :type input_string: str
        :param context: guess options
        :type context: dict
        :return: spans, or dicts with ``start``, ``end`` and ``value`` keys
        :rtype: list
        """
        ret = []
        # Separator-normalised copy of the input, built on first use. The input itself is
        # left untouched so that regular expression entries always see the original text.
        normalized_input = None

        for search in context.get(context_key) or ():
            if search.startswith('re:'):
                pattern = search[3:].replace(' ', '-')
                matches = Rebulk().regex(pattern, abbreviations=[dash], flags=re.IGNORECASE) \
                    .matches(input_string, context)
                ret.extend(match.span for match in matches)
            else:
                value = search
                if normalized_input is None:
                    normalized_input = input_string
                    for sep in seps:
                        normalized_input = normalized_input.replace(sep, ' ')
                for sep in seps:
                    search = search.replace(sep, ' ')
                for start in find_all(normalized_input, search, ignore_case=True):
                    ret.append({'start': start, 'end': start + len(search), 'value': value})
        return ret

    return expected
|
||||
136
lib/guessit/rules/common/formatters.py
Normal file
136
lib/guessit/rules/common/formatters.py
Normal file
@@ -0,0 +1,136 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Formatters
|
||||
"""
|
||||
from rebulk.formatters import formatters
|
||||
from rebulk.remodule import re
|
||||
from . import seps
|
||||
|
||||
# Separators that cleanup() leaves in place.
_excluded_clean_chars = ',:;-/\\'

# Every other separator; cleanup() replaces each of them with a space.
clean_chars = "".join(sep for sep in seps if sep not in _excluded_clean_chars)
|
||||
|
||||
|
||||
def _potential_before(i, input_string):
    """
    Check if the character at position i can be a potential single char separator considering what's before it.

    The character must be a separator, preceded by exactly one non-separator character which
    is itself preceded by a separator.

    :param i: position of the character to check
    :type i: int
    :param input_string: string being cleaned
    :type input_string: str
    :return: True when what precedes position i is compatible with a single char separator
    :rtype: bool
    """
    if i < 1:
        return False
    if input_string[i] not in seps:
        return False
    # NOTE(review): for i == 1, index i - 2 is -1 and therefore reads the LAST character of
    # the string rather than something located before position i.
    return input_string[i - 2] in seps and input_string[i - 1] not in seps
|
||||
|
||||
|
||||
def _potential_after(i, input_string):
    """
    Check if the character at position i can be a potential single char separator considering what's after it.

    This holds when fewer than two characters follow position i, or when the character two
    positions further is the same as the one at position i and the character in between is
    not a separator.

    :param i: position of the character to check
    :type i: int
    :param input_string: string being cleaned
    :type input_string: str
    :return: True when what follows position i is compatible with a single char separator
    :rtype: bool
    """
    if i + 2 >= len(input_string):
        return True
    return input_string[i + 2] == input_string[i] and input_string[i + 1] not in seps
|
||||
|
||||
|
||||
def cleanup(input_string):
    """
    Removes and strip separators from input_string (but keep ',;' characters)

    It also keep separators for single characters (Marvel's Agents of S.H.I.E.L.D.)

    :param input_string: string to clean
    :type input_string: str
    :return: cleaned string, with runs of spaces collapsed into a single one
    :rtype: str
    """
    clean_string = input_string
    for char in clean_chars:
        clean_string = clean_string.replace(char, ' ')

    # Restore input separator if they separate single characters.
    # Useful for Marvel's Agents of S.H.I.E.L.D.
    # https://github.com/guessit-io/guessit/issues/278
    potential_indices = [
        index for index, letter in enumerate(clean_string)
        if letter in seps
        and _potential_before(index, input_string)
        and _potential_after(index, input_string)
    ]

    # A candidate is restored only when another candidate sits two characters away.
    replace_indices = [index for index in potential_indices
                       if index - 2 in potential_indices or index + 2 in potential_indices]

    dots = set()
    if replace_indices:
        clean_list = list(clean_string)
        for index in replace_indices:
            original_char = input_string[index]
            dots.add(original_char)
            clean_list[index] = original_char
        clean_string = ''.join(clean_list)

    # Restored separators are not stripped from the ends of the result.
    strippable = ''.join([c for c in seps if c not in dots])
    return re.sub(' +', ' ', strip(clean_string, strippable))
|
||||
|
||||
|
||||
def strip(input_string, chars=seps):
    """
    Strip separators from both ends of input_string.

    :param input_string: string to strip
    :type input_string: str
    :param chars: characters to remove, defaults to all separators
    :type chars: str
    :return: stripped string
    :rtype: str
    """
    return input_string.lstrip(chars).rstrip(chars)
|
||||
|
||||
|
||||
def raw_cleanup(raw):
    """
    Cleanup a raw value to perform raw comparison.

    The value is lowercased, then passed through the ``cleanup`` and ``strip`` formatters
    chained by ``rebulk.formatters.formatters``.

    :param raw: raw value
    :type raw: str
    :return: normalized value
    :rtype: str
    """
    normalize = formatters(cleanup, strip)
    return normalize(raw.lower())
|
||||
|
||||
|
||||
def reorder_title(title, articles=('the',), separators=(',', ', ')):
    """
    Reorder the title: a trailing article (compared case-insensitively) preceded by one of
    the separators is moved to the front, e.g. ``Simpsons, The`` becomes ``The Simpsons``.

    :param title: title to reorder
    :type title: str
    :param articles: lowercase articles to look for at the end of the title
    :type articles: iterable of str
    :param separators: separators expected right before the trailing article
    :type separators: iterable of str
    :return: the reordered title, or the title unchanged when no article is found
    :rtype: str
    """
    ltitle = title.lower()
    suffixes = ((separator + article, len(separator))
                for article in articles
                for separator in separators)

    for suffix, separator_length in suffixes:
        suffix_length = len(suffix)
        if ltitle[-suffix_length:] == suffix:
            article_part = title[separator_length - suffix_length:]
            return article_part + ' ' + title[:-suffix_length]

    return title
|
||||
165
lib/guessit/rules/common/numeral.py
Normal file
165
lib/guessit/rules/common/numeral.py
Normal file
@@ -0,0 +1,165 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
parse numeral from various formats
|
||||
"""
|
||||
from rebulk.remodule import re
|
||||
|
||||
# One to four decimal digits.
digital_numeral = r'\d{1,4}'

# Roman numeral: thousands, hundreds, tens, units. The leading lookahead requires at least
# one roman letter, since every following part is optional.
roman_numeral = r'(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})'

# Word lists: the position of a word in its list is the number it stands for (0 to 20).
english_word_numeral_list = [
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
    'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen',
    'twenty',
]

french_word_numeral_list = [
    'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf',
    'vingt',
]

# Alternative French spellings (no accent, feminine 'une', no hyphen).
french_alt_word_numeral_list = [
    'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf',
    'vingt',
]
|
||||
|
||||
|
||||
def __build_word_numeral(*args):
    """
    Build word numeral regexp from list.

    All words of all lists are alternated, in order and without de-duplication, inside a
    single non-capturing group opened by a ``(?=\\w+)`` lookahead.

    :param args: lists of words
    :type args: list of str
    :return: the alternation pattern
    :rtype: str
    """
    words = [word for word_list in args for word in word_list]
    # Without any word there is no opened group to close: re_ stays None and the
    # concatenation below raises TypeError.
    re_ = (r'(?:(?=\w+)' + '|'.join(words)) if words else None
    re_ += ')'
    return re_
|
||||
|
||||
|
||||
# Alternation of every known number word (English, French, alternative French spellings).
word_numeral = __build_word_numeral(
    english_word_numeral_list,
    french_word_numeral_list,
    french_alt_word_numeral_list,
)

# Any supported numeral: digits, roman or word.
numeral = '(?:' + '|'.join((digital_numeral, roman_numeral, word_numeral)) + ')'

# Roman symbols with their value, from the largest to the smallest.
__romanNumeralMap = (
    ('M', 1000), ('CM', 900), ('D', 500), ('CD', 400),
    ('C', 100), ('XC', 90), ('L', 50), ('XL', 40),
    ('X', 10), ('IX', 9), ('V', 5), ('IV', 4),
    ('I', 1),
)

# Matches a string that is entirely a roman numeral.
__romanNumeralPattern = re.compile('^%s$' % roman_numeral)
|
||||
|
||||
|
||||
def __parse_roman(value):
    """
    Convert Roman numeral to integer.

    :param value: Value to parse
    :type value: string
    :return: integer value of the numeral
    :rtype: int
    :raise ValueError: when value is not a well-formed Roman numeral
    """
    if not __romanNumeralPattern.search(value):
        raise ValueError('Invalid Roman numeral: %s' % value)

    total = 0
    position = 0
    # Symbols are listed from the largest to the smallest: consume each one as many times
    # as it appears at the current position.
    for symbol, amount in __romanNumeralMap:
        width = len(symbol)
        while value.startswith(symbol, position):
            total += amount
            position += width
    return total
|
||||
|
||||
|
||||
def __parse_word(value):
    """
    Convert Word numeral to integer.

    The lowercased value is looked up in the English, French and alternative French lists,
    in that order; the number is the position of the word in the first list containing it.

    :param value: Value to parse
    :type value: string
    :return: integer value of the word
    :rtype: int
    :raise ValueError: when the word is in none of the lists
    """
    lowered = value.lower()
    for word_list in (english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list):
        if lowered in word_list:
            return word_list.index(lowered)
    raise ValueError  # pragma: no cover
|
||||
|
||||
|
||||
# First run of digits in a string, whatever surrounds it.
_clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')


def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
    """
    Parse a numeric value into integer.

    Enabled notations are tried in this order: integer, roman numeral, word. With ``clean``
    set, the integer is taken from the first run of digits found by ``_clean_re``, and roman
    numerals (uppercased) and words are looked for in each whitespace-separated token
    before the whole value is tried.

    :param value: Value to parse. Can be an integer, roman numeral or word.
    :type value: string
    :param int_enabled: try to parse value as an integer
    :type int_enabled: bool
    :param roman_enabled: try to parse value as a roman numeral
    :type roman_enabled: bool
    :param word_enabled: try to parse value as a number word
    :type word_enabled: bool
    :param clean: tolerate extra text around the numeral
    :type clean: bool
    :return: Numeric value
    :rtype: int
    :raise ValueError: when value can't be parsed by any enabled notation
    """
    if int_enabled:
        try:
            digits = _clean_re.match(value) if clean else None
            return int(digits.group(1)) if digits else int(value)
        except ValueError:
            pass

    if roman_enabled:
        candidates = [word.upper() for word in value.split()] if clean else []
        candidates.append(value)
        for candidate in candidates:
            try:
                return __parse_roman(candidate)
            except ValueError:
                pass

    if word_enabled:
        candidates = value.split() if clean else []
        candidates.append(value)
        for candidate in candidates:
            try:
                return __parse_word(candidate)
            except ValueError:  # pragma: no cover
                pass

    raise ValueError('Invalid numeral: ' + value)  # pragma: no cover
|
||||
27
lib/guessit/rules/common/pattern.py
Normal file
27
lib/guessit/rules/common/pattern.py
Normal file
@@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Pattern utility functions
|
||||
"""
|
||||
|
||||
|
||||
def is_disabled(context, name):
    """Whether a specific pattern is disabled.

    The context object might define an inclusion list (includes) or an exclusion list (excludes).
    A pattern is considered disabled if it's found in the exclusion list, or if a non-empty
    inclusion list is defined and the pattern is not found in it.

    :param context: guess options; may be None or empty
    :type context: dict
    :param name: pattern name
    :type name: str
    :return: True when the pattern is disabled
    :rtype: bool
    """
    if not context:
        return False

    excludes = context.get('excludes')
    if excludes and name in excludes:
        return True

    includes = context.get('includes')
    # bool() keeps the return type consistent: a missing or empty inclusion list yields
    # False instead of None or the empty container itself.
    return bool(includes) and name not in includes
|
||||
106
lib/guessit/rules/common/quantity.py
Normal file
106
lib/guessit/rules/common/quantity.py
Normal file
@@ -0,0 +1,106 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Quantities: Size
|
||||
"""
|
||||
import re
|
||||
from abc import abstractmethod
|
||||
|
||||
import six
|
||||
|
||||
from ..common import seps
|
||||
|
||||
|
||||
class Quantity(object):
    """
    Represent a quantity object with magnitude and units.

    Subclasses provide ``parse_units`` to normalize the unit notation.
    """

    # A number, optionally decimal, followed by optional non-digit text (the units).
    parser_re = re.compile(r'(?P<magnitude>\d+(?:[.]\d+)?)(?P<units>[^\d]+)?')

    def __init__(self, magnitude, units):
        self.magnitude = magnitude
        self.units = units

    @classmethod
    @abstractmethod
    def parse_units(cls, value):
        """
        Parse a string to a proper unit notation.

        :param value: raw units text
        :return: normalized units
        """
        raise NotImplementedError

    @classmethod
    def fromstring(cls, string):
        """
        Parse the string into a quantity object.

        The magnitude is an ``int`` when the text allows it, a ``float`` otherwise.

        :param string: text starting with the magnitude, e.g. ``1.1GB``
        :return: quantity instance of the class it is called on
        """
        parsed = cls.parser_re.match(string).groupdict()
        raw_magnitude = parsed['magnitude']
        try:
            magnitude = int(raw_magnitude)
        except ValueError:
            magnitude = float(raw_magnitude)

        return cls(magnitude, cls.parse_units(parsed['units']))

    def __hash__(self):
        # Hash of the text form, the same text __eq__ compares against strings.
        return hash(str(self))

    def __eq__(self, other):
        # A quantity compares equal to its own text form.
        if isinstance(other, six.string_types):
            return str(self) == other
        if isinstance(other, self.__class__):
            return self.magnitude == other.magnitude and self.units == other.units
        return NotImplemented

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return '<%s [%s]>' % (self.__class__.__name__, self)

    def __str__(self):
        return '%s%s' % (self.magnitude, self.units)
|
||||
|
||||
|
||||
class Size(Quantity):
    """
    Represent size.

    e.g.: 1.1GB, 300MB
    """

    @classmethod
    def parse_units(cls, value):
        """
        Normalize size units: surrounding separators are stripped and the text is uppercased.
        """
        stripped = value.strip(seps)
        return stripped.upper()
|
||||
|
||||
|
||||
class BitRate(Quantity):
    """
    Represent bit rate.

    e.g.: 320Kbps, 1.5Mbps
    """

    @classmethod
    def parse_units(cls, value):
        """
        Normalize bit rate units: surrounding separators are stripped, the text is
        capitalized, then ``bits`` and ``bit`` are replaced by ``bps``.
        """
        capitalized = value.strip(seps).capitalize()
        # 'bits' must be handled before 'bit', which is a prefix of it.
        return capitalized.replace('bits', 'bps').replace('bit', 'bps')
|
||||
|
||||
|
||||
class FrameRate(Quantity):
    """
    Represent frame rate.

    e.g.: 24fps, 60fps
    """

    @classmethod
    def parse_units(cls, value):
        """
        Frame rate units are always ``fps``, whatever the raw units text is.
        """
        return 'fps'
|
||||
74
lib/guessit/rules/common/validators.py
Normal file
74
lib/guessit/rules/common/validators.py
Normal file
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Validators
|
||||
"""
|
||||
from functools import partial
|
||||
|
||||
from rebulk.validators import chars_before, chars_after, chars_surround
|
||||
from . import seps
|
||||
|
||||
# rebulk character validators pre-bound to the separators defined by this package.
seps_before, seps_after, seps_surround = (
    partial(chars_before, seps),
    partial(chars_after, seps),
    partial(chars_surround, seps),
)
|
||||
|
||||
|
||||
def int_coercable(string):
    """
    Check if string can be coerced to int.

    :param string: text to check
    :type string: str
    :return: True when ``int(string)`` succeeds, False when it raises ``ValueError``
    :rtype: bool
    """
    try:
        int(string)
    except ValueError:
        return False
    return True
|
||||
|
||||
|
||||
def and_(*validators):
    """
    Compose validators functions: the result accepts a value only when every validator does.

    :param validators: validator functions, each taking the value to validate
    :type validators: callable
    :return: composed validator
    :rtype: callable
    """
    def composed(string):
        """
        Composed validators function.

        :param string: value to validate
        :type string:
        :return: False as soon as one validator rejects the value, True otherwise
        :rtype: bool
        """
        return all(validator(string) for validator in validators)
    return composed
|
||||
|
||||
|
||||
def or_(*validators):
    """
    Compose validators functions: the result accepts a value when at least one validator does.

    :param validators: validator functions, each taking the value to validate
    :type validators: callable
    :return: composed validator
    :rtype: callable
    """
    def composed(string):
        """
        Composed validators function.

        :param string: value to validate
        :type string:
        :return: True as soon as one validator accepts the value, False otherwise
        :rtype: bool
        """
        return any(validator(string) for validator in validators)
    return composed
|
||||
34
lib/guessit/rules/common/words.py
Normal file
34
lib/guessit/rules/common/words.py
Normal file
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Words utils
|
||||
"""
|
||||
from collections import namedtuple
|
||||
|
||||
from . import seps
|
||||
|
||||
# A word found in a string: its (start, end) span and its text.
_Word = namedtuple('_Word', ['span', 'value'])


def iter_words(string):
    """
    Iterate on all words in a string.

    A word is a maximal run of characters that are not ASCII separators.

    :param string: string to split into words
    :type string: str
    :return: words, in order of appearance
    :rtype: iterable[_Word]
    """
    word_start = None
    for index, char in enumerate(string):
        if ord(char) < 128 and char in seps:  # Make sure we don't exclude unicode characters.
            if word_start is not None:
                yield _Word(span=(word_start, index), value=string[word_start:index])
                word_start = None
        elif word_start is None:
            word_start = index

    # Last word, when the string doesn't end with a separator.
    if word_start is not None:
        yield _Word(span=(word_start, len(string)), value=string[word_start:])
|
||||
5
lib/guessit/rules/markers/__init__.py
Normal file
5
lib/guessit/rules/markers/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Markers
|
||||
"""
|
||||
52
lib/guessit/rules/markers/groups.py
Normal file
52
lib/guessit/rules/markers/groups.py
Normal file
@@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Groups markers (...), [...] and {...}
|
||||
"""
|
||||
from rebulk import Rebulk
|
||||
|
||||
|
||||
def groups(config):
    """
    Builder for rebulk object.

    :param config: rule configuration; ``starting`` and ``ending`` hold the opening and
        closing characters, an opening character being paired with the closing character
        found at the same position
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk.defaults(name="group", marker=True)

    starting = config['starting']
    ending = config['ending']

    def mark_groups(input_string):
        """
        Functional pattern to mark groups (...), [...] and {...}.

        :param input_string: string to search in
        :return: ``(start, end)`` spans, brackets included, in closing order
        :rtype: list
        """
        # One stack of opening positions per group type. Sized from the configuration so
        # that any number of bracket pairs is supported.
        openings = [[] for _ in range(max(len(starting), len(ending)))]

        ret = []
        for i, char in enumerate(input_string):
            start_type = starting.find(char)
            if start_type > -1:
                openings[start_type].append(i)

            end_type = ending.find(char)
            # A closing character without a pending opening one is ignored.
            if end_type > -1 and openings[end_type]:
                ret.append((openings[end_type].pop(), i + 1))
        return ret

    rebulk.functional(mark_groups)
    return rebulk
|
||||
47
lib/guessit/rules/markers/path.py
Normal file
47
lib/guessit/rules/markers/path.py
Normal file
@@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Path markers
|
||||
"""
|
||||
from rebulk import Rebulk
|
||||
|
||||
from rebulk.utils import find_all
|
||||
|
||||
|
||||
def path(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration (not used by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk.defaults(name="path", marker=True)

    def mark_path(input_string, context):
        """
        Functional pattern to mark path elements.

        With the ``name_only`` option the whole input is a single element; otherwise the
        input is split on slashes and backslashes.

        :param input_string: string to split
        :param context: guess options
        :return: ``(start, end)`` span of every path element, separators excluded
        :rtype: list
        """
        if context.get('name_only', False):
            return [(0, len(input_string))]

        # Separator positions, enclosed by virtual separators before and after the string.
        boundaries = sorted(
            list(find_all(input_string, '/'))
            + list(find_all(input_string, '\\'))
            + [-1, len(input_string)]
        )
        return [(previous + 1, following)
                for previous, following in zip(boundaries, boundaries[1:])]

    rebulk.functional(mark_path)
    return rebulk
|
||||
20
lib/guessit/rules/match_processors.py
Normal file
20
lib/guessit/rules/match_processors.py
Normal file
@@ -0,0 +1,20 @@
|
||||
"""
|
||||
Match processors
|
||||
"""
|
||||
from guessit.rules.common import seps
|
||||
|
||||
|
||||
def strip(match, chars=seps):
    """
    Strip given characters from match.

    The match span is narrowed in place while its first or last character is one of
    ``chars``. Narrowing never goes past the other end of the span, so a match made only of
    such characters ends up empty instead of reading characters outside of its span.

    :param chars: characters to strip, defaults to all separators
    :param match: match to narrow
    :return: False when the match is falsy after stripping, None otherwise
    """
    while match.start < match.end and match.input_string[match.start] in chars:
        match.start += 1
    while match.end > match.start and match.input_string[match.end - 1] in chars:
        match.end -= 1
    if not match:
        return False
    return None
|
||||
259
lib/guessit/rules/processors.py
Normal file
259
lib/guessit/rules/processors.py
Normal file
@@ -0,0 +1,259 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Processors
|
||||
"""
|
||||
from collections import defaultdict
|
||||
import copy
|
||||
|
||||
import six
|
||||
|
||||
from rebulk import Rebulk, Rule, CustomRule, POST_PROCESS, PRE_PROCESS, AppendMatch, RemoveMatch
|
||||
|
||||
from .common import seps_no_groups
|
||||
from .common.formatters import cleanup
|
||||
from .common.comparators import marker_sorted
|
||||
from .common.date import valid_year
|
||||
from .common.words import iter_words
|
||||
|
||||
|
||||
class EnlargeGroupMatches(CustomRule):
    """
    Enlarge matches that are starting and/or ending group to include brackets in their span.
    """
    priority = PRE_PROCESS

    def when(self, matches, context):
        """
        Collect the matches sitting on the inner edges of every ``group`` marker.

        :return: ``(starting, ending)`` lists of matches, or ``False`` when both lists are empty
        """
        starting = []
        ending = []

        for group in matches.markers.named('group'):
            # Matches beginning one position after the group start / ending one position before its end.
            starting.extend(matches.starting(group.start + 1))
            ending.extend(matches.ending(group.end - 1))

        return (starting, ending) if (starting or ending) else False

    def then(self, matches, when_response, context):
        """
        Widen every collected match by one position on the side that touches the group edge.

        Each match is removed from ``matches`` before its bounds change and appended again afterwards.
        """
        starting, ending = when_response

        for match in starting:
            matches.remove(match)
            match.start -= 1
            # NOTE(review): presumably keeps the raw value clear of the bracket - confirm against rebulk.
            match.raw_start += 1
            matches.append(match)

        for match in ending:
            matches.remove(match)
            match.end += 1
            match.raw_end -= 1
            matches.append(match)
|
||||
|
||||
|
||||
class EquivalentHoles(Rule):
    """
    Creates equivalent matches for holes that have same values than existing (case insensitive)
    """
    priority = POST_PROCESS
    consequence = AppendMatch

    def when(self, matches, context):
        """
        Turn holes whose text equals an existing string match (ignoring case) into matches of that name.

        Both the hole and the existing match end up with the value chosen by ``_preferred_string``.
        Existing matches tagged ``equivalent-ignore`` are skipped.

        :return: list of holes renamed and tagged ``equivalent``
        """
        equivalents = []

        for filepath in marker_sorted(matches.markers.named('path'), matches):
            holes = matches.holes(start=filepath.start, end=filepath.end, formatter=cleanup)
            for name in matches.names:
                # Walk a snapshot: claimed holes are removed from the live list below.
                for hole in list(holes):
                    for candidate in matches.named(name):
                        if not isinstance(candidate.value, six.string_types):
                            continue
                        if hole.value.lower() != candidate.value.lower():
                            continue
                        if 'equivalent-ignore' in candidate.tags:
                            continue

                        preferred = _preferred_string(hole.value, candidate.value)
                        if hole.value != preferred:
                            hole.value = preferred
                        if candidate.value != preferred:
                            candidate.value = preferred

                        hole.name = name
                        hole.tags = ['equivalent']
                        equivalents.append(hole)
                        if hole in holes:
                            holes.remove(hole)

        return equivalents
|
||||
|
||||
|
||||
class RemoveAmbiguous(Rule):
    """
    If multiple matches are found with same name and different values, keep the one in the most valuable filepart.
    Also keep others match with same name and values than those kept ones.
    """

    priority = POST_PROCESS
    consequence = RemoveMatch

    def __init__(self, sort_function=marker_sorted, predicate=None):
        """
        :param sort_function: callable ``(markers, matches)`` returning the fileparts in visiting order
        :param predicate: optional filter handed to ``matches.range`` for each filepart
        """
        super(RemoveAmbiguous, self).__init__()
        self.sort_function = sort_function
        self.predicate = predicate

    def when(self, matches, context):
        """
        Walk the fileparts in sorted order and list the matches to drop.

        A match is dropped when its name was already seen in an earlier filepart and its value is
        not one of the values recorded for that name.

        :return: list of matches to remove
        """
        seen_names = set()
        values = defaultdict(list)
        to_remove = []

        for filepart in self.sort_function(matches.markers.named('path'), matches):
            names_here = set()
            for match in matches.range(filepart.start, filepart.end, predicate=self.predicate):
                names_here.add(match.name)
                known_values = values[match.name]
                if match.value in known_values:
                    continue
                if match.name in seen_names:
                    to_remove.append(match)
                else:
                    known_values.append(match.value)

            # Names only count as "already seen" once their whole filepart has been processed.
            seen_names.update(names_here)

        return to_remove
|
||||
|
||||
|
||||
class RemoveLessSpecificSeasonEpisode(RemoveAmbiguous):
    """
    If multiple season/episodes matches are found with different values,
    keep the one tagged as 'SxxExx' or in the rightmost filepart.
    """
    def __init__(self, name):
        """
        :param name: match name this rule instance is restricted to
        """
        def has_name(match):
            return match.name == name

        def is_sxxexx(match):
            return match.name == name and 'SxxExx' in match.tags

        def reversed_marker_sort(markers, matches):
            # Markers are reversed before being handed to marker_sorted.
            return marker_sorted(list(reversed(markers)), matches, is_sxxexx)

        super(RemoveLessSpecificSeasonEpisode, self).__init__(sort_function=reversed_marker_sort,
                                                              predicate=has_name)
|
||||
|
||||
|
||||
def _preferred_string(value1, value2): # pylint:disable=too-many-return-statements
|
||||
"""
|
||||
Retrieves preferred title from both values.
|
||||
:param value1:
|
||||
:type value1: str
|
||||
:param value2:
|
||||
:type value2: str
|
||||
:return: The preferred title
|
||||
:rtype: str
|
||||
"""
|
||||
if value1 == value2:
|
||||
return value1
|
||||
if value1.istitle() and not value2.istitle():
|
||||
return value1
|
||||
if not value1.isupper() and value2.isupper():
|
||||
return value1
|
||||
if not value1.isupper() and value1[0].isupper() and not value2[0].isupper():
|
||||
return value1
|
||||
if _count_title_words(value1) > _count_title_words(value2):
|
||||
return value1
|
||||
return value2
|
||||
|
||||
|
||||
def _count_title_words(value):
    """
    Count how many words of value are title-cased.

    :param value: string to inspect
    :type value: str
    :return: number of words for which ``str.istitle()`` is true
    :rtype: int
    """
    return sum(1 for word in iter_words(value) if word.value.istitle())
|
||||
|
||||
|
||||
class SeasonYear(Rule):
    """
    If a season is a valid year and no year was found, create an match with year.
    """
    priority = POST_PROCESS
    consequence = AppendMatch

    def when(self, matches, context):
        """
        :return: shallow copies of the season matches holding a valid year, renamed to ``year``;
                 an empty list when a year match already exists
        """
        if matches.named('year'):
            return []

        years = []
        for season in matches.named('season'):
            if not valid_year(season.value):
                continue
            year = copy.copy(season)
            year.name = 'year'
            years.append(year)
        return years
|
||||
|
||||
|
||||
class YearSeason(Rule):
    """
    If a year is found, no season found, and episode is found, create an match with season.
    """
    priority = POST_PROCESS
    consequence = AppendMatch

    def when(self, matches, context):
        """
        :return: shallow copies of the year matches renamed to ``season``; an empty list when a
                 season match already exists or when there is no episode match
        """
        if matches.named('season') or not matches.named('episode'):
            return []

        seasons = []
        for year in matches.named('year'):
            season = copy.copy(year)
            season.name = 'season'
            seasons.append(season)
        return seasons
|
||||
|
||||
|
||||
class Processors(CustomRule):
    """
    Empty rule for ordering post_processing properly.
    """
    priority = POST_PROCESS

    def when(self, matches, context):
        """Do nothing and return ``None``."""
        return None

    def then(self, matches, when_response, context):  # pragma: no cover
        """Do nothing."""
        return None
|
||||
|
||||
|
||||
class StripSeparators(CustomRule):
    """
    Strip separators from matches. Keep separators if they are from acronyms, like in ".S.H.I.E.L.D."
    """
    priority = POST_PROCESS

    def when(self, matches, context):
        """Always hand the whole match collection over to ``then``."""
        return matches

    def then(self, matches, when_response, context):  # pragma: no cover
        """
        Move ``raw_start`` / ``raw_end`` inwards while the raw value begins / ends with a separator.

        A leading (resp. trailing) separator is kept when the character two positions further in
        is a separator too.
        """
        for match in matches:
            # NOTE(review): the number of passes is len(match.span), not the length of the raw text.
            passes = len(match.span)

            for _ in range(passes):
                raw = match.raw
                if raw[0] in seps_no_groups and (len(raw) < 3 or raw[2] not in seps_no_groups):
                    match.raw_start += 1

            for _ in range(passes):
                raw = match.raw
                if raw[-1] in seps_no_groups and (len(raw) < 3 or raw[-3] not in seps_no_groups):
                    match.raw_end -= 1
|
||||
|
||||
|
||||
def processors(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    # Registration order is kept exactly as listed.
    rules = (
        EnlargeGroupMatches,
        EquivalentHoles,
        RemoveLessSpecificSeasonEpisode('season'),
        RemoveLessSpecificSeasonEpisode('episode'),
        RemoveAmbiguous,
        SeasonYear,
        YearSeason,
        Processors,
        StripSeparators,
    )
    return rebulk.rules(*rules)
|
||||
5
lib/guessit/rules/properties/__init__.py
Normal file
5
lib/guessit/rules/properties/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Properties
|
||||
"""
|
||||
235
lib/guessit/rules/properties/audio_codec.py
Normal file
235
lib/guessit/rules/properties/audio_codec.py
Normal file
@@ -0,0 +1,235 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
audio_codec, audio_profile and audio_channels property
|
||||
"""
|
||||
from rebulk import Rebulk, Rule, RemoveMatch
|
||||
from rebulk.remodule import re
|
||||
|
||||
from ..common import dash
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_before, seps_after
|
||||
|
||||
audio_properties = ['audio_codec', 'audio_profile', 'audio_channels']
|
||||
|
||||
|
||||
def audio_codec(config):
    """
    Builder for rebulk object.

    Registers the ``audio_codec``, ``audio_profile`` and ``audio_channels`` patterns, then the
    validation rules defined in this module.

    :param config: rule configuration; its ``audio_channels`` entry maps a value to a list of
                   patterns, each taken as a regex when prefixed with ``re:``
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)

    secondary_names = ('audio_profile', 'audio_channels')

    def audio_codec_priority(match1, match2):
        """
        Gives priority to audio_codec

        :param match1:
        :type match1:
        :param match2:
        :type match2:
        :return: the profile/channels match when it is paired with an audio_codec match,
                 ``'__default__'`` for any other pairing
        :rtype:
        """
        if match1.name == 'audio_codec' and match2.name in secondary_names:
            return match2
        if match1.name in secondary_names and match2.name == 'audio_codec':
            return match1
        return '__default__'

    def dts_hd_solver(match, other):
        """Return the other match when it is an audio_codec, ``'__default__'`` otherwise."""
        if other.name == 'audio_codec':
            return other
        return '__default__'

    # --- audio_codec -------------------------------------------------------------------------
    rebulk.defaults(name='audio_codec',
                    conflict_solver=audio_codec_priority,
                    disabled=lambda context: is_disabled(context, 'audio_codec'))

    rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
    rebulk.string("MP2", value="MP2")
    rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='Dolby Digital')
    rebulk.regex('Dolby-?Atmos', 'Atmos', value='Dolby Atmos')
    rebulk.string("AAC", value="AAC")
    rebulk.string('EAC3', 'DDP', 'DD+', value='Dolby Digital Plus')
    rebulk.string("Flac", value="FLAC")
    rebulk.string("DTS", value="DTS")
    rebulk.regex('DTS-?HD', 'DTS(?=-?MA)', value='DTS-HD', conflict_solver=dts_hd_solver)
    rebulk.regex('True-?HD', value='Dolby TrueHD')
    rebulk.string('Opus', value='Opus')
    rebulk.string('Vorbis', value='Vorbis')
    rebulk.string('PCM', value='PCM')
    rebulk.string('LPCM', value='LPCM')

    # --- audio_profile: the second tag names the codec value the profile is tied to ------------
    rebulk.defaults(clear=True,
                    name='audio_profile',
                    disabled=lambda context: is_disabled(context, 'audio_profile'))
    rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD'])
    rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD'])
    rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS'])
    rebulk.string('HE', value='High Efficiency', tags=['audio_profile.rule', 'AAC'])
    rebulk.string('LC', value='Low Complexity', tags=['audio_profile.rule', 'AAC'])
    rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital'])
    rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital'])

    # --- audio_channels --------------------------------------------------------------------------
    rebulk.defaults(clear=True,
                    name="audio_channels",
                    disabled=lambda context: is_disabled(context, 'audio_channels'))
    # Dot-less forms are tagged weak; AudioChannelsValidatorRule filters them afterwards.
    rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')

    for channels_value, patterns in config.get('audio_channels').items():
        for pattern in patterns:
            if pattern.startswith('re:'):
                rebulk.regex(pattern[3:], value=channels_value, children=True)
            else:
                rebulk.string(pattern, value=channels_value)

    rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
                 AudioChannelsValidatorRule)

    return rebulk
|
||||
|
||||
|
||||
class AudioValidatorRule(Rule):
    """
    Remove audio properties if not surrounded by separators and not next each others
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: audio matches that, on at least one side, have neither a separator nor an
                 adjacent audio match
        """
        def is_audio(match):
            return match.name in audio_properties

        rejected = []
        for audio in matches.range(predicate=is_audio):
            if not seps_before(audio) and not matches.range(audio.start - 1, audio.start, is_audio):
                rejected.append(audio)
            elif not seps_after(audio) and not matches.range(audio.end, audio.end + 1, is_audio):
                rejected.append(audio)

        return rejected
|
||||
|
||||
|
||||
class AudioProfileRule(Rule):
    """
    Abstract rule to validate audio profiles
    """
    priority = 64
    dependency = AudioValidatorRule
    consequence = RemoveMatch

    def __init__(self, codec):
        """
        :param codec: audio_codec value (also used as a profile tag) this rule validates against
        """
        super(AudioProfileRule, self).__init__()
        self.codec = codec

    def enabled(self, context):
        """Run only when the ``audio_profile`` property is not disabled."""
        return not is_disabled(context, 'audio_profile')

    def when(self, matches, context):
        """
        Check every profile tagged for ``self.codec`` against the audio_codec matches.

        The codec is looked up at the profile's own span first, then before it, then after it.
        Without a codec the profile itself is dropped; with one, the matches conflicting with
        the profile are dropped instead.

        :return: list of matches to remove
        """
        def is_expected_codec(match):
            return match.name == 'audio_codec' and match.value == self.codec

        def is_tied_profile(match):
            return 'audio_profile.rule' in match.tags and self.codec in match.tags

        to_remove = []
        for profile in matches.named('audio_profile', is_tied_profile):
            codec = (matches.at_span(profile.span, is_expected_codec, 0)
                     or matches.previous(profile, is_expected_codec)
                     or matches.next(profile, is_expected_codec))
            if codec:
                to_remove.extend(matches.conflicting(profile))
            else:
                to_remove.append(profile)
        return to_remove
|
||||
|
||||
|
||||
class DtsHDRule(AudioProfileRule):
    """
    Rule to validate DTS-HD profile
    """

    def __init__(self):
        """Bind the rule to the ``DTS-HD`` codec value."""
        super(DtsHDRule, self).__init__(codec='DTS-HD')
|
||||
|
||||
|
||||
class DtsRule(AudioProfileRule):
    """
    Rule to validate DTS profile
    """

    def __init__(self):
        """Bind the rule to the ``DTS`` codec value."""
        super(DtsRule, self).__init__(codec='DTS')
|
||||
|
||||
|
||||
class AacRule(AudioProfileRule):
    """
    Rule to validate AAC profile
    """

    def __init__(self):
        """Bind the rule to the ``AAC`` codec value."""
        super(AacRule, self).__init__(codec='AAC')
|
||||
|
||||
|
||||
class DolbyDigitalRule(AudioProfileRule):
    """
    Rule to validate Dolby Digital profile
    """

    def __init__(self):
        """Bind the rule to the ``Dolby Digital`` codec value."""
        super(DolbyDigitalRule, self).__init__(codec='Dolby Digital')
|
||||
|
||||
|
||||
class HqConflictRule(Rule):
    """
    Solve conflict between HQ from other property and from audio_profile.
    """

    dependency = [DtsHDRule, DtsRule, AacRule, DolbyDigitalRule]
    consequence = RemoveMatch

    def enabled(self, context):
        """Run only when the ``audio_profile`` property is not disabled."""
        return not is_disabled(context, 'audio_profile')

    def when(self, matches, context):
        """
        :return: ``other`` matches sharing their exact span with a ``High Quality`` audio_profile match
        """
        def is_high_quality(match):
            return match.value == 'High Quality'

        hq_spans = [profile.span for profile in matches.named('audio_profile', is_high_quality)]

        def overlaps_hq_profile(match):
            return match.span in hq_spans

        return matches.named('other', overlaps_hq_profile)
|
||||
|
||||
|
||||
class AudioChannelsValidatorRule(Rule):
    """
    Remove audio_channel if no audio codec as previous match.
    """
    priority = 128
    consequence = RemoveMatch

    def enabled(self, context):
        """Run only when the ``audio_channels`` property is not disabled."""
        return not is_disabled(context, 'audio_channels')

    def when(self, matches, context):
        """
        :return: matches tagged ``weak-audio_channels`` with no audio_codec match covering the
                 position right before them
        """
        def is_codec(match):
            return match.name == 'audio_codec'

        return [channels for channels in matches.tagged('weak-audio_channels')
                if not matches.range(channels.start - 1, channels.start, is_codec)]
|
||||
74
lib/guessit/rules/properties/bit_rate.py
Normal file
74
lib/guessit/rules/properties/bit_rate.py
Normal file
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
video_bit_rate and audio_bit_rate properties
|
||||
"""
|
||||
import re
|
||||
|
||||
from rebulk import Rebulk
|
||||
from rebulk.rules import Rule, RemoveMatch, RenameMatch
|
||||
|
||||
from ..common import dash, seps
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.quantity import BitRate
|
||||
from ..common.validators import seps_surround
|
||||
|
||||
|
||||
def bit_rate(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Every bit rate is first registered as ``audio_bit_rate``; ``BitRateTypeRule`` then renames or
    removes matches.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def both_disabled(context):
        return is_disabled(context, 'audio_bit_rate') and is_disabled(context, 'video_bit_rate')

    def bit_rate_solver(match, other):
        """Keep the bit rate against a non-weak audio_channels match; return the other match otherwise."""
        if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags:
            return match
        return other

    rebulk = Rebulk(disabled=both_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='audio_bit_rate', validator=seps_surround)
    rebulk.regex(r'\d+-?[kmg]b(ps|its?)', r'\d+\.\d+-?[kmg]b(ps|its?)',
                 conflict_solver=bit_rate_solver,
                 formatter=BitRate.fromstring, tags=['release-group-prefix'])

    rebulk.rules(BitRateTypeRule)

    return rebulk
|
||||
|
||||
|
||||
class BitRateTypeRule(Rule):
    """
    Convert audio bit rate guess into video bit rate.
    """
    consequence = [RenameMatch('video_bit_rate'), RemoveMatch]

    def when(self, matches, context):
        """
        Sort the ``audio_bit_rate`` matches into those to rename and those to remove.

        :return: ``(to_rename, to_remove)`` lists, or ``False`` when both are empty
        """
        def is_video_property(match):
            return match.name in ('source', 'screen_size', 'video_codec')

        def is_audio_codec(match):
            return match.name == 'audio_codec'

        def has_content(hole):
            # Truthy when something other than separators is left in the hole.
            return hole.value.strip(seps)

        to_rename = []
        to_remove = []

        if is_disabled(context, 'audio_bit_rate'):
            to_remove.extend(matches.named('audio_bit_rate'))
        else:
            video_bit_rate_disabled = is_disabled(context, 'video_bit_rate')
            for match in matches.named('audio_bit_rate'):
                previous = matches.previous(match, index=0, predicate=is_video_property)
                # Only a bit rate sitting right after a video property is reconsidered.
                if not previous or matches.holes(previous.end, match.start, predicate=has_content):
                    continue

                after = matches.next(match, index=0, predicate=is_audio_codec)
                if after and not matches.holes(match.end, after.start, predicate=has_content):
                    bitrate = match.value
                    # Right before an audio codec, Kbps and sub-10 Mbps values are left as audio bit rates.
                    if bitrate.units == 'Kbps' or (bitrate.units == 'Mbps' and bitrate.magnitude < 10):
                        continue

                # NOTE(review): nesting of this branch was rebuilt from a flattened source - confirm.
                if video_bit_rate_disabled:
                    to_remove.append(match)
                else:
                    to_rename.append(match)

        if to_rename or to_remove:
            return to_rename, to_remove
        return False
|
||||
56
lib/guessit/rules/properties/bonus.py
Normal file
56
lib/guessit/rules/properties/bonus.py
Normal file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
bonus property
|
||||
"""
|
||||
from rebulk.remodule import re
|
||||
|
||||
from rebulk import Rebulk, AppendMatch, Rule
|
||||
|
||||
from .title import TitleFromPosition
|
||||
from ..common.formatters import cleanup
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_surround
|
||||
|
||||
|
||||
def bonus(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def bonus_solver(match, conflicting):
        """Keep the bonus against video_codec/episode matches not tagged ``weak-episode``."""
        if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'bonus'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)

    rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
                 validator={'__parent__': seps_surround},
                 validate_all=True,
                 conflict_solver=bonus_solver)

    rebulk.rules(BonusTitleRule)

    return rebulk
|
||||
|
||||
|
||||
class BonusTitleRule(Rule):
    """
    Find bonus title after bonus.
    """
    dependency = TitleFromPosition
    consequence = AppendMatch

    properties = {'bonus_title': [None]}

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        """
        :return: the first hole between the bonus number and the end of its path marker, renamed
                 to ``bonus_title``; ``None`` when there is no bonus or no non-empty hole
        """
        bonus_number = matches.named('bonus', lambda match: not match.private, index=0)
        if not bonus_number:
            return None

        filepath = matches.markers.at_match(bonus_number, lambda marker: marker.name == 'path', 0)
        hole = matches.holes(bonus_number.end, filepath.end + 1, formatter=cleanup, index=0)
        if not (hole and hole.value):
            return None

        hole.name = 'bonus_title'
        return hole
|
||||
41
lib/guessit/rules/properties/cds.py
Normal file
41
lib/guessit/rules/properties/cds.py
Normal file
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
cd and cd_count properties
|
||||
"""
|
||||
from rebulk.remodule import re
|
||||
|
||||
from rebulk import Rebulk
|
||||
|
||||
from ..common import dash
|
||||
from ..common.pattern import is_disabled
|
||||
|
||||
|
||||
def cds(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def plausible_number(match):
        # Accept 1..99 only.
        return 0 < match.value < 100

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'cd'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])

    # "cd<N>" with an optional "of <M>" part.
    rebulk.regex(r'cd-?(?P<cd>\d+)(?:-?of-?(?P<cd_count>\d+))?',
                 validator={'cd': plausible_number, 'cd_count': plausible_number},
                 formatter={'cd': int, 'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties={'cd': [None], 'cd_count': [None]})

    # "<M>cd" / "<M>cds".
    rebulk.regex(r'(?P<cd_count>\d+)-?cds?',
                 validator={'cd': plausible_number, 'cd_count': plausible_number},
                 formatter={'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties={'cd': [None], 'cd_count': [None]})

    return rebulk
|
||||
61
lib/guessit/rules/properties/container.py
Normal file
61
lib/guessit/rules/properties/container.py
Normal file
@@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
container property
|
||||
"""
|
||||
from rebulk.remodule import re
|
||||
|
||||
from rebulk import Rebulk
|
||||
|
||||
from ..common import seps
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_surround
|
||||
from ...reutils import build_or_pattern
|
||||
|
||||
|
||||
def container(config):
    """
    Builder for rebulk object.

    Registers two families of ``container`` patterns: regexes anchored at the end of the string
    and tagged ``extension``, then plain strings that must be surrounded by separators.

    :param config: rule configuration; the ``subtitles``, ``info``, ``videos``, ``torrent`` and
                   ``nzb`` entries are read
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def extension_solver(match, other):
        """Yield to source/video_codec matches and to container matches not tagged ``extension``."""
        if other.name in ('source', 'video_codec') or \
                (other.name == 'container' and 'extension' not in other.tags):
            return other
        return '__default__'

    def name_solver(match, other):
        """Win over source/video_codec matches and over container matches tagged ``extension``."""
        if other.name in ('source', 'video_codec') or \
                (other.name == 'container' and 'extension' in other.tags):
            return match
        return '__default__'

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'container'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name='container',
                    formatter=lambda value: value.strip(seps),
                    tags=['extension'],
                    conflict_solver=extension_solver)

    subtitles = config['subtitles']
    info = config['info']
    videos = config['videos']
    torrent = config['torrent']
    nzb = config['nzb']

    # One end-anchored ".<ext>" regex per family, registered in this order.
    families = (
        (subtitles, 'subtitle'),
        (info, 'info'),
        (videos, 'video'),
        (torrent, 'torrent'),
        (nzb, 'nzb'),
    )
    for extensions, family_tag in families:
        rebulk.regex(r'\.' + build_or_pattern(extensions) + '$', exts=extensions,
                     tags=['extension', family_tag])

    rebulk.defaults(clear=True,
                    name='container',
                    validator=seps_surround,
                    formatter=lambda s: s.lower(),
                    conflict_solver=name_solver)

    # 'sub' and 'ass' are left out of the plain-string subtitle patterns.
    plain_subtitles = [sub for sub in subtitles if sub not in ('sub', 'ass')]
    rebulk.string(*plain_subtitles, tags=['subtitle'])
    rebulk.string(*videos, tags=['video'])
    rebulk.string(*torrent, tags=['torrent'])
    rebulk.string(*nzb, tags=['nzb'])

    return rebulk
|
||||
114
lib/guessit/rules/properties/country.py
Normal file
114
lib/guessit/rules/properties/country.py
Normal file
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
country property
|
||||
"""
|
||||
# pylint: disable=no-member
|
||||
import babelfish
|
||||
|
||||
from rebulk import Rebulk
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.words import iter_words
|
||||
|
||||
|
||||
def country(config, common_words):
    """
    Builder for rebulk object.

    Also installs a ``guessit`` entry in ``babelfish.country_converters``, built from the
    ``synonyms`` entry of the configuration.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'country'))
    rebulk = rebulk.defaults(name='country')

    def find_countries(string, context=None):
        """
        Find countries in given string.
        """
        allowed_countries = None
        if context:
            allowed_countries = context.get('allowed_countries')
        return CountryFinder(allowed_countries, common_words).find(string)

    def country_solver(match, other):
        """Return the other match only when it is a language and the country is US or GB."""
        if other.name != 'language':
            return match
        if match.value not in (babelfish.Country('US'), babelfish.Country('GB')):
            return match
        return other

    def no_allowed_countries(context):
        return not context.get('allowed_countries')

    rebulk.functional(find_countries,
                      conflict_solver=country_solver,
                      properties={'country': [None]},
                      disabled=no_allowed_countries)

    babelfish.country_converters['guessit'] = GuessitCountryConverter(config['synonyms'])

    return rebulk
|
||||
|
||||
|
||||
class GuessitCountryConverter(babelfish.CountryReverseConverter):
    """Country converter that checks a table of lower-cased synonyms before the babelfish lookups."""

    def __init__(self, synonyms):
        """
        :param synonyms: mapping of an alpha2 code to the list of its synonyms
        """
        self.guessit_exceptions = {
            synonym.lower(): alpha2
            for alpha2, synlist in synonyms.items()
            for synonym in synlist
        }

    @property
    def codes(self):
        """Union of the ``name`` converter codes, the ``babelfish.COUNTRIES`` values and the synonyms."""
        known = babelfish.country_converters['name'].codes
        known = known | frozenset(babelfish.COUNTRIES.values())
        return known | frozenset(self.guessit_exceptions)

    def convert(self, alpha2):
        """Return ``'UK'`` for ``'GB'``, else the string form of the babelfish country."""
        if alpha2 == 'GB':
            return 'UK'
        return str(babelfish.Country(alpha2))

    def reverse(self, name):  # pylint:disable=arguments-differ
        """
        Resolve a name to an alpha2 code.

        :raise babelfish.CountryReverseError: when none of the lookups succeeds
        """
        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        lowered = name.lower()
        if lowered in self.guessit_exceptions:
            return self.guessit_exceptions[lowered]

        # Next, try the upper-cased name directly as a country code.
        try:
            return babelfish.Country(name.upper()).alpha2
        except ValueError:
            pass

        # Finally, try it as a country name.
        try:
            return babelfish.Country.fromname(name).alpha2
        except babelfish.CountryReverseError:
            pass

        raise babelfish.CountryReverseError(name)
|
||||
|
||||
|
||||
class CountryFinder(object):
    """Helper class to search and return country matches."""

    def __init__(self, allowed_countries, common_words):
        """
        :param allowed_countries: iterable of country names or alpha2 codes, or ``None``
        :param common_words: container of words never to be read as a country
        """
        self.allowed_countries = set(entry.lower() for entry in (allowed_countries or []))
        self.common_words = common_words

    def find(self, string):
        """Return all matches for country."""
        # NOTE(review): spans come from the stripped string - confirm callers never pass leading blanks.
        lowered = string.strip().lower()
        allowed = self.allowed_countries

        for word_match in iter_words(lowered):
            word = word_match.value
            if word.lower() in self.common_words:
                continue

            try:
                country_object = babelfish.Country.fromguessit(word)
                if country_object.name.lower() in allowed or country_object.alpha2.lower() in allowed:
                    yield self._to_rebulk_match(word_match, country_object)
            except babelfish.Error:
                # Words babelfish cannot resolve are skipped.
                continue

    @classmethod
    def _to_rebulk_match(cls, word, value):
        """Build the ``(start, end, kwargs)`` tuple from the word span and the value."""
        start, end = word.span[0], word.span[1]
        return start, end, {'value': value}
|
||||
90
lib/guessit/rules/properties/crc.py
Normal file
90
lib/guessit/rules/properties/crc.py
Normal file
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
crc and uuid properties
|
||||
"""
|
||||
from rebulk.remodule import re
|
||||
|
||||
from rebulk import Rebulk
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_surround
|
||||
|
||||
|
||||
def crc(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers the ``crc32`` regex (eight hexadecimal characters) and the ``uuid`` functional
    pattern; both must be surrounded by separators.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    numbering = ['episode', 'season']

    def crc32_solver(match, other):
        """Return the episode/season match when one conflicts with a crc32."""
        return other if other.name in numbering else '__default__'

    def uuid_solver(match, other):
        """Keep the uuid when it conflicts with an episode/season match."""
        return match if other.name in numbering else '__default__'

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'crc32'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)

    rebulk.regex('(?:[a-fA-F]|[0-9]){8}', name='crc32', conflict_solver=crc32_solver)
    rebulk.functional(guess_idnumber, name='uuid', conflict_solver=uuid_solver)
    return rebulk
|
||||
|
||||
|
||||
# Character classes used while scanning an id candidate.
_DIGIT = 0
_LETTER = 1
_OTHER = 2

# Candidate ids: runs of at least 20 ASCII letters, digits or dashes.
_idnum = re.compile(r'(?P<uuid>[a-zA-Z0-9-]{20,})')


def guess_idnumber(string):
    """
    Guess id number function

    A candidate is kept only when its characters switch often between classes (digit, letter,
    other) and its letters are not mostly repeats of the preceding letter.

    :param string: string to search
    :type string: str
    :return: list of ``(start, end)`` spans of the retained candidates
    :rtype: list
    """
    # pylint:disable=invalid-name
    digits = '0123456789'
    letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    spans = []

    for found in _idnum.finditer(string):
        candidate = found.group('uuid')
        kind_switches = 0
        letter_switches = 0
        letter_total = 0
        previous_letter = None
        # The scan starts as if a letter had just been seen.
        previous_kind = _LETTER

        for char in candidate:
            if char in digits:
                kind = _DIGIT
            elif char in letters:
                kind = _LETTER
                letter_total += 1
                if char != previous_letter:
                    letter_switches += 1
                    previous_letter = char
            else:
                kind = _OTHER

            if kind != previous_kind:
                kind_switches += 1
                previous_kind = kind

        # only return the result as probable if we alternate often between
        # char type (more likely for hash values than for common words)
        kind_ratio = float(kind_switches) / len(candidate)
        if letter_total > 0:
            letter_ratio = float(letter_switches) / letter_total
        else:
            letter_ratio = 1

        if kind_ratio > 0.4 and letter_ratio > 0.4:
            spans.append(found.span())

    return spans
|
||||
84
lib/guessit/rules/properties/date.py
Normal file
84
lib/guessit/rules/properties/date.py
Normal file
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
date and year properties
|
||||
"""
|
||||
from rebulk import Rebulk, RemoveMatch, Rule
|
||||
|
||||
from ..common.date import search_date, valid_year
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_surround
|
||||
|
||||
|
||||
def date(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object that holds the year and date patterns.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def year_disabled(context):
        """Tell whether the year property is disabled in this context."""
        return is_disabled(context, 'year')

    def year_conflict_solver(match, other):
        """Give way to an episode/season match whose raw text is shorter than the year."""
        if other.name in ('episode', 'season') and len(other.raw) < len(match.raw):
            return other
        return '__default__'

    def year_validator(match):
        """Accept a year only when surrounded by separators and valid as a year."""
        return seps_surround(match) and valid_year(match.value)

    def date_disabled(context):
        """Tell whether the date property is disabled in this context."""
        return is_disabled(context, 'date')

    def date_conflict_solver(match, other):
        """Give way to episode, season and crc32 matches."""
        if other.name in ('episode', 'season', 'crc32'):
            return other
        return '__default__'

    def date_functional(string, context):  # pylint:disable=inconsistent-return-statements
        """
        Search for a date in the string and describe the match.

        :param string: text to search
        :param context: context providing the date_year_first / date_day_first options
        :return: start, end and a dict carrying the value, or None when no date is found
        """
        found = search_date(string, context.get('date_year_first'), context.get('date_day_first'))
        if not found:
            return None
        return found[0], found[1], {'value': found[2]}

    rebulk = Rebulk().defaults(validator=seps_surround)

    rebulk.regex(r"\d{4}", name="year", formatter=int,
                 disabled=year_disabled,
                 conflict_solver=year_conflict_solver,
                 validator=year_validator)

    rebulk.functional(date_functional, name="date", properties={'date': [None]},
                      disabled=date_disabled,
                      conflict_solver=date_conflict_solver)

    rebulk.rules(KeepMarkedYearInFilepart)

    return rebulk
|
||||
|
||||
|
||||
class KeepMarkedYearInFilepart(Rule):
    """
    Decide which year matches to drop when a filepart contains several of them.

    When some years sit inside a group marker and others do not, only the first
    year inside a marker is kept. When no year is inside a marker, the first
    year is dropped (so it cannot override the title) along with every year
    after the second one.
    """
    priority = 64
    consequence = RemoveMatch

    def enabled(self, context):
        return not is_disabled(context, 'year')

    def when(self, matches, context):
        discarded = []
        if len(matches.named('year')) <= 1:
            return discarded

        for filepart in matches.markers.named('path'):
            years = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year')
            if len(years) <= 1:
                continue

            # Split the years of this filepart by whether a group marker covers them.
            marked = []
            unmarked = []
            for year in years:
                in_group = matches.markers.at_match(year, lambda marker: marker.name == 'group')
                (marked if in_group else unmarked).append(year)

            if marked and unmarked:
                discarded.extend(unmarked)
                discarded.extend(marked[1:])  # Keep the first year in marker.
            elif not marked:
                discarded.append(unmarked[0])  # Keep first year for title.
                if len(unmarked) > 2:
                    discarded.extend(unmarked[2:])

        return discarded
|
||||
52
lib/guessit/rules/properties/edition.py
Normal file
52
lib/guessit/rules/properties/edition.py
Normal file
@@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
edition property
|
||||
"""
|
||||
from rebulk.remodule import re
|
||||
|
||||
from rebulk import Rebulk
|
||||
from ..common import dash
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_surround
|
||||
|
||||
|
||||
def edition(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object that holds the edition patterns.

    All patterns are registered under the name 'edition', are matched
    case-insensitively and are validated with ``seps_surround``. The whole
    Rebulk object is disabled when the 'edition' property is disabled.

    :param config: rule configuration (not used by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'edition'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    rebulk.regex('collector', "collector'?s?-edition", 'edition-collector', value='Collector')
    # 'Special' is also an episode_details value: let that match win the conflict.
    rebulk.regex('special-edition', 'edition-special', value='Special',
                 conflict_solver=lambda match, other: other
                 if other.name == 'episode_details' and other.value == 'Special'
                 else '__default__')
    rebulk.string('se', value='Special', tags='has-neighbor')
    rebulk.string('ddc', value="Director's Definitive Cut")
    rebulk.regex('criterion-edition', 'edition-criterion', 'CC', value='Criterion')
    rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe')
    rebulk.regex('limited', 'limited-edition', value='Limited', tags=['has-neighbor', 'release-group-prefix'])
    rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical')
    rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
                 value="Director's Cut")
    rebulk.regex('extended', 'extended-?cut', 'extended-?version',
                 value='Extended', tags=['has-neighbor', 'release-group-prefix'])
    rebulk.regex('alternat(e|ive)(?:-?Cut)?', value='Alternative Cut', tags=['has-neighbor', 'release-group-prefix'])
    # Single words whose edition value is the word itself.
    for value in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
        rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
    rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])
    rebulk.regex('imax', 'imax-edition', value='IMAX')
    rebulk.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan')
    rebulk.regex('ultimate-edition', value='Ultimate')
    # Combined forms yield two edition values at once.
    rebulk.regex("ultimate-collector'?s?-edition", value=['Ultimate', 'Collector'])
    rebulk.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection', value=['Ultimate', 'Fan'])

    return rebulk
|
||||
300
lib/guessit/rules/properties/episode_title.py
Normal file
300
lib/guessit/rules/properties/episode_title.py
Normal file
@@ -0,0 +1,300 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Episode title
|
||||
"""
|
||||
from collections import defaultdict
|
||||
|
||||
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PROCESS
|
||||
|
||||
from ..common import seps, title_seps
|
||||
from ..common.formatters import cleanup
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import or_
|
||||
from ..properties.title import TitleFromPosition, TitleBaseRule
|
||||
from ..properties.type import TypeProcessor
|
||||
|
||||
|
||||
def episode_title(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object that holds the episode_title rules.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # Names of the matches that may come right before an episode title.
    anchor_names = ('episode', 'episode_count',
                    'season', 'season_count', 'date', 'title', 'year')

    builder = Rebulk(disabled=lambda context: is_disabled(context, 'episode_title'))
    return builder.rules(RemoveConflictsWithEpisodeTitle(anchor_names),
                         EpisodeTitleFromPosition(anchor_names),
                         AlternativeTitleReplace(anchor_names),
                         TitleToEpisodeTitle,
                         Filepart3EpisodeTitle,
                         Filepart2EpisodeTitle,
                         RenameEpisodeTitleWhenMovieType)
|
||||
|
||||
|
||||
class RemoveConflictsWithEpisodeTitle(Rule):
    """
    Remove 'part' and 'year' matches that might lead to wrong episode_title parsing.

    A candidate is considered only when the last public match before it in the
    same filepart is one of ``previous_names`` and the first public match after
    it is one of ``next_names``. It is removed, together with its parent if it
    has one, when a non-empty hole of the same group lies between it and one of
    those neighbours; a 'part' match additionally needs such a hole after it.
    """

    priority = 64
    consequence = RemoveMatch

    def __init__(self, previous_names):
        super(RemoveConflictsWithEpisodeTitle, self).__init__()
        self.previous_names = previous_names
        self.next_names = ('streaming_service', 'screen_size', 'source',
                           'video_codec', 'audio_codec', 'other', 'container')
        self.affected_if_holes_after = ('part', )
        self.affected_names = ('part', 'year')

    def when(self, matches, context):
        def is_public(candidate):
            """Tell whether the match is not private."""
            return not candidate.private

        def group_of(candidate):
            """Return the first 'group' marker found at the given match, if any."""
            return matches.markers.at_match(candidate, predicate=lambda marker: marker.name == 'group', index=0)

        removable = []
        for filepart in matches.markers.named('path'):
            candidates = matches.range(filepart.start, filepart.end,
                                       predicate=lambda m: m.name in self.affected_names)
            for match in candidates:
                before = matches.range(filepart.start, match.start, predicate=is_public, index=-1)
                if not before or before.name not in self.previous_names:
                    continue

                after = matches.range(match.end, filepart.end, predicate=is_public, index=0)
                if not after or after.name not in self.next_names:
                    continue

                own_group = group_of(match)

                def has_value_in_same_group(current_match, current_group=own_group):
                    """Return true if current match has value and belongs to the current group."""
                    return current_match.value.strip(seps) and (
                        current_group == group_of(current_match)
                    )

                holes_before = matches.holes(before.end, match.start, predicate=has_value_in_same_group)
                holes_after = matches.holes(match.end, after.start, predicate=has_value_in_same_group)

                # Keep the match when nothing follows it and either nothing precedes it
                # or it is of a kind that is only affected by holes after it.
                if not holes_after and (not holes_before or match.name in self.affected_if_holes_after):
                    continue

                removable.append(match)
                if match.parent:
                    removable.append(match.parent)

        return removable
|
||||
|
||||
|
||||
class TitleToEpisodeTitle(Rule):
    """
    When several different titles are found, turn the ones that follow an
    episode match into episode_title.
    """
    dependency = TitleFromPosition

    def when(self, matches, context):
        titles = matches.named('title')

        # Nothing to convert unless at least two distinct title values exist.
        distinct_values = {title.value for title in titles}
        if len(distinct_values) < 2:
            return []

        return [title for title in titles
                if matches.previous(title, lambda match: match.name == 'episode')]

    def then(self, matches, when_response, context):
        for renamed in when_response:
            # The match is removed and appended again around the rename.
            matches.remove(renamed)
            renamed.name = 'episode_title'
            matches.append(renamed)
|
||||
|
||||
|
||||
class EpisodeTitleFromPosition(TitleBaseRule):
    """
    Add an episode_title match to the existing matches, based on its position.

    Must run after TitleFromPosition rule.
    """
    dependency = TitleToEpisodeTitle

    def __init__(self, previous_names):
        super(EpisodeTitleFromPosition, self).__init__('episode_title', ['title'])
        self.previous_names = previous_names

    def hole_filter(self, hole, matches):
        # A hole qualifies when it follows one of the expected matches,
        # or when a crc32 match exists anywhere.
        preceding = matches.previous(hole,
                                     lambda previous: previous.named(*self.previous_names),
                                     0)
        checksum = matches.named('crc32')
        return preceding or checksum

    def filepart_filter(self, filepart, matches):
        # Only the filepart where the title was found is considered.
        return bool(matches.range(filepart.start, filepart.end, lambda match: match.name == 'title'))

    def should_remove(self, match, matches, filepart, hole, context):
        # episode_details matches are never removed by this rule.
        return match.name != 'episode_details' and \
            super(EpisodeTitleFromPosition, self).should_remove(match, matches, filepart, hole, context)

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        already_found = matches.named('episode_title')
        if already_found:
            return None
        return super(EpisodeTitleFromPosition, self).when(matches, context)
|
||||
|
||||
|
||||
class AlternativeTitleReplace(Rule):
    """
    If an alternative_title was found and the title chained before it follows
    one of the expected matches (or a crc32 match exists), rename the
    alternative_title to episode_title.
    """
    dependency = EpisodeTitleFromPosition
    consequence = RenameMatch

    def __init__(self, previous_names):
        super(AlternativeTitleReplace, self).__init__()
        self.previous_names = previous_names

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.named('episode_title'):
            return None

        alternative_title = matches.range(predicate=lambda match: match.name == 'alternative_title', index=0)
        if not alternative_title:
            return None

        main_title = matches.chain_before(alternative_title.start, seps=seps,
                                          predicate=lambda match: 'title' in match.tags, index=0)
        if not main_title:
            return None

        preceding = matches.previous(main_title,
                                     lambda previous: previous.named(*self.previous_names),
                                     0)
        checksum = matches.named('crc32')
        if preceding or checksum:
            return alternative_title
        return None

    def then(self, matches, when_response, context):
        renamed = when_response
        matches.remove(renamed)
        renamed.name = 'episode_title'
        # The tag marks the match so it can be told apart from other episode titles.
        renamed.tags.append('alternative-replaced')
        matches.append(renamed)
|
||||
|
||||
|
||||
class RenameEpisodeTitleWhenMovieType(Rule):
    """
    Rename episode_title to alternative_title when the type is not episode.

    The rule applies only if at least one episode_title lacks the
    'alternative-replaced' tag; it then renames every episode_title match.
    """
    priority = POST_PROCESS

    dependency = TypeProcessor
    consequence = RenameMatch

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        untagged = matches.named('episode_title', lambda m: 'alternative-replaced' not in m.tags)
        if not untagged:
            return None
        if matches.named('type', lambda m: m.value == 'episode'):
            return None
        return matches.named('episode_title')

    def then(self, matches, when_response, context):
        for renamed in when_response:
            matches.remove(renamed)
            renamed.name = 'alternative_title'
            matches.append(renamed)
|
||||
|
||||
|
||||
class Filepart3EpisodeTitle(Rule):
    """
    Handle paths made of at least 3 fileparts structured like this:

    Serie name/S01/E01-episode_title.mkv
    AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC

    Serie name/S01/episode_title-E01.mkv
    AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC

    If CCCC contains an episode and BBB contains a season,
    then the title is taken from the first hole of AAAA.
    """
    consequence = AppendMatch('title')

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.tagged('filepart-title'):
            return None

        fileparts = matches.markers.named('path')
        if len(fileparts) < 3:
            return None

        subdirectory = fileparts[-3]
        directory = fileparts[-2]
        filename = fileparts[-1]

        episode_number = matches.range(filename.start, filename.end, lambda match: match.name == 'episode', 0)
        if not episode_number:
            return None

        season = matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0)
        if not season:
            return None

        hole = matches.holes(subdirectory.start, subdirectory.end,
                             ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
                             formatter=cleanup, seps=title_seps, predicate=lambda match: match.value,
                             index=0)
        return hole if hole else None
|
||||
|
||||
|
||||
class Filepart2EpisodeTitle(Rule):
    """
    Handle paths made of at least 2 fileparts structured like this:

    Serie name S01/E01-episode_title.mkv
    AAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBB

    If BBBB contains an episode and AAA contains a hole followed by a season,
    then the title is to be found in AAAA.

    or

    Serie name/S01E01-episode_title.mkv
    AAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBB

    If BBBB contains a season and an episode and AAA contains a hole,
    then the title is to be found in AAAA.
    """
    consequence = AppendMatch('title')

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.tagged('filepart-title'):
            return None

        fileparts = matches.markers.named('path')
        if len(fileparts) < 2:
            return None

        directory = fileparts[-2]
        filename = fileparts[-1]

        episode_number = matches.range(filename.start, filename.end, lambda match: match.name == 'episode', 0)
        if not episode_number:
            return None

        # The season may sit in the directory or, failing that, in the filename.
        season = (matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0) or
                  matches.range(filename.start, filename.end, lambda match: match.name == 'season', 0))
        if not season:
            return None

        hole = matches.holes(directory.start, directory.end,
                             ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
                             formatter=cleanup, seps=title_seps,
                             predicate=lambda match: match.value, index=0)
        if not hole:
            return None

        hole.tags.append('filepart-title')
        return hole
|
||||
912
lib/guessit/rules/properties/episodes.py
Normal file
912
lib/guessit/rules/properties/episodes.py
Normal file
@@ -0,0 +1,912 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
episode, season, disc, episode_count, season_count and episode_details properties
|
||||
"""
|
||||
import copy
|
||||
from collections import defaultdict
|
||||
|
||||
from rebulk import Rebulk, RemoveMatch, Rule, AppendMatch, RenameMatch
|
||||
from rebulk.match import Match
|
||||
from rebulk.remodule import re
|
||||
from rebulk.utils import is_iterable
|
||||
|
||||
from guessit.rules import match_processors
|
||||
from guessit.rules.common.numeral import parse_numeral, numeral
|
||||
from .title import TitleFromPosition
|
||||
from ..common import dash, alt_dash, seps, seps_no_fs
|
||||
from ..common.formatters import strip
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_surround, int_coercable, and_
|
||||
from ...reutils import build_or_pattern
|
||||
|
||||
|
||||
def episodes(config):
    """
    Build the Rebulk object that holds the episode, season, version, count,
    episode_details and episode_format patterns, plus the rules post-processing them.

    :param config: rule configuration; the keys read here are 'season_words',
        'episode_words', 'of_words', 'all_words', 'season_markers',
        'season_ep_markers', 'disc_markers', 'episode_markers',
        'range_separators', 'discrete_separators', 'episode_max_range',
        'season_max_range' and 'max_range_gap'
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """

    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    def is_season_episode_disabled(context):
        """Return a true value when the episode or the season property is disabled."""
        return is_disabled(context, 'episode') or is_disabled(context, 'season')

    def episodes_season_chain_breaker(matches):
        """
        Break chains when the last two episode values differ by more than
        episode_max_range, or the last two season values by more than season_max_range.

        :param matches: matches collected so far in the chain
        :type matches:
        :return: True when the chain must be broken
        :rtype: bool
        """
        eps = matches.named('episode')
        if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > episode_max_range:
            return True

        seasons = matches.named('season')
        if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > season_max_range:
            return True
        return False

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns

        :param match: the season/episode match being checked
        :param other: the conflicting match
        :return: the match to drop as decided below, or '__default__' to defer
            to the default conflict handling (NOTE(review): return-value
            semantics come from rebulk; confirm against its documentation)
        """
        if match.name != other.name:
            if match.name == 'episode' and other.name == 'year':
                return match
            if match.name in ('season', 'episode'):
                if other.name in ('video_codec', 'audio_codec', 'container', 'date'):
                    return match
                if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
                        and not match.initiator.children.named(match.name + 'Marker')) or (
                            other.name == 'screen_size' and not int_coercable(other.raw)):
                    return match
                if other.name in ('season', 'episode') and match.initiator != other.initiator:
                    # Two weak patterns conflicting with each other: no special handling.
                    if (match.initiator.name in ('weak_episode', 'weak_duplicate')
                            and other.initiator.name in ('weak_episode', 'weak_duplicate')):
                        return '__default__'
                    for current in (match, other):
                        if 'weak-episode' in current.tags or 'x' in current.initiator.raw.lower():
                            return current
        return '__default__'

    def ordering_validator(match):
        """
        Validator for season list. They should be in natural order to be validated.

        episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
        or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)
        """
        values = match.children.to_dict()
        if 'season' in values and is_iterable(values['season']):
            # Season numbers must be in natural order to be validated.
            if not list(sorted(values['season'])) == values['season']:
                return False
        if 'episode' in values and is_iterable(values['episode']):
            # Episode numbers must be in natural order to be validated.
            if not list(sorted(values['episode'])) == values['episode']:
                return False

        def is_consecutive(property_name):
            """
            Check if the property season or episode has valid consecutive values.

            :param property_name: 'season' or 'episode'
            :type property_name: str
            :return: False when two values joined by a weak discrete separator
                are not within max_range_gap + 1 of each other, True otherwise
            :rtype: bool
            """
            previous_match = None
            valid = True
            for current_match in match.children.named(property_name):
                if previous_match:
                    # NOTE(review): the result of this lookup is discarded; it looks
                    # like a leftover of the indexed call just below - confirm.
                    match.children.previous(current_match,
                                            lambda m: m.name == property_name + 'Separator')
                    separator = match.children.previous(current_match,
                                                        lambda m: m.name == property_name + 'Separator', 0)
                    if separator:
                        if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                            if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
                                valid = False
                        # A strong discrete separator validates the chain and stops the check.
                        if separator.raw in strong_discrete_separators:
                            valid = True
                            break
                previous_match = current_match
            return valid

        return is_consecutive('episode') and is_consecutive('season')

    def validate_roman(match):
        """
        Validate a numeral match: accepted as is when its raw text can be coerced
        to an int, otherwise only when surrounded by separators.

        :param match: match to validate
        :type match:
        :return: whether the match is valid
        :rtype:
        """
        if int_coercable(match.raw):
            return True
        return seps_surround(match)

    season_words = config['season_words']
    episode_words = config['episode_words']
    of_words = config['of_words']
    all_words = config['all_words']
    season_markers = config['season_markers']
    season_ep_markers = config['season_ep_markers']
    disc_markers = config['disc_markers']
    episode_markers = config['episode_markers']
    range_separators = config['range_separators']
    # Weak discrete separators: every separator of seps_no_fs that is not a range separator.
    weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
    strong_discrete_separators = config['discrete_separators']
    discrete_separators = strong_discrete_separators + weak_discrete_separators
    episode_max_range = config['episode_max_range']
    season_max_range = config['season_max_range']
    max_range_gap = config['max_range_gap']

    rebulk = Rebulk() \
        .regex_defaults(flags=re.IGNORECASE) \
        .string_defaults(ignore_case=True) \
        .chain_defaults(chain_breaker=episodes_season_chain_breaker) \
        .defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
                  formatter={'season': int, 'episode': int, 'version': int, 'count': int},
                  children=True,
                  private_parent=True,
                  conflict_solver=season_episode_conflict_solver,
                  abbreviations=[alt_dash])

    # S01E02, 01x02, S01S02S03
    rebulk.chain(
        tags=['SxxExx'],
        validate_all=True,
        validator={'__parent__': and_(seps_surround, ordering_validator)},
        disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
               build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)')\
        .repeater('+') \
        .regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*')

    rebulk.chain(tags=['SxxExx'],
                 validate_all=True,
                 validator={'__parent__': and_(seps_surround, ordering_validator)},
                 disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)').repeater('+') \

    rebulk.chain(tags=['SxxExx'],
                 validate_all=True,
                 validator={'__parent__': and_(seps_surround, ordering_validator)},
                 disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)') \
        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*')

    rebulk.chain(tags=['SxxExx'],
                 validate_all=True,
                 validator={'__parent__': and_(seps_surround, ordering_validator)},
                 disabled=is_season_episode_disabled) \
        .defaults(tags=['SxxExx']) \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)') \
        .regex('(?P<other>Extras)', name='other', value='Extras', tags=['no-release-group-prefix']).repeater('?') \
        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
                                name='seasonSeparator',
                                escape=True) +
               r'(?P<season>\d+)').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
        rebulk.string(episode_detail,
                      private_parent=False,
                      children=False,
                      value=episode_detail,
                      name='episode_details',
                      disabled=lambda context: is_disabled(context, 'episode_details'))

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
                    validate_all=True,
                    validator={'__parent__': and_(seps_surround, ordering_validator)},
                    children=True,
                    private_parent=True,
                    conflict_solver=season_episode_conflict_solver)

    # Season word followed by a numeral, with an optional "of <count>" and more seasons.
    rebulk.chain(validate_all=True,
                 conflict_solver=season_episode_conflict_solver,
                 formatter={'season': parse_numeral, 'count': parse_numeral},
                 validator={'__parent__': and_(seps_surround, ordering_validator),
                            'season': validate_roman,
                            'count': validate_roman},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
        .defaults(formatter={'season': parse_numeral, 'count': parse_numeral},
                  validator={'season': validate_roman, 'count': validate_roman},
                  conflict_solver=season_episode_conflict_solver) \
        .regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
                                        name='seasonSeparator', escape=True) +
               r'@?(?P<season>\d+)').repeater('*')

    rebulk.defaults(abbreviations=[dash])

    # Episode word followed by digits; disabled when the expected type is 'episode'.
    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))

    # Same pattern accepting any numeral; enabled only when the expected type is 'episode'.
    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 validator={'episode': validate_roman},
                 formatter={'episode': parse_numeral},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))

    # Season followed by an "all" word instead of an episode number: reported as other='Complete'.
    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 formatter={'other': lambda match: 'Complete'},
                 disabled=lambda context: is_disabled(context, 'season'))

    # 12, 13
    rebulk.chain(tags=['weak-episode'],
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None, tags=['weak-episode']) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})', abbreviations=None).repeater('*')

    # 012, 013
    rebulk.chain(tags=['weak-episode'],
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None, tags=['weak-episode']) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})', abbreviations=None).repeater('*')

    # 112, 113
    rebulk.chain(tags=['weak-episode'],
                 name='weak_episode',
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None, tags=['weak-episode'], name='weak_episode') \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})', abbreviations=None).repeater('*')

    # 1, 2, 3
    rebulk.chain(tags=['weak-episode'],
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
        .defaults(validator=None, tags=['weak-episode']) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})', abbreviations=None).repeater('*')

    # e112, e113, 1e18, 3e19
    rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')

    # cap 112, cap 112_114
    rebulk.chain(tags=['see-pattern'],
                 disabled=is_season_episode_disabled) \
        .defaults(validator=None, tags=['see-pattern']) \
        .regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')

    # 102, 0102
    rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
                 name='weak_duplicate',
                 conflict_solver=season_episode_conflict_solver,
                 disabled=lambda context: (context.get('episode_prefer_number', False) or
                                           context.get('type') == 'movie') or is_season_episode_disabled(context)) \
        .defaults(tags=['weak-episode', 'weak-duplicate'],
                  name='weak_duplicate',
                  validator=None,
                  conflict_solver=season_episode_conflict_solver) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})', abbreviations=None).repeater('*')

    # Standalone version marker (v2, v3, ...).
    rebulk.regex(r'v(?P<version>\d+)',
                 formatter=int,
                 disabled=lambda context: is_disabled(context, 'version'))

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 formatter=int,
                 pre_match_processor=match_processors.strip,
                 disabled=lambda context: is_disabled(context, 'episode'))

    rebulk.regex(r'Minisodes?',
                 children=False,
                 private_parent=False,
                 name='episode_format',
                 value="Minisode",
                 disabled=lambda context: is_disabled(context, 'episode_format'))

    rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
                 SeePatternRange(range_separators + ['_']),
                 EpisodeNumberSeparatorRange(range_separators),
                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
                 EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak(episode_words),
                 RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)

    return rebulk
|
||||
|
||||
|
||||
class WeakConflictSolver(Rule):
    """
    Rule to decide whether weak-episode or weak-duplicate matches should be kept.

    If an anime is detected:
        - weak-duplicate matches should be removed
        - weak-episode matches should be tagged as anime
    Otherwise:
        - weak-episode matches are removed unless they're part of an episode range match.
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]

    def enabled(self, context):
        """Run this rule unless the context explicitly sets ``type`` to ``'movie'``."""
        return context.get('type') != 'movie'

    @classmethod
    def is_anime(cls, matches):
        """Return True if it seems to be an anime.

        Anime characteristics:
            - version, crc32 matches
            - screen_size inside brackets
            - release_group at start and inside brackets
        """
        if matches.named('version') or matches.named('crc32'):
            return True

        for group in matches.markers.named('group'):
            if matches.range(group.start, group.end, predicate=lambda m: m.name == 'screen_size'):
                return True
            # Only groups that begin exactly where a 'path' marker begins are considered here.
            if matches.markers.starting(group.start, predicate=lambda m: m.name == 'path'):
                hole = matches.holes(group.start, group.end, index=0)
                # The first hole covers the whole group text, i.e. nothing inside the group was matched.
                if hole and hole.raw == group.raw:
                    return True

        return False

    def when(self, matches, context):
        """
        Collect the matches to remove and the re-tagged copies to append.

        :return: ``(to_remove, to_append)`` when at least one list is non-empty, otherwise ``False``
        """
        to_remove = []
        to_append = []
        anime_detected = self.is_anime(matches)
        for filepart in matches.markers.named('path'):
            # Matches of this filepart produced by the 'weak_episode' and 'weak_duplicate' patterns.
            weak_matches = matches.range(filepart.start, filepart.end, predicate=(
                lambda m: m.initiator.name == 'weak_episode'))
            weak_dup_matches = matches.range(filepart.start, filepart.end, predicate=(
                lambda m: m.initiator.name == 'weak_duplicate'))
            if anime_detected:
                if weak_matches:
                    to_remove.extend(weak_dup_matches)
                    # Replace every episode not coming from 'weak_duplicate' by a copy carrying the 'anime' tag.
                    for match in matches.range(filepart.start, filepart.end, predicate=(
                            lambda m: m.name == 'episode' and m.initiator.name != 'weak_duplicate')):
                        episode = copy.copy(match)
                        episode.tags = episode.tags + ['anime']
                        to_append.append(episode)
                        to_remove.append(match)
            elif weak_dup_matches:
                # Weak episodes whose initiator also holds an episodeSeparator child (part of a range).
                episodes_in_range = matches.range(filepart.start, filepart.end, predicate=(
                    lambda m:
                    m.name == 'episode' and m.initiator.name == 'weak_episode'
                    and m.initiator.children.named('episodeSeparator')
                ))
                if not episodes_in_range and not matches.range(filepart.start, filepart.end,
                                                               predicate=lambda m: 'SxxExx' in m.tags):
                    # No range and no SxxExx match in this filepart: drop the weak episodes.
                    to_remove.extend(weak_matches)
                else:
                    # Replace range episodes by copies with all tags cleared.
                    for match in episodes_in_range:
                        episode = copy.copy(match)
                        episode.tags = []
                        to_append.append(episode)
                        to_remove.append(match)

                # to_append is shared across fileparts, so this is also true once any earlier
                # filepart appended something.
                if to_append:
                    to_remove.extend(weak_dup_matches)

        if to_remove or to_append:
            return to_remove, to_append
        return False
|
||||
|
||||
|
||||
class CountValidator(Rule):
    """
    Validate ``count`` matches and rename them to ``episode_count`` or ``season_count``.

    Each count is classified from the ``episode`` or ``season`` match found before it;
    a count without such a predecessor is removed.
    """
    priority = 64
    consequence = [RemoveMatch, RenameMatch('episode_count'), RenameMatch('season_count')]

    properties = {'episode_count': [None], 'season_count': [None]}

    def when(self, matches, context):
        """
        :return: ``(to_remove, episode_counts, season_counts)`` when any list is non-empty, else ``False``
        """
        orphans = []
        by_owner = {'episode': [], 'season': []}

        for count in matches.named('count'):
            owner = matches.previous(count, lambda match: match.name in ['episode', 'season'], 0)
            if not owner:
                orphans.append(count)
            elif owner.name in by_owner:
                by_owner[owner.name].append(count)

        episode_counts = by_owner['episode']
        season_counts = by_owner['season']
        if orphans or episode_counts or season_counts:
            return orphans, episode_counts, season_counts
        return False
|
||||
|
||||
|
||||
class SeePatternRange(Rule):
    """
    Expand episode ranges written with the SEE pattern, e.g. ``Cap.102_104``.

    :param range_separators: separator values that denote a range
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]

    def __init__(self, range_separators):
        super(SeePatternRange, self).__init__()
        self.range_separators = range_separators

    def when(self, matches, context):
        """
        :return: ``(to_remove, to_append)`` when at least one list is non-empty, else ``False``
        """
        removed = []
        added = []

        def see_tagged(name):
            """Build a predicate selecting ``see-pattern`` tagged matches with the given name."""
            return lambda m: m.name == name and 'see-pattern' in m.tags

        for separator in matches.tagged('see-pattern', lambda m: m.name == 'episodeSeparator'):
            first_episode = matches.previous(separator, see_tagged('episode'), 0)
            repeated_season = matches.next(separator, see_tagged('season'), 0)
            if not repeated_season:
                # Separator is kept untouched when no season follows it.
                continue

            last_episode = matches.next(repeated_season, see_tagged('episode'), 0)
            if first_episode and last_episode and separator.value in self.range_separators:
                removed.append(last_episode)

                # Re-create every episode after the first one, up to and including the last.
                for number in range(first_episode.value + 1, last_episode.value + 1):
                    expanded = copy.copy(last_episode)
                    expanded.value = number
                    added.append(expanded)

            removed.append(separator)

        if removed or added:
            return removed, added
        return False
|
||||
|
||||
|
||||
class AbstractSeparatorRange(Rule):
    """
    Remove separator matches and create matches for a range of the configured property.

    The property is given by ``property_name``; the separator matches handled are
    the ones named ``<property_name>Separator``.
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]

    def __init__(self, range_separators, property_name):
        """
        :param range_separators: separator values that denote a range
        :param property_name: name of the matches to expand
        """
        super(AbstractSeparatorRange, self).__init__()
        self.range_separators = range_separators
        self.property_name = property_name

    def when(self, matches, context):
        """
        :return: ``(to_remove, to_append)`` when at least one list is non-empty, otherwise ``False``
        """
        to_remove = []
        to_append = []

        # First pass: explicit separator matches.
        for separator in matches.named(self.property_name + 'Separator'):
            previous_match = matches.previous(separator, lambda m: m.name == self.property_name, 0)
            next_match = matches.next(separator, lambda m: m.name == self.property_name, 0)
            initiator = separator.initiator

            if previous_match and next_match and separator.value in self.range_separators:
                to_remove.append(next_match)
                # Fill in the values strictly between both bounds using copies of the upper bound.
                for episode_number in range(previous_match.value + 1, next_match.value):
                    match = copy.copy(next_match)
                    match.value = episode_number
                    initiator.children.append(match)
                    to_append.append(match)
                # The upper bound is removed above and re-appended after the generated matches.
                to_append.append(next_match)
            # Every separator match is removed, whether or not it formed a range.
            to_remove.append(separator)

        # Second pass: consecutive property matches whose initiators are separated by raw text
        # that is a range separator.
        previous_match = None
        for next_match in matches.named(self.property_name):
            if previous_match:
                separator = matches.input_string[previous_match.initiator.end:next_match.initiator.start]
                if separator not in self.range_separators:
                    separator = strip(separator)
                if separator in self.range_separators:
                    initiator = previous_match.initiator
                    for episode_number in range(previous_match.value + 1, next_match.value):
                        match = copy.copy(next_match)
                        match.value = episode_number
                        initiator.children.append(match)
                        to_append.append(match)
                    # Private separator match recorded for the text between both matches.
                    to_append.append(Match(previous_match.end, next_match.start - 1,
                                           name=self.property_name + 'Separator',
                                           private=True,
                                           input_string=matches.input_string))
                    to_remove.append(next_match)  # Remove and append match to support proper ordering
                    to_append.append(next_match)

            previous_match = next_match

        if to_remove or to_append:
            return to_remove, to_append
        return False
|
||||
|
||||
|
||||
class RenameToAbsoluteEpisode(Rule):
    """
    Rename episode matches to ``absolute_episode``.

    Absolute episodes are used when two groups of episodes are detected, e.g.:

        S02E04-06 25-27
        25-27 S02E04-06
        2x04-06 25-27
        28. Anime Name S02E05

    When exactly two multi-episode groups are found, are not separated by meaningful text
    and hold the same number of episodes, the group with the higher first value is renamed.
    Otherwise, ``weak_episode`` matches starting a filepart are renamed when that filepart
    holds another episode further on.
    """

    consequence = RenameMatch('absolute_episode')

    def when(self, matches, context):
        """
        :return: the matches to rename, or ``None`` when nothing applies
        """
        groups = set()
        for episode in matches.named('episode'):
            if len(episode.initiator.children.named('episode')) > 1:
                groups.add(episode.initiator)

        if len(groups) != 2:
            leading = []
            for filepart in matches.markers.named('path'):
                if not matches.range(filepart.start + 1, filepart.end, predicate=lambda m: m.name == 'episode'):
                    continue
                leading.extend(
                    matches.starting(filepart.start, predicate=lambda m: m.initiator.name == 'weak_episode'))
            return leading

        first_group, second_group = sorted(groups, key=lambda item: item.end)
        if matches.holes(first_group.end, second_group.start, predicate=lambda m: m.raw.strip(seps)):
            # Something other than separators sits between both groups.
            return None

        first_range = matches.named('episode', predicate=lambda m: m.initiator == first_group)
        second_range = matches.named('episode', predicate=lambda m: m.initiator == second_group)
        if len(first_range) != len(second_range):
            return None

        if second_range[0].value > first_range[0].value:
            return second_range
        if first_range[0].value > second_range[0].value:
            return first_range
        return None
|
||||
|
||||
|
||||
class EpisodeNumberSeparatorRange(AbstractSeparatorRange):
    """
    Separator range rule bound to the ``episode`` property.
    """

    def __init__(self, range_separators):
        super(EpisodeNumberSeparatorRange, self).__init__(range_separators, property_name="episode")
|
||||
|
||||
|
||||
class SeasonSeparatorRange(AbstractSeparatorRange):
    """
    Separator range rule bound to the ``season`` property.
    """

    def __init__(self, range_separators):
        super(SeasonSeparatorRange, self).__init__(range_separators, property_name="season")
|
||||
|
||||
|
||||
class RemoveWeakIfMovie(Rule):
    """
    Remove weak-episode tagged matches if it seems to be a movie.

    A filepart holding a ``year`` match is what triggers the removal.
    """
    priority = 64
    consequence = RemoveMatch

    def enabled(self, context):
        """Run this rule unless the context explicitly sets ``type`` to ``'episode'``."""
        return context.get('type') != 'episode'

    def when(self, matches, context):
        """
        :return: list of matches to remove (possibly empty)
        """
        to_remove = []
        to_ignore = set()
        remove = False
        for filepart in matches.markers.named('path'):
            year = matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'year', index=0)
            if year:
                remove = True
                # First private match after the year; its initiator is protected when only
                # separators lie in between and it is not itself located on a year match.
                next_match = matches.range(year.end, filepart.end, predicate=lambda m: m.private, index=0)
                if (next_match and not matches.holes(year.end, next_match.start, predicate=lambda m: m.raw.strip(seps))
                        and not matches.at_match(next_match, predicate=lambda m: m.name == 'year')):
                    to_ignore.add(next_match.initiator)

                # Matches having more than one episode child are protected as well.
                to_ignore.update(matches.range(filepart.start, filepart.end,
                                               predicate=lambda m: len(m.children.named('episode')) > 1))

                to_remove.extend(matches.conflicting(year))
        if remove:
            # Applies to every filepart once any of them holds a year; 'anime' tagged matches are kept.
            to_remove.extend(matches.tagged('weak-episode', predicate=(
                lambda m: m.initiator not in to_ignore and 'anime' not in m.tags)))

        return to_remove
|
||||
|
||||
|
||||
class RemoveWeak(Rule):
    """
    Remove weak-episode matches that directly follow a video, source or audio match.

    When the raw text of that preceding match is one of ``episode_words``, the first weak
    match is additionally re-created as a regular ``episode`` starting at the preceding match.

    :param episode_words: words introducing an episode number
    """
    priority = 16
    consequence = RemoveMatch, AppendMatch

    def __init__(self, episode_words):
        super(RemoveWeak, self).__init__()
        self.episode_words = episode_words

    def when(self, matches, context):
        """
        :return: ``(to_remove, to_append)`` when at least one list is non-empty, else ``False``
        """
        removed = []
        added = []
        technical_names = (
            'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile',
            'audio_channels', 'audio_profile')

        for filepart in matches.markers.named('path'):
            weak_matches = matches.range(filepart.start, filepart.end,
                                         predicate=lambda m: 'weak-episode' in m.tags)
            if not weak_matches:
                continue

            first_weak = weak_matches[0]
            preceding = matches.previous(first_weak, predicate=lambda m: m.name in technical_names, index=0)
            if not preceding:
                continue
            if matches.holes(preceding.end, first_weak.start, predicate=lambda m: m.raw.strip(seps)):
                # Meaningful text separates both matches: leave the weak matches alone.
                continue

            if preceding.raw.lower() in self.episode_words:
                try:
                    promoted = copy.copy(first_weak)
                    promoted.name = 'episode'
                    promoted.value = int(first_weak.value)
                    promoted.start = preceding.start
                    promoted.private = False
                    promoted.tags = []

                    added.append(promoted)
                except ValueError:
                    # Value is not an integer: nothing is promoted.
                    pass

            removed.extend(weak_matches)

        if removed or added:
            return removed, added
        return False
|
||||
|
||||
|
||||
class RemoveWeakIfSxxExx(Rule):
    """
    Remove weak-episode tagged matches from fileparts holding a public SxxExx match.

    A match starting the filepart and produced by the ``weak_episode`` pattern is kept.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: list of matches to remove (possibly empty)
        """
        removed = []
        for filepart in matches.markers.named('path'):
            has_sxxexx = matches.range(filepart.start, filepart.end,
                                       predicate=lambda m: not m.private and 'SxxExx' in m.tags)
            if not has_sxxexx:
                continue

            weak_matches = matches.range(filepart.start, filepart.end,
                                         predicate=lambda m: 'weak-episode' in m.tags)
            for weak in weak_matches:
                kept = weak.start == filepart.start and weak.initiator.name == 'weak_episode'
                if not kept:
                    removed.append(weak)
        return removed
|
||||
|
||||
|
||||
class RemoveInvalidSeason(Rule):
    """
    Remove season matches that follow a strong (public, SxxExx tagged) season.

    Only applies when the strong season's initiator also holds an episode. Later seasons
    are removed when they are not SxxExx tagged or when their initiator holds no episode.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: list of matches to remove (possibly empty)
        """
        removed = []
        for filepart in matches.markers.named('path'):
            anchor = matches.range(filepart.start, filepart.end, index=0,
                                   predicate=lambda m: m.name == 'season'
                                   and not m.private and 'SxxExx' in m.tags)
            if not anchor:
                continue
            if not anchor.initiator.children.named('episode'):
                continue

            for season in matches.range(anchor.end, filepart.end,
                                        predicate=lambda m: m.name == 'season' and not m.private):
                if 'SxxExx' in season.tags and season.initiator.children.named('episode'):
                    # Strong season with its own episode: keep it.
                    continue

                if season.initiator:
                    removed.append(season.initiator)
                    removed.extend(season.initiator.children)
                else:
                    removed.append(season)

        return removed
|
||||
|
||||
|
||||
class RemoveInvalidEpisode(Rule):
    """
    Remove episode matches whose prefix differs from the prefix of the first strong episode.

    The strong episode is the first public, SxxExx tagged episode of the filepart; prefixes
    are compared case-insensitively.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: list of matches to remove (possibly empty)
        """
        removed = []
        prefix_of = RemoveInvalidEpisode.get_episode_prefix
        for filepart in matches.markers.named('path'):
            anchor = matches.range(filepart.start, filepart.end, index=0,
                                   predicate=lambda m: m.name == 'episode'
                                   and not m.private and 'SxxExx' in m.tags)
            if not anchor:
                continue

            anchor_prefix = prefix_of(matches, anchor)
            for episode in matches.range(anchor.end, filepart.end,
                                         predicate=lambda m: m.name == 'episode' and not m.private):
                prefix = prefix_of(matches, episode)
                if not (anchor_prefix and prefix):
                    continue
                if anchor_prefix.value.lower() == prefix.value.lower():
                    continue

                if episode.initiator:
                    removed.append(episode.initiator)
                    removed.extend(episode.initiator.children)
                else:
                    removed.append(prefix)
                    removed.append(episode)

        return removed

    @staticmethod
    def get_episode_prefix(matches, episode):
        """
        Return the ``episodeMarker`` or ``episodeSeparator`` match found before the given episode.
        """
        def is_prefix(m):
            return m.name in ('episodeMarker', 'episodeSeparator')

        return matches.previous(episode, index=0, predicate=is_prefix)
|
||||
|
||||
|
||||
class RemoveWeakDuplicate(Rule):
    """
    Remove repeated weak-duplicate tagged matches, for example ``The 100.109``.

    Each filepart is scanned from its end: for a given match name, only the first match
    seen for each pattern is kept.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: list of matches to remove (possibly empty)
        """
        removed = []
        for filepart in matches.markers.named('path'):
            seen_patterns = defaultdict(list)
            candidates = matches.range(filepart.start, filepart.end,
                                       predicate=lambda m: 'weak-duplicate' in m.tags)
            for candidate in reversed(candidates):
                known = seen_patterns[candidate.name]
                if candidate.pattern in known:
                    removed.append(candidate)
                    continue
                known.append(candidate.pattern)
        return removed
|
||||
|
||||
|
||||
class EpisodeDetailValidator(Rule):
    """
    Keep ``episode_details`` only when detached or adjacent to a season or episode.

    A detail is removed when it is not surrounded by separators and neither the previous
    nor the next lookup yields a season or episode match.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: list of matches to remove (possibly empty)
        """
        def is_season_or_episode(match):
            return match.name in ['season', 'episode']

        invalid = []
        for detail in matches.named('episode_details'):
            if seps_surround(detail):
                continue
            if matches.previous(detail, is_season_or_episode):
                continue
            if matches.next(detail, is_season_or_episode):
                continue
            invalid.append(detail)
        return invalid
|
||||
|
||||
|
||||
class RemoveDetachedEpisodeNumber(Rule):
    """
    Remove a low weak episode number that is detached from the other episode numbers.

    Among public weak-episode matches with distinct values, when the lowest value is below 10
    and the second lowest does not directly follow it, the lowest match is removed together
    with its chain of parents.

    Fairy Tail 2 - 16-20, 2 should be removed.
    """
    priority = 64
    consequence = RemoveMatch
    dependency = [RemoveWeakIfSxxExx, RemoveWeakDuplicate]

    def when(self, matches, context):
        """
        :return: list of matches to remove (possibly empty)
        """
        removed = []

        distinct = []
        seen_values = set()
        for match in matches.named('episode', lambda m: not m.private and 'weak-episode' in m.tags):
            if match.value in seen_values:
                continue
            distinct.append(match)
            seen_values.add(match.value)

        distinct.sort(key=lambda m: m.value)
        if len(distinct) <= 1:
            return removed

        lowest, runner_up = distinct[0], distinct[1]
        if lowest.value < 10 and runner_up.value - lowest.value != 1:
            node = lowest
            while node:  # TODO: Add a feature in rebulk to avoid this ...
                removed.append(node)
                node = node.parent
        return removed
|
||||
|
||||
|
||||
class VersionValidator(Rule):
    """
    Keep a version only when an episode precedes it or its initiator is surrounded by separators.
    """
    priority = 64
    dependency = [RemoveWeakIfMovie, RemoveWeakIfSxxExx]
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: list of matches to remove (possibly empty)
        """
        invalid = []
        for version in matches.named('version'):
            if matches.previous(version, lambda match: match.name == 'episode', 0):
                continue
            if seps_surround(version.initiator):
                continue
            invalid.append(version)
        return invalid
|
||||
|
||||
|
||||
class EpisodeSingleDigitValidator(Rule):
    """
    Remove an episode whose initiator has length 1 when it lies in a group holding no title.
    """
    dependency = [TitleFromPosition]

    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: list of matches to remove (possibly empty)
        """
        removed = []
        for episode in matches.named('episode', lambda match: len(match.initiator) == 1):
            group = matches.markers.at_match(episode, lambda marker: marker.name == 'group', index=0)
            if not group:
                continue
            group_start, group_end = group.span
            if matches.range(group_start, group_end, predicate=lambda match: match.name == 'title'):
                continue
            removed.append(episode)
        return removed
|
||||
|
||||
|
||||
class RenameToDiscMatch(Rule):
    """
    Rename episodes introduced by a ``d`` episodeMarker to ``disc``.

    The markers themselves are renamed to ``discMarker``. When the ``disc`` property is
    disabled, the markers and all children of their initiator are removed instead.
    """

    consequence = [RenameMatch('disc'), RenameMatch('discMarker'), RemoveMatch]

    def when(self, matches, context):
        """
        :return: ``(discs, markers, to_remove)`` when any list is non-empty, else ``False``
        """
        discs, markers, removed = [], [], []
        disc_disabled = is_disabled(context, 'disc')

        for marker in matches.named('episodeMarker', predicate=lambda m: m.value.lower() == 'd'):
            if not disc_disabled:
                markers.append(marker)
                discs.extend(sorted(marker.initiator.children.named('episode'), key=lambda m: m.value))
            else:
                removed.append(marker)
                removed.extend(marker.initiator.children)

        if discs or markers or removed:
            return discs, markers, removed
        return False
|
||||
48
lib/guessit/rules/properties/film.py
Normal file
48
lib/guessit/rules/properties/film.py
Normal file
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
film property
|
||||
"""
|
||||
from rebulk import Rebulk, AppendMatch, Rule
|
||||
from rebulk.remodule import re
|
||||
|
||||
from ..common.formatters import cleanup
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_surround
|
||||
|
||||
|
||||
def film(config):  # pylint:disable=unused-argument
    """
    Build the rebulk object detecting the ``film`` property (``f`` followed by one or two digits).

    :param config: rule configuration (unused)
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    regex_options = dict(flags=re.IGNORECASE, validate_all=True, validator={'__parent__': seps_surround})
    rebulk = Rebulk().regex_defaults(**regex_options)

    def film_disabled(context):
        return is_disabled(context, 'film')

    rebulk.regex(r'f(\d{1,2})', name='film', private_parent=True, children=True, formatter=int,
                 disabled=film_disabled)
    rebulk.rules(FilmTitleRule)

    return rebulk
|
||||
|
||||
|
||||
class FilmTitleRule(Rule):
    """
    Rule to find out film_title: the first hole between the start of the filepart
    and the film property.
    """
    consequence = AppendMatch

    properties = {'film_title': [None]}

    def enabled(self, context):
        """Run this rule unless the ``film_title`` property is disabled."""
        return not is_disabled(context, 'film_title')

    def when(self, matches, context):
        """
        :return: the hole renamed to ``film_title``, or ``None`` when there is none
        """
        film_number = matches.named('film', lambda match: not match.private, index=0)
        if not film_number:
            return None

        filepart = matches.markers.at_match(film_number, lambda marker: marker.name == 'path', 0)
        candidate = matches.holes(filepart.start, film_number.start + 1, formatter=cleanup, index=0)
        if not candidate or not candidate.value:
            return None

        candidate.name = 'film_title'
        return candidate
|
||||
510
lib/guessit/rules/properties/language.py
Normal file
510
lib/guessit/rules/properties/language.py
Normal file
@@ -0,0 +1,510 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
language and subtitle_language properties
|
||||
"""
|
||||
# pylint: disable=no-member
|
||||
import copy
|
||||
from collections import defaultdict, namedtuple
|
||||
|
||||
import babelfish
|
||||
from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch
|
||||
from rebulk.remodule import re
|
||||
|
||||
from ..common import seps
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.words import iter_words
|
||||
from ..common.validators import seps_surround
|
||||
|
||||
|
||||
def language(config, common_words):
    """
    Build the rebulk object detecting ``language`` and ``subtitle_language``.

    Also registers a ``guessit`` language converter in babelfish, built from ``config['synonyms']``.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def merged_affixes(shared, specific_key):
        """Return ``shared`` plus ``config[specific_key]``, sorted by length."""
        return sorted(shared + config[specific_key], key=length_comparator)

    def language_disabled(context):
        return is_disabled(context, 'language')

    def subtitle_language_disabled(context):
        return is_disabled(context, 'subtitle_language')

    def everything_disabled(context):
        return language_disabled(context) and subtitle_language_disabled(context)

    subtitle_both = config['subtitle_affixes']
    subtitle_prefixes = merged_affixes(subtitle_both, 'subtitle_prefixes')
    subtitle_suffixes = merged_affixes(subtitle_both, 'subtitle_suffixes')
    lang_both = config['language_affixes']
    lang_prefixes = merged_affixes(lang_both, 'language_prefixes')
    lang_suffixes = merged_affixes(lang_both, 'language_suffixes')
    weak_affixes = frozenset(config['weak_affixes'])

    rebulk = Rebulk(disabled=everything_disabled)

    # Private affix matches used by the subtitle/language rules registered below.
    rebulk.string(*subtitle_prefixes, name="subtitle_language.prefix", ignore_case=True, private=True,
                  validator=seps_surround, tags=['release-group-prefix'],
                  disabled=subtitle_language_disabled)
    rebulk.string(*subtitle_suffixes, name="subtitle_language.suffix", ignore_case=True, private=True,
                  validator=seps_surround,
                  disabled=subtitle_language_disabled)
    rebulk.string(*lang_suffixes, name="language.suffix", ignore_case=True, private=True,
                  validator=seps_surround, tags=['source-suffix'],
                  disabled=language_disabled)

    def find_languages(string, context=None):
        """Find languages in the string

        :return: list of tuple (property, Language, lang_word, word)
        """
        finder = LanguageFinder(context, subtitle_prefixes, subtitle_suffixes,
                                lang_prefixes, lang_suffixes, weak_affixes)
        return finder.find(string)

    rebulk.functional(find_languages,
                      properties={'language': [None]},
                      disabled=lambda context: not context.get('allowed_languages'))
    rebulk.rules(SubtitleExtensionRule,
                 SubtitlePrefixLanguageRule,
                 SubtitleSuffixLanguageRule,
                 RemoveLanguage,
                 RemoveInvalidLanguages(common_words))

    babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms'])

    return rebulk
|
||||
|
||||
|
||||
# Special language values built from the codes 'und' and 'mul'.
UNDETERMINED = babelfish.Language('und')
MULTIPLE = babelfish.Language('mul')
# Both special values grouped in one immutable set.
NON_SPECIFIC_LANGUAGES = frozenset([UNDETERMINED, MULTIPLE])
|
||||
|
||||
|
||||
class GuessitConverter(babelfish.LanguageReverseConverter):
    """
    Babelfish reverse converter resolving synonyms first, then the standard converters.

    :param synonyms: mapping of ``alpha3`` or ``alpha3_COUNTRY`` codes to lists of synonyms
    """
    _with_country_regexp = re.compile(r'(.*)\((.*)\)')
    _with_country_regexp2 = re.compile(r'(.*)-(.*)')

    def __init__(self, synonyms):
        self.guessit_exceptions = {}
        for code, synlist in synonyms.items():
            if '_' in code:
                alpha3, country = code.split('_')
            else:
                alpha3, country = code, None
            entry = (alpha3, country, None)
            for syn in synlist:
                self.guessit_exceptions[syn.lower()] = entry

    @property
    def codes(self):
        """Return every code understood by this converter."""
        language_codes = (babelfish.language_converters['alpha3b'].codes |
                          babelfish.language_converters['alpha2'].codes |
                          babelfish.language_converters['name'].codes |
                          babelfish.language_converters['opensubtitles'].codes)
        country_codes = babelfish.country_converters['name'].codes
        synonym_codes = frozenset(self.guessit_exceptions.keys())
        return language_codes | country_codes | synonym_codes

    def convert(self, alpha3, country=None, script=None):
        """Return the string form of the language built from the given parts."""
        lang = babelfish.Language(alpha3, country, script)
        return str(lang)

    def reverse(self, name):  # pylint:disable=arguments-differ
        """
        Return ``(alpha3, country, script)`` for the given name, compared in lower case.

        :raise babelfish.LanguageReverseError: when no converter recognizes the name
        """
        name = name.lower()
        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        if name in self.guessit_exceptions:
            return self.guessit_exceptions[name]

        converters = (babelfish.Language,
                      babelfish.Language.fromalpha3b,
                      babelfish.Language.fromalpha2,
                      babelfish.Language.fromname,
                      babelfish.Language.fromopensubtitles,
                      babelfish.Language.fromietf)
        for converter in converters:
            try:
                found = converter(name)
                return found.alpha3, found.country, found.script
            except (ValueError, babelfish.LanguageReverseError):
                continue

        raise babelfish.LanguageReverseError(name)
|
||||
|
||||
|
||||
def length_comparator(value):
    """
    Sort key returning the length of ``value``.
    """
    size = len(value)
    return size
|
||||
|
||||
|
||||
# Internal record for one language candidate: the property it belongs to, the word
# that produced it and the language found.
_LanguageMatch = namedtuple('_LanguageMatch', ['property_name', 'word', 'lang'])
|
||||
|
||||
|
||||
class LanguageWord(object):
    """
    Extension to the Word namedtuple in order to create compound words.

    E.g.: pt-BR, soft subtitles, custom subs

    :param start: start index of the word in ``input_string``
    :param end: end index (exclusive) of the word in ``input_string``
    :param value: text of the word
    :param input_string: full string the word was taken from
    :param next_word: following ``LanguageWord``, if any
    """

    def __init__(self, start, end, value, input_string, next_word=None):
        self.start = start
        self.end = end
        self.value = value
        self.input_string = input_string
        self.next_word = next_word

    @property
    def extended_word(self):
        """
        Return the compound word made of this word and the next one, if any.

        Words are joined when separated by a space or a dot, or by a dash that is not
        itself followed by another dash after the next word. Dots in the compound value
        are replaced by spaces. Returns ``None`` when no compound word can be built.
        """
        following = self.next_word
        if not following:
            return None

        separator = self.input_string[self.end:following.start]
        next_separator = self.input_string[following.end:following.end + 1]

        if (separator == '-' and separator != next_separator) or separator in (' ', '.'):
            value = self.input_string[self.start:following.end].replace('.', ' ')
            return LanguageWord(self.start, following.end, value, self.input_string, following.next_word)
        return None

    def __repr__(self):
        # The closing '>' was missing, leaving the representation unbalanced.
        return '<({start},{end}): {value}>'.format(start=self.start, end=self.end, value=self.value)
|
||||
|
||||
|
||||
def to_rebulk_match(language_match):
    """
    Convert a language match to the ``(start, end, options)`` tuple expected by rebulk.

    An undetermined language keeps the lower-cased word as value, to be formatted through
    ``babelfish.Language``, and is tagged ``weak-language``.
    """
    word = language_match.word
    options = {'name': language_match.property_name}

    if language_match.lang == UNDETERMINED:
        options['value'] = word.value.lower()
        options['formatter'] = babelfish.Language
        options['tags'] = ['weak-language']
    else:
        options['value'] = language_match.lang

    return word.start, word.end, options
|
||||
|
||||
|
||||
class LanguageFinder(object):
|
||||
"""
|
||||
Helper class to search and return language matches: 'language' and 'subtitle_language' properties
|
||||
"""
|
||||
|
||||
def __init__(self, context,
|
||||
subtitle_prefixes, subtitle_suffixes,
|
||||
lang_prefixes, lang_suffixes, weak_affixes):
|
||||
allowed_languages = context.get('allowed_languages') if context else None
|
||||
self.allowed_languages = {l.lower() for l in allowed_languages or []}
|
||||
self.weak_affixes = weak_affixes
|
||||
self.prefixes_map = {}
|
||||
self.suffixes_map = {}
|
||||
|
||||
if not is_disabled(context, 'subtitle_language'):
|
||||
self.prefixes_map['subtitle_language'] = subtitle_prefixes
|
||||
self.suffixes_map['subtitle_language'] = subtitle_suffixes
|
||||
|
||||
self.prefixes_map['language'] = lang_prefixes
|
||||
self.suffixes_map['language'] = lang_suffixes
|
||||
|
||||
def find(self, string):
|
||||
"""
|
||||
Return all matches for language and subtitle_language.
|
||||
|
||||
Undetermined language matches are removed if a regular language is found.
|
||||
Multi language matches are removed if there are only undetermined language matches
|
||||
"""
|
||||
regular_lang_map = defaultdict(set)
|
||||
undetermined_map = defaultdict(set)
|
||||
multi_map = defaultdict(set)
|
||||
|
||||
for match in self.iter_language_matches(string):
|
||||
key = match.property_name
|
||||
if match.lang == UNDETERMINED:
|
||||
undetermined_map[key].add(match)
|
||||
elif match.lang == 'mul':
|
||||
multi_map[key].add(match)
|
||||
else:
|
||||
regular_lang_map[key].add(match)
|
||||
|
||||
for key, values in multi_map.items():
|
||||
if key in regular_lang_map or key not in undetermined_map:
|
||||
for value in values:
|
||||
yield to_rebulk_match(value)
|
||||
|
||||
for key, values in undetermined_map.items():
|
||||
if key not in regular_lang_map:
|
||||
for value in values:
|
||||
yield to_rebulk_match(value)
|
||||
|
||||
for values in regular_lang_map.values():
|
||||
for value in values:
|
||||
yield to_rebulk_match(value)
|
||||
|
||||
def iter_language_matches(self, string):
|
||||
"""
|
||||
Return language matches for the given string.
|
||||
"""
|
||||
candidates = []
|
||||
previous = None
|
||||
for word in iter_words(string):
|
||||
language_word = LanguageWord(start=word.span[0], end=word.span[1], value=word.value, input_string=string)
|
||||
if previous:
|
||||
previous.next_word = language_word
|
||||
candidates.append(previous)
|
||||
previous = language_word
|
||||
if previous:
|
||||
candidates.append(previous)
|
||||
|
||||
for candidate in candidates:
|
||||
for match in self.iter_matches_for_candidate(candidate):
|
||||
yield match
|
||||
|
||||
def iter_matches_for_candidate(self, language_word):
    """
    Yield the language matches produced by a single candidate word.

    The candidate is first tried as a prefix carrier (the following word being the fallback), then the
    following word is tried as a suffix carrier (the candidate being the fallback), and finally the candidate
    itself is tried as a plain language.

    :param language_word: candidate word, linked to the following word through ``next_word``
    :return: generator of language matches
    """
    def has_prefix(string, prefix):
        return string.startswith(prefix)

    def drop_prefix(string, prefix):
        return string[len(prefix):]

    def has_suffix(string, suffix):
        return string.endswith(suffix)

    def drop_suffix(string, suffix):
        return string[:len(string) - len(suffix)]

    following = language_word.next_word
    attempts = (
        (language_word, following, self.prefixes_map, has_prefix, drop_prefix),
        (following, language_word, self.suffixes_map, has_suffix, drop_suffix),
    )

    for word, fallback_word, affixes, is_affix, strip_affix in attempts:
        # The suffix attempt is skipped when the candidate is the last word.
        if word:
            found = self.find_match_for_word(word, fallback_word, affixes, is_affix, strip_affix)
            if found:
                yield found

    found = self.find_language_match_for_word(language_word)
    if found:
        yield found
|
||||
|
||||
def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix):  # pylint:disable=inconsistent-return-statements
    """
    Return the first language match for the given word and affixes.

    The extended form of the word is tried before the word itself. For each affix carried by the word:

    * if something remains once the affix is stripped, the remainder is parsed as the language;
    * otherwise the language is looked up in ``fallback_word`` when it sits next to ``word`` (at most one
      character apart), and failing that an 'und' match is created unless the affix is a weak one.

    :param word: word that may carry an affix
    :param fallback_word: neighbouring word that may hold the language when ``word`` is only an affix
    :param affixes: mapping of property name to the affixes to test
    :param is_affix: callable(string, affix) telling whether string carries the affix
    :param strip_affix: callable(string, affix) returning string without the affix
    :return: the first match found, or None
    """
    for candidate in (word.extended_word, word):
        if not candidate:
            continue

        lowered = candidate.value.lower()

        for key, parts in affixes.items():
            for part in parts:
                if not is_affix(lowered, part):
                    continue

                remainder = strip_affix(lowered, part)
                if remainder:
                    found = self.create_language_match(
                        key, LanguageWord(candidate.start, candidate.end, remainder, candidate.input_string))
                else:
                    found = None
                    # The word is nothing but the affix: the language, if any, lives in the adjacent word.
                    adjacent = fallback_word and (
                        abs(fallback_word.start - word.end) <= 1 or abs(word.start - fallback_word.end) <= 1)
                    if adjacent:
                        found = self.find_language_match_for_word(fallback_word, key=key)

                    if not found and part not in self.weak_affixes:
                        found = self.create_language_match(
                            key, LanguageWord(candidate.start, candidate.end, 'und', candidate.input_string))

                if found:
                    return found
    return None
|
||||
|
||||
def find_language_match_for_word(self, word, key='language'):  # pylint:disable=inconsistent-return-statements
    """
    Return the language match for the given word.

    The extended form of the word is tried first, then the word itself.

    :param word: word to parse as a language
    :param key: property name given to the created match
    :return: the first match created, or None
    """
    for candidate in filter(None, (word.extended_word, word)):
        found = self.create_language_match(key, candidate)
        if found:
            return found
    return None
|
||||
|
||||
def create_language_match(self, key, word):  # pylint:disable=inconsistent-return-statements
    """
    Create a _LanguageMatch for the given word.

    :param key: property name of the match to create
    :param word: word whose lowercased value is parsed as a language
    :return: the created match, or None when the value is not a usable language
    """
    lang = self.parse_language(word.value.lower())
    if lang is None:
        return None
    return _LanguageMatch(property_name=key, word=word, lang=lang)
|
||||
|
||||
def parse_language(self, lang_word):  # pylint:disable=inconsistent-return-statements
    """
    Parse the lang_word into a valid Language.

    Multi and Undetermined languages are also valid languages.

    The parsed language is accepted when its name, its alpha2 code or its alpha3 code (tested in that order,
    lowercased) belongs to ``self.allowed_languages``.

    :param lang_word: lowercased word to parse
    :return: the babelfish language, or None when parsing fails or the language is not allowed
    """
    try:
        lang = babelfish.Language.fromguessit(lang_word)
        # The checks stay sequential and lazy: a later attribute is only read when the earlier ones failed.
        if hasattr(lang, 'name') and lang.name.lower() in self.allowed_languages:
            return lang
        if hasattr(lang, 'alpha2') and lang.alpha2.lower() in self.allowed_languages:
            return lang
        if lang.alpha3.lower() in self.allowed_languages:
            return lang
    except babelfish.Error:
        # Anything babelfish cannot parse or convert is simply not a language.
        pass
    return None
|
||||
|
||||
|
||||
class SubtitlePrefixLanguageRule(Rule):
    """
    Convert language guess as subtitle_language if previous match is a subtitle language prefix
    """
    consequence = RemoveMatch

    properties = {'subtitle_language': [None]}

    def enabled(self, context):
        """Run only when the subtitle_language property is not disabled."""
        return not is_disabled(context, 'subtitle_language')

    def when(self, matches, context):
        """
        Pair each language match with the subtitle language prefix that introduces it.

        :return: (list of (prefix, language) pairs to rename, list of matches to remove), or False when both
                 are empty
        """
        def is_prefix(candidate):
            return candidate.name == 'subtitle_language.prefix'

        renames = []
        removals = matches.named('subtitle_language.prefix')
        for language in matches.named('language'):
            prefix = matches.previous(language, is_prefix, 0)
            if not prefix:
                group = matches.markers.at_match(language, lambda marker: marker.name == 'group', 0)
                if group:
                    # Find prefix if placed just before the group, else if placed before in the group
                    prefix = (matches.previous(group, is_prefix, 0) or
                              matches.range(group.start, language.start, is_prefix, 0))
            if not prefix:
                continue

            renames.append((prefix, language))
            removals.extend(matches.conflicting(language))
            # A prefix that is in use must survive the removal pass; `then` needs it.
            if prefix in removals:
                removals.remove(prefix)

        if renames or removals:
            return renames, removals
        return False

    def then(self, matches, when_response, context):
        """Remove the discarded matches, then rename each paired language to subtitle_language."""
        renames, removals = when_response
        super(SubtitlePrefixLanguageRule, self).then(matches, removals, context)
        for prefix, language in renames:
            # Remove suffix equivalent of prefix.
            twin = copy.copy(prefix)
            twin.name = 'subtitle_language.suffix'
            if twin in matches:
                matches.remove(twin)
            # Take the match out before renaming it, then put it back under its new name.
            matches.remove(language)
            language.name = 'subtitle_language'
            matches.append(language)
|
||||
|
||||
|
||||
class SubtitleSuffixLanguageRule(Rule):
    """
    Convert language guess as subtitle_language if next match is a subtitle language suffix
    """
    dependency = SubtitlePrefixLanguageRule
    consequence = RemoveMatch

    properties = {'subtitle_language': [None]}

    def enabled(self, context):
        """Run only when the subtitle_language property is not disabled."""
        return not is_disabled(context, 'subtitle_language')

    def when(self, matches, context):
        """
        Collect the language matches that are followed by a subtitle language suffix.

        :return: (list of languages to rename, list of suffix matches to remove), or False when both are empty
        """
        renames = []
        removals = matches.named('subtitle_language.suffix')
        for language in matches.named('language'):
            suffix = matches.next(language, lambda match: match.name == 'subtitle_language.suffix', 0)
            if not suffix:
                continue
            renames.append(language)
            # A suffix that is in use is kept out of the removal list.
            if suffix in removals:
                removals.remove(suffix)

        if renames or removals:
            return renames, removals
        return False

    def then(self, matches, when_response, context):
        """Remove the discarded suffixes, then rename each collected language to subtitle_language."""
        renames, removals = when_response
        super(SubtitleSuffixLanguageRule, self).then(matches, removals, context)
        for language in renames:
            # Take the match out before renaming it, then put it back under its new name.
            matches.remove(language)
            language.name = 'subtitle_language'
            matches.append(language)
|
||||
|
||||
|
||||
class SubtitleExtensionRule(Rule):
    """
    Convert language guess as subtitle_language if next match is a subtitle extension.

    Since it's a strong match, it also removes any conflicting source with it.
    """
    consequence = [RemoveMatch, RenameMatch('subtitle_language')]

    properties = {'subtitle_language': [None]}

    def enabled(self, context):
        """Run only when the subtitle_language property is not disabled."""
        return not is_disabled(context, 'subtitle_language')

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        """
        Find the language match preceding a subtitle file extension.

        :return: (conflicting source matches to remove, language match to rename), or None when there is no
                 subtitle extension or no language before it
        """
        def is_subtitle_extension(candidate):
            return 'extension' in candidate.tags and 'subtitle' in candidate.tags

        extension = matches.named('container', is_subtitle_extension, 0)
        if not extension:
            return None

        language = matches.previous(extension, lambda match: match.name == 'language', 0)
        if not language:
            return None

        # Weak subtitle languages are made private once the extension gives a strong one.
        for weak in matches.named('subtitle_language', predicate=lambda m: 'weak-language' in m.tags):
            weak.private = True

        return matches.conflicting(language, lambda m: m.name == 'source'), language
|
||||
|
||||
|
||||
class RemoveLanguage(Rule):
    """Drop the language matches left over (not converted to subtitle_language) when language is disabled."""

    consequence = RemoveMatch

    def enabled(self, context):
        """Run only when the language property is disabled."""
        return is_disabled(context, 'language')

    def when(self, matches, context):
        """Select every remaining match named 'language' for removal."""
        leftovers = matches.named('language')
        return leftovers
|
||||
|
||||
|
||||
class RemoveInvalidLanguages(Rule):
    """Remove language matches whose raw text is one of the blacklisted common words."""

    consequence = RemoveMatch
    priority = 32

    def __init__(self, common_words):
        """
        Constructor.

        :param common_words: collection of lowercased words that must not be read as languages
        """
        super(RemoveInvalidLanguages, self).__init__()
        self.common_words = common_words

    def when(self, matches, context):
        """
        Select the language and subtitle_language matches spelled like a common word.

        Such a match is nevertheless kept when it sits in a group that holds nothing but language matches
        and separators.

        :return: list of matches to remove
        """
        def is_language(candidate):
            return candidate.name in ('language', 'subtitle_language')

        to_remove = []
        for candidate in matches.range(0, len(matches.input_string), predicate=is_language):
            if candidate.raw.lower() not in self.common_words:
                continue

            group = matches.markers.at_match(candidate, index=0, predicate=lambda m: m.name == 'group')
            if group:
                only_languages = not matches.range(
                    group.start, group.end, predicate=lambda m: m.name not in ('language', 'subtitle_language'))
                # Holes are only inspected when the group contains no other kind of match.
                if only_languages and not matches.holes(group.start, group.end,
                                                        predicate=lambda m: m.value.strip(seps)):
                    continue

            to_remove.append(candidate)

        return to_remove
|
||||
55
lib/guessit/rules/properties/mimetype.py
Normal file
55
lib/guessit/rules/properties/mimetype.py
Normal file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
mimetype property
|
||||
"""
|
||||
import mimetypes
|
||||
|
||||
from rebulk import Rebulk, CustomRule, POST_PROCESS
|
||||
from rebulk.match import Match
|
||||
|
||||
from ..common.pattern import is_disabled
|
||||
from ...rules.processors import Processors
|
||||
|
||||
|
||||
def mimetype(config):  # pylint:disable=unused-argument
    """
    Build the rebulk object holding the mimetype rule.

    :param config: rule configuration (unused)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def mimetype_disabled(context):
        return is_disabled(context, 'mimetype')

    builder = Rebulk(disabled=mimetype_disabled)
    builder.rules(Mimetype)

    return builder
|
||||
|
||||
|
||||
class Mimetype(CustomRule):
    """
    Mimetype post processor

    Guesses the mimetype of the whole input string and appends it as a zero-length 'mimetype' match located
    at the end of the input.
    """
    priority = POST_PROCESS

    dependency = Processors

    def when(self, matches, context):
        """Return the mimetype guessed (non strictly) from the input string, or None."""
        return mimetypes.guess_type(matches.input_string, strict=False)[0]

    def then(self, matches, when_response, context):
        """Append the guessed mimetype as an empty match positioned at the end of the input string."""
        end = len(matches.input_string)
        matches.append(Match(end, end, name='mimetype', value=when_response))

    @property
    def properties(self):
        """
        Properties for this rule.
        """
        return {'mimetype': [None]}
|
||||
383
lib/guessit/rules/properties/other.py
Normal file
383
lib/guessit/rules/properties/other.py
Normal file
@@ -0,0 +1,383 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
other property
|
||||
"""
|
||||
import copy
|
||||
|
||||
from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch, POST_PROCESS, AppendMatch
|
||||
from rebulk.remodule import re
|
||||
|
||||
from ..common import dash
|
||||
from ..common import seps
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_after, seps_before, seps_surround, and_
|
||||
from ...reutils import build_or_pattern
|
||||
from ...rules.common.formatters import raw_cleanup
|
||||
|
||||
|
||||
def other(config):  # pylint:disable=unused-argument,too-many-statements
    """
    Builder for rebulk object.

    Registers every string/regex pattern producing the ``other`` property, then the rules validating those
    patterns through the tags attached here ('has-neighbor', 'at-end', 'real', 'other.validate.*', ...).

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'other'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    # Unless overridden, every pattern below is named 'other' and validated with seps_surround.
    rebulk.defaults(name="other", validator=seps_surround)

    rebulk.regex('Audio-?Fix', 'Audio-?Fixed', value='Audio Fixed')
    rebulk.regex('Sync-?Fix', 'Sync-?Fixed', value='Sync Fixed')
    rebulk.regex('Dual', 'Dual-?Audio', value='Dual Audio')
    rebulk.regex('ws', 'wide-?screen', value='Widescreen')
    rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')

    # 'Proper' family: these matches are the ones counted by ProperCountRule.
    rebulk.string('Repack', 'Rerip', value='Proper',
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.string('Proper', value='Proper',
                  tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix'])

    # The 'real' tag makes the match worth 2 in ProperCountRule and subject to ValidateReal.
    rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
                 tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
    rebulk.regex('Real', value='Proper',
                 tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real'])

    rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after',
                                                     'streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix',
                 tags=['streaming_service.prefix', 'streaming_service.suffix'])

    rebulk.string('Fansub', value='Fan Subtitled', tags='has-neighbor')
    rebulk.string('Fastsub', value='Fast Subtitled', tags='has-neighbor')

    season_words = build_or_pattern(["seasons?", "series?"])
    complete_articles = build_or_pattern(["The"])

    def validate_complete(match):
        """
        Make sure a season word is defined before or after 'Complete'.

        :param match: parent match of the 'Complete' pattern
        :type match:
        :return: False when neither the completeWordsBefore nor the completeWordsAfter child is present
        :rtype: bool
        """
        children = match.children
        if not children.named('completeWordsBefore') and not children.named('completeWordsAfter'):
            return False
        return True

    # [The-][Season(s)/Serie(s)-]Complete[-Season(s)/Serie(s)]; a season word on one side is mandatory.
    rebulk.regex('(?P<completeArticle>' + complete_articles + '-)?' +
                 '(?P<completeWordsBefore>' + season_words + '-)?' +
                 'Complete' + '(?P<completeWordsAfter>-' + season_words + ')?',
                 private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
                 value={'other': 'Complete'},
                 tags=['release-group-prefix'],
                 validator={'__parent__': and_(seps_surround, validate_complete)})
    rebulk.string('R5', value='Region 5')
    rebulk.string('RC', value='Region C')
    rebulk.regex('Pre-?Air', value='Preair')
    rebulk.regex('(?:PS-?)Vita', value='PS Vita')
    # A bare 'Vita' (without the 'PS' required above) only counts next to another match.
    rebulk.regex('Vita', value='PS Vita', tags='has-neighbor')
    # 'HDRip' yields two values: 'HD' as other and 'Rip' as 'another', which RenameAnotherToOther renames.
    rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'},
                 private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True)

    for value in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
        rebulk.string(value, value=value)
    rebulk.string('3D', value='3D', tags='has-neighbor')

    rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
    rebulk.string('HR', value='High Resolution')
    rebulk.string('LD', value='Line Dubbed')
    rebulk.string('MD', value='Mic Dubbed')
    rebulk.string('mHD', 'HDLight', value='Micro HD')
    rebulk.string('LDTV', value='Low Definition')
    rebulk.string('HFR', value='High Frame Rate')
    rebulk.string('VFR', value='Variable Frame Rate')
    # validator=None replaces the default seps_surround for these patterns (presumably disabling per-pattern
    # separator validation - confirm against rebulk); separators around them are then checked by
    # ValidateStreamingServiceNeighbor.
    rebulk.string('HD', value='HD', validator=None,
                  tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
                 tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='Ultra HD', validator=None,
                 tags=['streaming_service.prefix', 'streaming_service.suffix'])
    rebulk.regex('Upscaled?', value='Upscaled')

    for value in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail',
                  'Colorized', 'Internal'):
        rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
    rebulk.regex('LiNE', value='Line Audio', tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix'])
    rebulk.regex('Read-?NFO', value='Read NFO')
    rebulk.string('CONVERT', value='Converted', tags='has-neighbor')
    rebulk.string('DOCU', 'DOKU', value='Documentary', tags='has-neighbor')
    rebulk.string('OM', value='Open Matte', tags='has-neighbor')
    rebulk.string('STV', value='Straight to Video', tags='has-neighbor')
    rebulk.string('OAR', value='Original Aspect Ratio', tags='has-neighbor')
    rebulk.string('Complet', value='Complete', tags=['has-neighbor', 'release-group-prefix'])

    for coast in ('East', 'West'):
        rebulk.regex(r'(?:Live-)?(?:Episode-)?' + coast + '-?(?:Coast-)?Feed', value=coast + ' Coast Feed')

    rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor')
    rebulk.string('Ova', 'Oav', value='Original Animated Video')

    # Validated by ValidateScreenerRule: kept only right after a match initiated by a source.
    rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
                 tags=['other.validate.screener', 'source-prefix', 'source-suffix'])
    # Validated by ValidateMuxRule: kept only when a match initiated by a source precedes it.
    rebulk.string('Mux', value='Mux', validator=seps_after,
                  tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix'])
    # Validated by ValidateHardcodedSubs: kept only next to a subtitle_language match.
    rebulk.string('HC', 'vost', value='Hardcoded Subtitles')

    rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor')
    rebulk.regex('HDR(?:10)?', value='HDR10', tags='uhdbluray-neighbor')
    rebulk.regex('Dolby-?Vision', value='Dolby Vision', tags='uhdbluray-neighbor')
    rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')

    # 'at-end' patterns are validated by ValidateAtEnd.
    rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Extras', value='Extras', tags='has-neighbor')
    rebulk.regex('Digital-?Extras?', value='Extras')
    rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
    rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')

    rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
                 ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
                 ValidateAtEnd, ValidateReal, ProperCountRule)

    return rebulk
|
||||
|
||||
|
||||
class ProperCountRule(Rule):
    """
    Add proper_count property

    The count is computed over the 'other' matches valued 'Proper': each distinct cleaned raw value counts
    for 1, or for 2 when the retained match carries the 'real' tag.
    """
    priority = POST_PROCESS

    consequence = AppendMatch

    properties = {'proper_count': [None]}

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        """
        Build the proper_count match.

        :return: a copy of the last 'Proper' match renamed to 'proper_count' and holding the count, or None
                 when there is no 'Proper' match
        """
        propers = matches.named('other', lambda match: match.value == 'Proper')
        if not propers:
            return None

        # Count distinct raw values: for a repeated raw value only the last match is retained.
        distinct = dict((raw_cleanup(proper.raw), proper) for proper in propers)

        counter = copy.copy(propers[-1])
        counter.name = 'proper_count'
        counter.value = sum(2 if 'real' in proper.tags else 1 for proper in distinct.values())
        return counter
|
||||
|
||||
|
||||
class RenameAnotherToOther(Rule):
    """
    Rename every match named `another` to `other`
    """
    priority = 32
    consequence = RenameMatch('other')

    def when(self, matches, context):
        """Select all the matches named 'another'."""
        to_rename = matches.named('another')
        return to_rename
|
||||
|
||||
|
||||
class ValidateHasNeighbor(Rule):
    """
    Validate tag has-neighbor

    Matches tagged 'has-neighbor' are scanned in order and selected for removal until one of them is found
    directly next to (only separators in between) another match or group marker.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        """
        :return: list of tagged matches to remove
        """
        def is_group(marker):
            return marker.name == 'group'

        text = matches.input_string
        rejected = []
        for candidate in matches.range(predicate=lambda match: 'has-neighbor' in match.tags):
            # Closest thing before the candidate: a match, or a group ending after that match.
            before = matches.previous(candidate, index=0)
            group_before = matches.markers.previous(candidate, is_group, 0)
            if group_before and (not before or group_before.end > before.end):
                before = group_before
            if before and not text[before.end:candidate.start].strip(seps):
                # NOTE(review): `break` ends the whole scan, so later tagged matches are never checked;
                # `continue` may have been intended - confirm before changing.
                break

            # Closest thing after the candidate: a match, or a group starting before that match.
            after = matches.next(candidate, index=0)
            group_after = matches.markers.next(candidate, is_group, 0)
            if group_after and (not after or group_after.start < after.start):
                after = group_after
            if after and not text[candidate.end:after.start].strip(seps):
                break

            rejected.append(candidate)
        return rejected
|
||||
|
||||
|
||||
class ValidateHasNeighborBefore(Rule):
    """
    Validate tag has-neighbor-before.

    Matches carrying the tag are scanned in order and selected for removal until one of them is directly
    followed (only separators in between) by another match or group marker.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        """
        :return: list of tagged matches to remove
        """
        text = matches.input_string
        rejected = []
        for candidate in matches.range(predicate=lambda match: 'has-neighbor-before' in match.tags):
            # NOTE(review): despite the tag name, it is the *next* neighbor that is inspected here.
            after = matches.next(candidate, index=0)
            group_after = matches.markers.next(candidate, lambda marker: marker.name == 'group', 0)
            if group_after and (not after or group_after.start < after.start):
                after = group_after
            if after and not text[candidate.end:after.start].strip(seps):
                # NOTE(review): `break` ends the whole scan; later tagged matches are never checked.
                break
            rejected.append(candidate)
        return rejected
|
||||
|
||||
|
||||
class ValidateHasNeighborAfter(Rule):
    """
    Validate tag has-neighbor-after.

    Matches carrying the tag are scanned in order and selected for removal until one of them is directly
    preceded (only separators in between) by another match or group marker.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        """
        :return: list of tagged matches to remove
        """
        text = matches.input_string
        rejected = []
        for candidate in matches.range(predicate=lambda match: 'has-neighbor-after' in match.tags):
            # NOTE(review): despite the tag name, it is the *previous* neighbor that is inspected here.
            before = matches.previous(candidate, index=0)
            group_before = matches.markers.previous(candidate, lambda marker: marker.name == 'group', 0)
            if group_before and (not before or group_before.end > before.end):
                before = group_before
            if before and not text[before.end:candidate.start].strip(seps):
                # NOTE(review): `break` ends the whole scan; later tagged matches are never checked.
                break
            rejected.append(candidate)
        return rejected
|
||||
|
||||
|
||||
class ValidateScreenerRule(Rule):
    """
    Validate tag other.validate.screener

    A tagged match is kept only when a match initiated by a 'source' pattern precedes it with nothing but
    separators in between.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        """
        :return: list of screener matches to remove
        """
        rejected = []
        for screener in matches.named('other', lambda match: 'other.validate.screener' in match.tags):
            source = matches.previous(screener, lambda match: match.initiator.name == 'source', 0)
            if source and not matches.input_string[source.end:screener.start].strip(seps):
                # Glued to a source: this is a legitimate screener.
                continue
            rejected.append(screener)
        return rejected
|
||||
|
||||
|
||||
class ValidateMuxRule(Rule):
    """
    Validate tag other.validate.mux

    A tagged match is kept only when some match initiated by a 'source' pattern precedes it.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        """
        :return: list of mux matches to remove
        """
        def from_source(candidate):
            return candidate.initiator.name == 'source'

        tagged = matches.named('other', lambda match: 'other.validate.mux' in match.tags)
        return [mux for mux in tagged if not matches.previous(mux, from_source, 0)]
|
||||
|
||||
|
||||
class ValidateHardcodedSubs(Rule):
    """Validate HC matches: keep them only when directly next to a subtitle_language match."""

    priority = 32
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: list of 'Hardcoded Subtitles' matches to remove
        """
        def is_subtitle_language(candidate):
            return candidate.name == 'subtitle_language'

        def has_content(hole):
            # A hole made only of separators does not keep two matches apart.
            return hole.value.strip(seps)

        to_remove = []
        for hardcoded in matches.named('other', predicate=lambda match: match.value == 'Hardcoded Subtitles'):
            following = matches.next(hardcoded, predicate=is_subtitle_language, index=0)
            if following and not matches.holes(hardcoded.end, following.start, predicate=has_content):
                continue

            preceding = matches.previous(hardcoded, predicate=is_subtitle_language, index=0)
            if preceding and not matches.holes(preceding.end, hardcoded.start, predicate=has_content):
                continue

            to_remove.append(hardcoded)

        return to_remove
|
||||
|
||||
|
||||
class ValidateStreamingServiceNeighbor(Rule):
    """Validate streaming service's neighbors."""

    priority = 32
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        Select the prefix/suffix tagged 'other' matches that are glued to something else than a streaming
        service.

        A match not followed by a separator survives only as a 'streaming_service.prefix' directly followed
        by a streaming_service match; otherwise, a match not preceded by a separator survives only as a
        'streaming_service.suffix' directly preceded by one.

        :return: list of matches (and their children) to remove
        """
        def is_candidate(found):
            return (found.initiator.name != 'source'
                    and ('streaming_service.prefix' in found.tags
                         or 'streaming_service.suffix' in found.tags))

        def is_streaming_service(found):
            return found.name == 'streaming_service'

        def has_content(hole):
            return hole.value.strip(seps)

        to_remove = []
        for tagged in matches.named('other', predicate=is_candidate):
            origin = tagged.initiator
            if not seps_after(origin):
                if 'streaming_service.prefix' in origin.tags:
                    following = matches.next(origin, is_streaming_service, 0)
                    if following and not matches.holes(origin.end, following.start, predicate=has_content):
                        continue
            elif not seps_before(origin):
                if 'streaming_service.suffix' in origin.tags:
                    preceding = matches.previous(origin, is_streaming_service, 0)
                    if preceding and not matches.holes(preceding.end, origin.start, predicate=has_content):
                        continue
            else:
                # Properly surrounded by separators: nothing to validate.
                continue

            if origin.children:
                to_remove.extend(origin.children)
            to_remove.append(origin)

        return to_remove
|
||||
|
||||
|
||||
class ValidateAtEnd(Rule):
    """Validate other which should occur at the end of a filepart."""

    priority = 32
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        Select the 'at-end' tagged other matches that are followed, inside their filepart, by a non-separator
        hole or by a match that is neither 'other' nor 'container'.

        :return: list of matches to remove
        """
        def is_at_end_other(found):
            return found.name == 'other' and 'at-end' in found.tags

        to_remove = []
        for filepart in matches.markers.named('path'):
            for candidate in matches.range(filepart.start, filepart.end, predicate=is_at_end_other):
                if matches.holes(candidate.end, filepart.end, predicate=lambda m: m.value.strip(seps)):
                    to_remove.append(candidate)
                elif matches.range(candidate.end, filepart.end,
                                   predicate=lambda m: m.name not in ('other', 'container')):
                    to_remove.append(candidate)

        return to_remove
|
||||
|
||||
|
||||
class ValidateReal(Rule):
    """
    Validate Real

    A 'real' tagged other match is removed when no match is found between the start of its filepart and
    the match itself.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        """
        :return: list of matches to remove
        """
        def is_real_other(found):
            return found.name == 'other' and 'real' in found.tags

        rejected = []
        for filepart in matches.markers.named('path'):
            for candidate in matches.range(filepart.start, filepart.end, is_real_other):
                if matches.range(filepart.start, candidate.start):
                    continue
                rejected.append(candidate)

        return rejected
|
||||
46
lib/guessit/rules/properties/part.py
Normal file
46
lib/guessit/rules/properties/part.py
Normal file
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
part property
|
||||
"""
|
||||
from rebulk.remodule import re
|
||||
|
||||
from rebulk import Rebulk
|
||||
from ..common import dash
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_surround, int_coercable, and_
|
||||
from ..common.numeral import numeral, parse_numeral
|
||||
from ...reutils import build_or_pattern
|
||||
|
||||
|
||||
def part(config):  # pylint:disable=unused-argument
    """
    Build the rebulk object detecting the part property.

    :param config: rule configuration; its 'prefixes' entry lists the words introducing a part number
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'part'))
    rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], validator={'__parent__': seps_surround})

    prefixes = config['prefixes']

    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators

        :param match: part match to validate
        :type match:
        :return: True when the raw value is int coercable, else the result of seps_surround
        :rtype:
        """
        return True if int_coercable(match.raw) else seps_surround(match)

    def in_range(match):
        return 0 < match.value < 100

    pattern = build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')'
    rebulk.regex(pattern,
                 prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral,
                 validator={'part': and_(validate_roman, in_range)})

    return rebulk
|
||||
347
lib/guessit/rules/properties/release_group.py
Normal file
347
lib/guessit/rules/properties/release_group.py
Normal file
@@ -0,0 +1,347 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
release_group property
|
||||
"""
|
||||
import copy
|
||||
|
||||
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
|
||||
from rebulk.match import Match
|
||||
|
||||
from ..common import seps
|
||||
from ..common.comparators import marker_sorted
|
||||
from ..common.expected import build_expected_function
|
||||
from ..common.formatters import cleanup
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import int_coercable, seps_surround
|
||||
from ..properties.title import TitleFromPosition
|
||||
|
||||
|
||||
def release_group(config):
    """
    Builder for rebulk object.

    :param config: rule configuration; 'forbidden_names' lists the words stripped from a group name and
                   'ignored_seps' the separator characters that may belong to a group name
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    forbidden_groupnames = config['forbidden_names']

    groupname_ignore_seps = config['ignored_seps']
    # Separators that can never be part of a group name.
    groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])

    def clean_groupname(string):
        """
        Removes and strip separators from input_string

        Forbidden names found at the beginning or at the end of the string, and delimited by a separator,
        are removed as well.

        :param string: raw release group candidate
        :type string: str
        :return: cleaned release group name
        :rtype: str
        """
        string = string.strip(groupname_seps)
        # Ignored separators are stripped too, unless they wrap the whole name or also appear inside it.
        if not (string.endswith(tuple(groupname_ignore_seps)) and string.startswith(tuple(groupname_ignore_seps))) \
                and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
            string = string.strip(groupname_ignore_seps)
        for forbidden in forbidden_groupnames:
            if string.lower().startswith(forbidden) and string[len(forbidden):len(forbidden) + 1] in seps:
                string = string[len(forbidden):]
                string = string.strip(groupname_seps)
            if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
                # Drop the trailing forbidden word. The former `string[:len(forbidden)]` kept the first
                # len(forbidden) characters instead (e.g. 'GROUP-rip' became 'GRO').
                string = string[:len(string) - len(forbidden)]
                string = string.strip(groupname_seps)
        return string.strip()

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))

    expected_group = build_expected_function('expected_group')

    # Release groups explicitly expected through the context; only enabled when 'expected_group' is set.
    rebulk.functional(expected_group, name='release_group', tags=['expected'],
                      validator=seps_surround,
                      conflict_solver=lambda match, other: other,
                      disabled=lambda context: not context.get('expected_group'))

    return rebulk.rules(
        DashSeparatedReleaseGroup(clean_groupname),
        SceneReleaseGroup(clean_groupname),
        AnimeReleaseGroup
    )
|
||||
|
||||
|
||||
# Match names, and tags, describing what may (or may not) come right before a scene release group.
# NOTE(review): presumably consumed by SceneReleaseGroup, which is defined further down - confirm there.
_scene_previous_names = ('video_codec', 'source', 'video_api', 'audio_codec', 'audio_profile', 'video_profile',
                         'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
                         'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix')

# Tag attached to several patterns of the `other` property (e.g. 'Complete', 'Internal').
_scene_previous_tags = ('release-group-prefix',)

# Tag name suggests matches that must not precede a release group - TODO confirm against its users.
_scene_no_previous_tags = ('no-release-group-prefix',)
|
||||
|
||||
|
||||
class DashSeparatedReleaseGroup(Rule):
    """
    Detect dash separated release groups that might appear at the end or at the beginning of a release name.

    Series.S01E02.Pilot.DVDRip.x264-CS.mkv
        release_group: CS
    abc-the.title.name.1983.1080p.bluray.x264.mkv
        release_group: abc

    At the end: Release groups should be dash-separated and shouldn't contain spaces nor
    appear in a group with other matches. The preceding matches should be separated by dot.
    If a release group is found, the conflicting matches are removed.

    At the beginning: Release groups should be dash-separated and shouldn't contain spaces nor appear in a group.
    It should be followed by a hole with dot-separated words.
    Detection only happens if no matches exist at the beginning.
    """
    consequence = [RemoveMatch, AppendMatch]

    def __init__(self, value_formatter):
        """Keep the formatter that is applied to the value of created release_group matches."""
        super(DashSeparatedReleaseGroup, self).__init__()
        self.value_formatter = value_formatter

    @classmethod
    def is_valid(cls, matches, candidate, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Whether a candidate is a valid release group.

        Returns ``True``/``False`` on a decision; falls through (``None``) when walking the
        previous matches at the end of a filepart does not confirm the candidate.
        """
        if not at_end:
            # Candidate at the beginning of the filepart.
            if len(candidate.value) <= 1:
                return False

            if matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group'):
                return False

            following_hole = matches.holes(candidate.end, end, index=0,
                                           predicate=lambda m: m.start == candidate.end)
            if not following_hole:
                return False

            raw = following_hole.raw
            # The hole must start with the single dash and contain dot-separated words without spaces.
            return raw[0] == '-' and '-' not in raw[1:] and '.' in raw and ' ' not in raw

        # Candidate at the end of the filepart.
        group = matches.markers.at_match(candidate, predicate=lambda m: m.name == 'group', index=0)
        if group and matches.at_match(group, predicate=lambda m: not m.private and m.span != candidate.span):
            return False

        dash_seen = False
        cursor = candidate
        while cursor:
            # Closest public, non-expected match located before the cursor.
            previous = matches.range(start, cursor.start, index=-1,
                                     predicate=lambda m: not m.private and 'expected' not in m.tags)
            if not previous:
                break

            separator = cursor.input_string[previous.end:cursor.start]
            if not separator and cursor.raw[0] == '-':
                separator = '-'

            cursor = previous

            if not dash_seen:
                # The first separator met (the one right before the candidate) must be a dash.
                if separator != '-':
                    break
                dash_seen = True
                continue

            if separator == '.':
                return True

    def detect(self, matches, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Detect release group at the end or at the beginning of a filepart.

        Returns the candidate match/hole when it is valid, otherwise ``None``.
        """
        candidate = None
        if at_end:
            container = matches.ending(end, lambda m: m.name == 'container', index=0)
            if container:
                # Ignore the container (extension) when looking at the end of the filepart.
                end = container.start

            def is_end_candidate(m):
                """Public match without dash nor surrounding blanks, not flagged 'not-a-release-group'."""
                flagged = m.name == 'other' and 'not-a-release-group' in m.tags
                return not m.private and not flagged and '-' not in m.raw and m.raw.strip() == m.raw

            candidate = matches.ending(end, index=0, predicate=is_end_candidate)

        if not candidate:
            if at_end:
                candidate = matches.holes(start, end, seps=seps, index=-1,
                                          predicate=lambda m: m.end == end and m.raw.strip(seps) and m.raw[0] == '-')
            else:
                candidate = matches.holes(start, end, seps=seps, index=0,
                                          predicate=lambda m: m.start == start and m.raw.strip(seps))

        if candidate and self.is_valid(matches, candidate, start, end, at_end):
            return candidate

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        """Return ``(to_remove, to_append)`` for the first filepart yielding a result, else ``None``."""
        if matches.named('release_group'):
            return

        to_remove = []
        to_append = []
        for filepart in matches.markers.named('path'):
            candidate = self.detect(matches, filepart.start, filepart.end, True)
            if candidate:
                # A candidate found at the end replaces the matches located at the same place.
                to_remove.extend(matches.at_match(candidate))
            else:
                candidate = self.detect(matches, filepart.start, filepart.end, False)

            if not candidate:
                continue

            releasegroup = Match(candidate.start, candidate.end, name='release_group',
                                 formatter=self.value_formatter, input_string=candidate.input_string)
            if releasegroup.value:
                to_append.append(releasegroup)
            # NOTE(review): indentation was lost in this copy; the return is kept inside the
            # ``if candidate`` branch of the loop -- confirm against the reference sources.
            if to_remove or to_append:
                return to_remove, to_append
|
||||
|
||||
|
||||
class SceneReleaseGroup(Rule):
    """
    Add release_group match in existing matches (scene format).

    Something.XViD-ReleaseGroup.mkv
    """
    dependency = [TitleFromPosition]
    consequence = AppendMatch

    properties = {'release_group': [None]}

    def __init__(self, value_formatter):
        """Keep the formatter that is applied to the holes considered as release groups."""
        super(SceneReleaseGroup, self).__init__()
        self.value_formatter = value_formatter

    @staticmethod
    def is_previous_match(match):
        """
        Check if match can precede release_group

        :param match: match to check
        :return: result of the tag check
        """
        if match.name in _scene_previous_names:
            return not match.tagged(*_scene_no_previous_tags)
        return match.tagged(*_scene_previous_tags)

    def when(self, matches, context):  # pylint:disable=too-many-locals
        """Collect, for each filepart without release_group, the last hole that follows a qualifying match."""
        found = []

        for filepart in marker_sorted(matches.markers.named('path'), matches):
            # pylint:disable=cell-var-from-loop
            start, end = filepart.span
            # If a release_group is already found in this filepart, ignore it.
            if matches.named('release_group', predicate=lambda m: m.start >= start and m.end <= end):
                continue

            titles = matches.named('title', predicate=lambda m: m.start >= start and m.end <= end)

            def keep_only_first_title(match):
                """
                Keep only first title from this filepart, as other ones are most likely release group.

                :param match: match to check
                :return: True when match is a title of this filepart other than the first one
                :rtype: bool
                """
                return match in titles[1:]

            last_hole = matches.holes(start, end + 1, formatter=self.value_formatter,
                                      ignore=keep_only_first_title,
                                      predicate=lambda hole: cleanup(hole.value), index=-1)
            if not last_hole:
                continue

            def previous_match_filter(match):
                """
                Filter to apply to find previous match

                :param match: match to check
                :return: False for matches starting before the filepart, otherwise whether the match is
                    public or can precede a release group
                """
                if match.start < filepart.start:
                    return False
                return not match.private or self.is_previous_match(match)

            previous_match = matches.previous(last_hole, previous_match_filter, index=0)

            is_release_group = (
                previous_match and self.is_previous_match(previous_match)
                # only separators between the previous match and the hole
                and not matches.input_string[previous_match.end:last_hole.start].strip(seps)
                and not int_coercable(last_hole.value.strip(seps))
            )
            if not is_release_group:
                continue

            last_hole.name = 'release_group'
            last_hole.tags = ['scene']

            # if hole is inside a group marker with same value, remove [](){} ...
            group = matches.markers.at_match(last_hole, lambda marker: marker.name == 'group', 0)
            if group:
                group.formatter = self.value_formatter
                if group.value == last_hole.value:
                    last_hole.start = group.start + 1
                    last_hole.end = group.end - 1
                    last_hole.tags = ['anime']

            # Extra titles covered by the hole are dropped from the matches.
            for ignored_match in matches.range(last_hole.start, last_hole.end, keep_only_first_title):
                matches.remove(ignored_match)

            found.append(last_hole)
        return found
|
||||
|
||||
|
||||
class AnimeReleaseGroup(Rule):
    """
    Add release_group match in existing matches (anime format)
    ...[ReleaseGroup] Something.mkv
    """
    dependency = [SceneReleaseGroup, TitleFromPosition]
    consequence = [RemoveMatch, AppendMatch]

    properties = {'release_group': [None]}

    def when(self, matches, context):
        """
        Turn the first suitable group marker of each filepart into a release_group match.

        :param matches: current matches
        :param context: parsing context (not used here)
        :return: ``(to_remove, to_append)`` when something was found, otherwise ``False``
        """
        to_remove = []
        to_append = []

        # If a release_group is found before, ignore this kind of release_group rule.
        # (A former second guard also required an existing release_group and therefore could
        # never be reached after this one; it has been removed as dead code.)
        if matches.named('release_group'):
            return False

        def is_candidate_group(marker):
            """Group marker holding no match except 'weak-language' ones, with a non numeric content."""
            return (marker.name == 'group'
                    and not matches.range(marker.start, marker.end,
                                          lambda m: 'weak-language' not in m.tags)
                    and marker.value.strip(seps)
                    and not int_coercable(marker.value.strip(seps)))

        for filepart in marker_sorted(matches.markers.named('path'), matches):
            empty_group = matches.markers.range(filepart.start, filepart.end, is_candidate_group, 0)
            if not empty_group:
                continue

            # Work on a copy of the marker, turned into a regular match without its enclosing characters.
            group = copy.copy(empty_group)
            group.marker = False
            group.raw_start += 1
            group.raw_end -= 1
            group.tags = ['anime']
            group.name = 'release_group'
            to_append.append(group)
            # 'weak-language' matches located inside the group are replaced by the release group.
            to_remove.extend(matches.range(empty_group.start, empty_group.end,
                                           lambda m: 'weak-language' in m.tags))

        if to_remove or to_append:
            return to_remove, to_append
        return False
|
||||
163
lib/guessit/rules/properties/screen_size.py
Normal file
163
lib/guessit/rules/properties/screen_size.py
Normal file
@@ -0,0 +1,163 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
screen_size property
|
||||
"""
|
||||
from rebulk.match import Match
|
||||
from rebulk.remodule import re
|
||||
|
||||
from rebulk import Rebulk, Rule, RemoveMatch, AppendMatch
|
||||
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.quantity import FrameRate
|
||||
from ..common.validators import seps_surround
|
||||
from ..common import dash, seps
|
||||
from ...reutils import build_or_pattern
|
||||
|
||||
|
||||
def screen_size(config):
    """
    Builder for rebulk object.

    :param config: rule configuration; the ``interlaced``, ``progressive``, ``frame_rates``,
        ``min_ar`` and ``max_ar`` entries are read
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    interlaced = frozenset(config['interlaced'])
    progressive = frozenset(config['progressive'])
    frame_rates = [re.escape(rate) for rate in config['frame_rates']]
    min_ar = config['min_ar']
    max_ar = config['max_ar']

    rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name='screen_size', validator=seps_surround, abbreviations=[dash],
                    disabled=lambda context: is_disabled(context, 'screen_size'))

    frame_rate_pattern = build_or_pattern(frame_rates, name='frame_rate')
    interlaced_pattern = build_or_pattern(interlaced, name='height')
    progressive_pattern = build_or_pattern(progressive, name='height')

    # Optional "<width>x" / "<width>*" prefix shared by the height based patterns.
    res_pattern = r'(?:(?P<width>\d{3,4})(?:x|\*))?'
    optional_frame_rate = frame_rate_pattern + '?'
    height_patterns = (
        res_pattern + interlaced_pattern + r'(?P<scan_type>i)' + optional_frame_rate,
        res_pattern + progressive_pattern + r'(?P<scan_type>p)' + optional_frame_rate,
        res_pattern + progressive_pattern + r'(?P<scan_type>p)?(?:hd)',
        res_pattern + progressive_pattern + r'(?P<scan_type>p)?x?',
    )
    for height_pattern in height_patterns:
        rebulk.regex(height_pattern)

    rebulk.string('4k', value='2160p')

    # Explicit "<width>x<height>" form: keeps the default conflict handling against other
    # screen_size matches and yields to any other kind of match.
    rebulk.regex(r'(?P<width>\d{3,4})-?(?:x|\*)-?(?P<height>\d{3,4})',
                 conflict_solver=lambda match, other: '__default__' if other.name == 'screen_size' else other)

    # Standalone frame rate ("<rate>p" / "<rate>fps").
    rebulk.regex(frame_rate_pattern + '(p|fps)', name='frame_rate',
                 formatter=FrameRate.fromstring, disabled=lambda context: is_disabled(context, 'frame_rate'))

    rebulk.rules(PostProcessScreenSize(progressive, min_ar, max_ar), ScreenSizeOnlyOne, ResolveScreenSizeConflicts)

    return rebulk
|
||||
|
||||
|
||||
class PostProcessScreenSize(Rule):
    """
    Process the screen size calculating the aspect ratio if available.

    Convert to a standard notation (720p, 1080p, etc) when it's a standard resolution and
    aspect ratio is valid or not available.

    It also creates an aspect_ratio match when available.
    """
    consequence = AppendMatch

    def __init__(self, standard_heights, min_ar, max_ar):
        """
        :param standard_heights: heights for which the "<height><scan type>" notation is used
        :param min_ar: exclusive lower bound of the accepted aspect ratio
        :param max_ar: exclusive upper bound of the accepted aspect ratio
        """
        super(PostProcessScreenSize, self).__init__()
        self.standard_heights = standard_heights
        self.min_ar = min_ar
        self.max_ar = max_ar

    def when(self, matches, context):
        """
        Set the value of every screen_size match and collect the matches to append.

        :param matches: current matches
        :param context: parsing context, used to check disabled properties
        :return: frame_rate and aspect_ratio matches to append
        :rtype: list
        """
        to_append = []
        for match in matches.named('screen_size'):
            if not is_disabled(context, 'frame_rate'):
                # Promote the frame_rate children captured by the screen_size patterns.
                for frame_rate in match.children.named('frame_rate'):
                    frame_rate.formatter = FrameRate.fromstring
                    to_append.append(frame_rate)

            values = match.children.to_dict()
            if 'height' not in values:
                continue

            scan_type = (values.get('scan_type') or 'p').lower()
            height = values['height']
            if 'width' not in values:
                match.value = '{0}{1}'.format(height, scan_type)
                continue

            width = values['width']
            try:
                calculated_ar = float(width) / float(height)
            except ZeroDivisionError:
                # Fix: a height made only of zeros (e.g. "1280x000") used to crash here.
                # No aspect ratio can be computed, keep the raw "<width>x<height>" notation.
                match.value = '{0}x{1}'.format(width, height)
                continue

            if not is_disabled(context, 'aspect_ratio'):
                to_append.append(Match(match.start, match.end, input_string=match.input_string,
                                       name='aspect_ratio', value=round(calculated_ar, 3)))

            if height in self.standard_heights and self.min_ar < calculated_ar < self.max_ar:
                match.value = '{0}{1}'.format(height, scan_type)
            else:
                match.value = '{0}x{1}'.format(width, height)

        return to_append
|
||||
|
||||
|
||||
class ScreenSizeOnlyOne(Rule):
    """
    Keep a single screen_size per filepath part.

    When a filepart holds several screen_size matches with different values, only the last
    one of the filepart is kept.
    """
    consequence = RemoveMatch

    def when(self, matches, context):
        """Return the screen_size matches to remove."""
        to_remove = []
        for filepart in matches.markers.named('path'):
            found = matches.range(filepart.start, filepart.end,
                                  lambda match: match.name == 'screen_size')
            last_first = list(reversed(found))
            if len(last_first) > 1 and len({match.value for match in last_first}) > 1:
                # Everything but the last match of the filepart is discarded.
                to_remove.extend(last_first[1:])

        return to_remove
|
||||
|
||||
|
||||
class ResolveScreenSizeConflicts(Rule):
    """
    Resolve screen_size conflicts with season and episode matches.

    The conflicting season/episode matches are removed when the screen_size is directly followed
    by a video_profile or directly preceded by a date, source, other or streaming_service match;
    otherwise the screen_size itself is removed.
    """
    consequence = RemoveMatch

    def when(self, matches, context):
        """Return the matches to remove."""

        def is_filled(hole):
            """Hole holding something else than separators."""
            return hole.value and hole.value.strip(seps)

        to_remove = []
        for filepart in matches.markers.named('path'):
            # Only the first screen_size of the filepart is considered.
            screensize = matches.range(filepart.start, filepart.end, lambda match: match.name == 'screen_size', 0)
            if not screensize:
                continue

            conflicts = matches.conflicting(screensize, lambda match: match.name in ('season', 'episode'))
            if not conflicts:
                continue

            has_neighbor = False

            video_profile = matches.range(screensize.end, filepart.end,
                                          lambda match: match.name == 'video_profile', 0)
            if video_profile and not matches.holes(screensize.end, video_profile.start, predicate=is_filled):
                to_remove.extend(conflicts)
                has_neighbor = True

            previous = matches.previous(screensize, index=0, predicate=(
                lambda m: m.name in ('date', 'source', 'other', 'streaming_service')))
            if previous and not matches.holes(previous.end, screensize.start, predicate=is_filled):
                to_remove.extend(conflicts)
                has_neighbor = True

            if not has_neighbor:
                to_remove.append(screensize)

        return to_remove
|
||||
30
lib/guessit/rules/properties/size.py
Normal file
30
lib/guessit/rules/properties/size.py
Normal file
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
size property
|
||||
"""
|
||||
import re
|
||||
|
||||
from rebulk import Rebulk
|
||||
|
||||
from ..common import dash
|
||||
from ..common.quantity import Size
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_surround
|
||||
|
||||
|
||||
def size(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration (not used)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    size_patterns = (
        r'\d+-?[mgt]b',       # integer quantity, e.g. "700mb"
        r'\d+\.\d+-?[mgt]b',  # decimal quantity, e.g. "1.4gb"
    )

    builder = Rebulk(disabled=lambda context: is_disabled(context, 'size'))
    builder.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    builder.defaults(name='size', validator=seps_surround)
    builder.regex(*size_patterns, formatter=Size.fromstring, tags=['release-group-prefix'])

    return builder
|
||||
235
lib/guessit/rules/properties/source.py
Normal file
235
lib/guessit/rules/properties/source.py
Normal file
@@ -0,0 +1,235 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
source property
|
||||
"""
|
||||
import copy
|
||||
|
||||
from rebulk.remodule import re
|
||||
|
||||
from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule
|
||||
|
||||
from .audio_codec import HqConflictRule
|
||||
from ..common import dash, seps
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_before, seps_after, or_
|
||||
|
||||
|
||||
def source(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration (not used)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], private_parent=True, children=True)
    rebulk = rebulk.defaults(name='source',
                             tags=['video-codec-prefix', 'streaming_service.suffix'],
                             validate_all=True,
                             validator={'__parent__': or_(seps_before, seps_after)})

    # "Rip" fragments, captured as an 'other' child of the source pattern.
    rip_prefix = '(?P<other>Rip)-?'
    rip_suffix = '-?(?P<other>Rip)'
    rip_optional_suffix = '(?:' + rip_suffix + ')?'

    def build_source_pattern(*patterns, **kwargs):
        """Wrap each pattern in a capturing group, between the optional 'prefix' and 'suffix' keywords."""
        template = (kwargs.get('prefix') or '') + '({0})' + (kwargs.get('suffix') or '')
        return [template.format(pattern) for pattern in patterns]

    def demote_other(match, other):  # pylint: disable=unused-argument
        """Default conflict solver with 'other' property."""
        if other.name in ('other', 'release_group'):
            return other
        return '__default__'

    # --- Cam / telesync / telecine / workprint ---
    rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
                 value={'source': 'VHS', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('CAM', suffix=rip_optional_suffix),
                 value={'source': 'Camera', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?CAM', suffix=rip_optional_suffix),
                 value={'source': 'HD Camera', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=rip_optional_suffix),
                 value={'source': 'Telesync', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=rip_optional_suffix),
                 value={'source': 'HD Telesync', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint')
    rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=rip_optional_suffix),
                 value={'source': 'Telecine', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=rip_optional_suffix),
                 value={'source': 'HD Telecine', 'other': 'Rip'})

    # --- TV / DVD ---
    rebulk.regex(*build_source_pattern('PPV', suffix=rip_optional_suffix),
                 value={'source': 'Pay-per-view', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('SD-?TV', suffix=rip_optional_suffix),
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', suffix=rip_suffix),  # TV is too common to allow matching
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix),
                 value={'source': 'TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV-?(?=Dub)'), value='TV')
    rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=rip_optional_suffix),
                 value={'source': 'Digital TV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DVD', suffix=rip_optional_suffix),
                 value={'source': 'DVD', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DM', suffix=rip_optional_suffix),
                 value={'source': 'Digital Master', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('VIDEO-?TS', 'DVD-?R(?:$|(?!E))',  # 'DVD-?R(?:$|^E)' => DVD-Real ...
                                       'DVD-?9', 'DVD-?5'), value='DVD')

    # --- HDTV ---
    rebulk.regex(*build_source_pattern('HD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV-?HD', suffix=rip_suffix), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('TV', suffix='-?(?P<other>Rip-?HD)'), conflict_solver=demote_other,
                 value={'source': 'HDTV', 'other': 'Rip'})

    # --- Video on demand / web ---
    rebulk.regex(*build_source_pattern('VOD', suffix=rip_optional_suffix),
                 value={'source': 'Video on Demand', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix),
                 value={'source': 'Web', 'other': 'Rip'})
    # WEBCap is a synonym to WEBRip, mostly used by non english
    rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
                 value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
    rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
                 value={'source': 'Web'})
    # Bare "WEB": tagged so that ValidateWeakSource can discard it.
    rebulk.regex('(WEB)', value='Web', tags='weak.source')

    # --- HD-DVD / Blu-ray ---
    rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
                 value={'source': 'HD-DVD', 'other': 'Rip'})

    rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50', suffix=rip_optional_suffix),
                 value={'source': 'Blu-ray', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('(?P<another>BR)-?(?=Scr(?:eener)?)', '(?P<another>BR)-?(?=Mux)'),  # BRRip
                 value={'source': 'Blu-ray', 'another': 'Reencoded'})
    rebulk.regex(*build_source_pattern('(?P<another>BR)', suffix=rip_suffix),  # BRRip
                 value={'source': 'Blu-ray', 'other': 'Rip', 'another': 'Reencoded'})

    rebulk.regex(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'), value='Ultra HD Blu-ray')

    # --- Analog / Ultra HDTV ---
    rebulk.regex(*build_source_pattern('AHDTV'), value='Analog HDTV')
    rebulk.regex(*build_source_pattern('UHD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
                 value={'source': 'Ultra HDTV', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('UHD', suffix=rip_suffix), conflict_solver=demote_other,
                 value={'source': 'Ultra HDTV', 'other': 'Rip'})

    # --- Satellite ---
    rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=rip_optional_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})
    rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
                 value={'source': 'Satellite', 'other': 'Rip'})

    rebulk.rules(ValidateSourcePrefixSuffix, ValidateWeakSource, UltraHdBlurayRule)

    return rebulk
|
||||
|
||||
|
||||
class UltraHdBlurayRule(Rule):
    """
    Replace other:Ultra HD and source:Blu-ray with source:Ultra HD Blu-ray
    """
    dependency = HqConflictRule
    consequence = [RemoveMatch, AppendMatch]

    @classmethod
    def find_ultrahd(cls, matches, start, end, index):
        """Find Ultra HD match."""
        def is_ultrahd(m):
            return not m.private and m.name == 'other' and m.value == 'Ultra HD'

        return matches.range(start, end, index=index, predicate=is_ultrahd)

    @classmethod
    def validate_range(cls, matches, start, end):
        """Validate no holes or invalid matches exist in the specified range."""
        def is_invalid(m):
            # Public matches other than screen_size, color_depth or an 'other' tagged 'uhdbluray-neighbor'.
            return not m.private and (
                m.name not in ('screen_size', 'color_depth') and (
                    m.name != 'other' or 'uhdbluray-neighbor' not in m.tags))

        return (
            not matches.holes(start, end, predicate=lambda m: m.value.strip(seps)) and
            not matches.range(start, end, predicate=is_invalid)
        )

    def when(self, matches, context):
        """Return ``(to_remove, to_append)`` when a Blu-ray source has to be replaced, else ``False``."""
        to_remove = []
        to_append = []
        for filepart in matches.markers.named('path'):
            blurays = matches.range(filepart.start, filepart.end, predicate=(
                lambda m: not m.private and m.name == 'source' and m.value == 'Blu-ray'))
            for bluray in blurays:
                # Look for "Ultra HD" before the source first, then after it.
                ultrahd = self.find_ultrahd(matches, filepart.start, bluray.start, -1)
                if not ultrahd or not self.validate_range(matches, ultrahd.end, bluray.start):
                    ultrahd = self.find_ultrahd(matches, bluray.end, filepart.end, 0)
                    if not ultrahd or not self.validate_range(matches, bluray.end, ultrahd.start):
                        # Without a valid "Ultra HD" neighbor, a 2160p screen_size is required.
                        if not matches.range(filepart.start, filepart.end, predicate=(
                                lambda m: m.name == 'screen_size' and m.value == '2160p')):
                            continue

                if ultrahd:
                    ultrahd.private = True

                replacement = copy.copy(bluray)
                replacement.value = 'Ultra HD Blu-ray'
                to_remove.append(bluray)
                to_append.append(replacement)

        if to_remove or to_append:
            return to_remove, to_append
        return False
|
||||
|
||||
|
||||
class ValidateSourcePrefixSuffix(Rule):
    """
    Validate source with source prefix, source suffix.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """Return the source initiators (and their children) lacking a separator or tagged neighbor."""
        rejected = []
        for filepart in matches.markers.named('path'):
            for source_match in matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'source'):
                parent = source_match.initiator

                # NOTE(review): the start bound of this range is greater than its end bound
                # (start - 1, start - 2); kept unchanged here -- confirm the intended bounds.
                bad_prefix = not seps_before(parent) and \
                    not matches.range(parent.start - 1, parent.start - 2,
                                      lambda m: 'source-prefix' in m.tags)
                # The suffix side is only checked when the prefix side is fine.
                bad_suffix = not bad_prefix and not seps_after(parent) and \
                    not matches.range(parent.end, parent.end + 1,
                                      lambda m: 'source-suffix' in m.tags)

                if bad_prefix or bad_suffix:
                    if parent.children:
                        rejected.extend(parent.children)
                    rejected.append(parent)

        return rejected
|
||||
|
||||
|
||||
class ValidateWeakSource(Rule):
    """
    Validate weak source
    """
    dependency = [ValidateSourcePrefixSuffix]
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """Return the weak source matches (and their children) to remove."""

        def is_source(m):
            return m.name == 'source'

        discarded = []
        for filepart in matches.markers.named('path'):
            for candidate in matches.range(filepart.start, filepart.end, predicate=is_source):
                # if there are more than 1 source in this filepart, just before the year and with holes for the title
                # most likely the source is part of the title
                if 'weak.source' not in candidate.tags:
                    continue
                if not matches.range(candidate.end, filepart.end, predicate=is_source):
                    continue
                if not matches.holes(filepart.start, candidate.start,
                                     predicate=lambda m: m.value.strip(seps), index=-1):
                    continue

                if candidate.children:
                    discarded.extend(candidate.children)
                discarded.append(candidate)

        return discarded
|
||||
78
lib/guessit/rules/properties/streaming_service.py
Normal file
78
lib/guessit/rules/properties/streaming_service.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
streaming_service property
|
||||
"""
|
||||
import re
|
||||
|
||||
from rebulk import Rebulk
|
||||
from rebulk.rules import Rule, RemoveMatch
|
||||
|
||||
from ..common.pattern import is_disabled
|
||||
from ...rules.common import seps, dash
|
||||
from ...rules.common.validators import seps_before, seps_after
|
||||
|
||||
|
||||
def streaming_service(config):  # pylint: disable=too-many-statements,unused-argument
    """Streaming service property.

    Each configuration entry maps a streaming service value to one pattern or to a list of
    patterns. A pattern starting with ``re:`` is registered as a regular expression, any other
    pattern as a plain string.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    regex_marker = 're:'

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'streaming_service'))
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['source-prefix'])

    for value, items in config.items():
        patterns = items if isinstance(items, list) else [items]
        for pattern in patterns:
            if pattern.startswith(regex_marker):
                # Fix: strip the marker before registering the expression. It used to be passed
                # along, so the regex required a literal "re:" in the input.
                rebulk.regex(pattern[len(regex_marker):], value=value)
            else:
                rebulk.string(pattern, value=value)

    rebulk.rules(ValidateStreamingService)

    return rebulk
|
||||
|
||||
|
||||
class ValidateStreamingService(Rule):
    """Validate streaming service matches."""

    priority = 128
    consequence = RemoveMatch

    def when(self, matches, context):
        """Streaming service is always before source.

        :param matches: current matches
        :type matches: rebulk.match.Matches
        :param context: parsing context (not used here)
        :type context: dict
        :return: matches to remove
        """

        def is_filled(hole):
            """Hole holding something else than separators."""
            return hole.value.strip(seps)

        to_remove = []
        for service in matches.named('streaming_service'):
            next_match = matches.next(service, lambda match: 'streaming_service.suffix' in match.tags, 0)
            previous_match = matches.previous(service, lambda match: 'streaming_service.prefix' in match.tags, 0)
            has_other = service.initiator and service.initiator.children.named('other')

            if not has_other:
                # First check: tagged match right after the service and separator before it.
                suffix_side_fails = (not next_match or
                                     matches.holes(service.end, next_match.start, predicate=is_filled) or
                                     not seps_before(service))
                if suffix_side_fails:
                    # Second check: tagged match right before the service and separator after it.
                    prefix_side_fails = (not previous_match or
                                         matches.holes(previous_match.end, service.start, predicate=is_filled) or
                                         not seps_after(service))
                    if prefix_side_fails:
                        to_remove.append(service)
                        continue

            if service.value == 'Comedy Central':
                # Current match is a valid streaming service, removing invalid Criterion Collection (CC) matches
                to_remove.extend(matches.named('edition', predicate=lambda match: match.value == 'Criterion'))

        return to_remove
|
||||
349
lib/guessit/rules/properties/title.py
Normal file
349
lib/guessit/rules/properties/title.py
Normal file
@@ -0,0 +1,349 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
title property
|
||||
"""
|
||||
|
||||
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
|
||||
from rebulk.formatters import formatters
|
||||
|
||||
from .film import FilmTitleRule
|
||||
from .language import (
|
||||
SubtitlePrefixLanguageRule,
|
||||
SubtitleSuffixLanguageRule,
|
||||
SubtitleExtensionRule,
|
||||
NON_SPECIFIC_LANGUAGES
|
||||
)
|
||||
from ..common import seps, title_seps
|
||||
from ..common.comparators import marker_sorted
|
||||
from ..common.expected import build_expected_function
|
||||
from ..common.formatters import cleanup, reorder_title
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_surround
|
||||
|
||||
|
||||
def title(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object in charge of the ``title`` property.

    :param config: rule configuration (not used by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def _title_disabled(context):
        return is_disabled(context, 'title')

    def _prefer_other(match, other):
        # On conflict the expected-title match always yields to the other match.
        return other

    def _no_expected_title(context):
        return not context.get('expected_title')

    rebulk = Rebulk(disabled=_title_disabled)
    rebulk.rules(TitleFromPosition, PreferTitleWithYear)

    expected_title = build_expected_function('expected_title')
    title_formatter = formatters(cleanup, reorder_title)

    rebulk.functional(
        expected_title,
        name='title',
        tags=['expected', 'title'],
        validator=seps_surround,
        formatter=title_formatter,
        conflict_solver=_prefer_other,
        disabled=_no_expected_title,
    )

    return rebulk
|
||||
|
||||
|
||||
class TitleBaseRule(Rule):
    """
    Add title match in existing matches.

    The rule scans each filepart (``path`` marker) for holes, i.e. spans not
    covered by other matches, and turns the first usable hole into a match
    named ``match_name``. When ``alternative_match_name`` is set, the hole is
    split on ``title_seps`` and the following parts are renamed to it.
    """
    # pylint:disable=no-self-use,unused-argument
    consequence = [AppendMatch, RemoveMatch]

    def __init__(self, match_name, match_tags=None, alternative_match_name=None):
        """
        :param match_name: name given to the created title match
        :param match_tags: tags assigned to the created title match
        :param alternative_match_name: name given to the extra parts obtained
            when splitting the hole; splitting is skipped when falsy
        """
        super(TitleBaseRule, self).__init__()
        self.match_name = match_name
        self.match_tags = match_tags
        self.alternative_match_name = alternative_match_name

    def hole_filter(self, hole, matches):
        """
        Filter holes for titles.

        The base implementation accepts every hole.

        :param hole: candidate hole
        :type hole: Match
        :param matches: all current matches
        :type matches: Matches
        :return: True if the hole may be used as a title
        :rtype: bool
        """
        return True

    def filepart_filter(self, filepart, matches):
        """
        Filter filepart for titles.

        The base implementation accepts every filepart.

        :param filepart: candidate ``path`` marker
        :type filepart: Match
        :param matches: all current matches
        :type matches: Matches
        :return: True if the filepart may be scanned for a title
        :rtype: bool
        """
        return True

    def holes_process(self, holes, matches):
        """
        Crop the holes so that they do not overlap ``group`` markers.

        A group marker whose span equals the span of its ``path`` marker is
        not used for cropping.

        :param holes: holes found in the filepart
        :type holes: list[Match]
        :param matches: all current matches
        :type matches: Matches
        :return: the cropped holes
        :rtype: list[Match]
        """
        cropped_holes = []
        group_markers = matches.markers.named('group')
        # Iterate over a copy: removing from the list being iterated would make
        # the loop skip the marker that follows each removed one.
        for group_marker in list(group_markers):
            path_marker = matches.markers.at_match(group_marker, predicate=lambda m: m.name == 'path', index=0)
            if path_marker and path_marker.span == group_marker.span:
                group_markers.remove(group_marker)

        for hole in holes:
            cropped_holes.extend(hole.crop(group_markers))

        return cropped_holes

    @staticmethod
    def is_ignored(match):
        """
        Ignore matches when scanning for title (hole).

        Only ``language``, ``country`` and ``episode_details`` matches can be
        ignored. Those longer than 3 characters whose raw text is uppercase
        are not ignored.
        """
        return not (len(match) > 3 and match.raw.isupper()) and match.name in ('language', 'country', 'episode_details')

    def should_keep(self, match, to_keep, matches, filepart, hole, starting):
        """
        Check if this match should be accepted when ending or starting a hole.

        :param match: the ignored match found at an edge of the hole
        :type match: Match
        :param to_keep: matches already accepted for this hole
        :type to_keep: list[Match]
        :param matches: all current matches
        :type matches: Matches
        :param filepart: the filepart match
        :type filepart: Match
        :param hole: the hole match
        :type hole: Match
        :param starting: true if match is starting the hole
        :type starting: bool
        :return: True to keep the match. The caller also accepts an
            ``(append, crop)`` pair from overriding implementations.
        :rtype: bool
        """
        if match.name in ('language', 'country'):
            # Keep language if exactly matching the hole.
            if len(hole.value) == len(match.raw):
                return True

            # Keep language if other languages exists in the filepart.
            outside_matches = filepart.crop(hole)
            other_languages = []
            for outside in outside_matches:
                other_languages.extend(matches.range(outside.start, outside.end,
                                                     lambda c_match: c_match.name == match.name and
                                                     c_match not in to_keep and
                                                     c_match.value not in NON_SPECIFIC_LANGUAGES))

            if not other_languages and (not starting or len(match.raw) <= 3):
                return True

        return False

    def should_remove(self, match, matches, filepart, hole, context):
        """
        Check if this match should be removed after being ignored.

        For ``episode_details`` matches in an ``episode`` context, the match
        is removed only when it lies inside the hole. Every other match is
        removed.

        :param match: the ignored match
        :param matches: all current matches
        :param filepart: the filepart match
        :param hole: the hole match
        :param context: guessit context (options)
        :return: True if the match should be removed
        :rtype: bool
        """
        if context.get('type') == 'episode' and match.name == 'episode_details':
            return match.start >= hole.start and match.end <= hole.end
        return True

    def check_titles_in_filepart(self, filepart, matches, context):  # pylint:disable=inconsistent-return-statements
        """
        Find title in filepart (ignoring language)

        :param filepart: the ``path`` marker to scan
        :param matches: all current matches
        :param context: guessit context (options)
        :return: ``(titles, to_remove)`` for the first usable hole, or None
            when no hole yields a title
        """
        # pylint:disable=too-many-locals,too-many-branches,too-many-statements
        start, end = filepart.span

        holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
                              ignore=self.is_ignored,
                              predicate=lambda m: m.value)

        holes = self.holes_process(holes, matches)

        for hole in holes:
            if not hole or (self.hole_filter and not self.hole_filter(hole, matches)):
                continue

            to_remove = []
            to_keep = []

            ignored_matches = matches.range(hole.start, hole.end, self.is_ignored)

            if ignored_matches:
                # First pass, from the end of the hole: shrink the hole's end
                # past trailing ignored matches that should be kept.
                for ignored_match in reversed(ignored_matches):
                    # pylint:disable=undefined-loop-variable, cell-var-from-loop
                    trailing = matches.chain_before(hole.end, seps, predicate=lambda m: m == ignored_match)
                    if trailing:
                        should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, False)
                        if should_keep:
                            # pylint:disable=unpacking-non-sequence
                            try:
                                append, crop = should_keep
                            except TypeError:
                                # A plain boolean applies to both decisions.
                                append, crop = should_keep, should_keep
                            if append:
                                to_keep.append(ignored_match)
                            if crop:
                                hole.end = ignored_match.start

                # Second pass, from the start of the hole: same for leading
                # ignored matches that were not already kept.
                for ignored_match in ignored_matches:
                    if ignored_match not in to_keep:
                        starting = matches.chain_after(hole.start, seps,
                                                       predicate=lambda m: m == ignored_match)
                        if starting:
                            should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, True)
                            if should_keep:
                                # pylint:disable=unpacking-non-sequence
                                try:
                                    append, crop = should_keep
                                except TypeError:
                                    append, crop = should_keep, should_keep
                                if append:
                                    to_keep.append(ignored_match)
                                if crop:
                                    hole.start = ignored_match.end

            for match in ignored_matches:
                if self.should_remove(match, matches, filepart, hole, context):
                    to_remove.append(match)
            for keep_match in to_keep:
                if keep_match in to_remove:
                    to_remove.remove(keep_match)

            if hole and hole.value:
                hole.name = self.match_name
                hole.tags = self.match_tags
                if self.alternative_match_name:
                    # Split and keep values that can be a title
                    titles = hole.split(title_seps, lambda m: m.value)
                    for title_match in list(titles[1:]):
                        previous_title = titles[titles.index(title_match) - 1]
                        separator = matches.input_string[previous_title.end:title_match.start]
                        # A single '-' with no separator on either side joins
                        # the two parts back into one title.
                        if len(separator) == 1 and separator == '-' \
                                and previous_title.raw[-1] not in seps \
                                and title_match.raw[0] not in seps:
                            titles[titles.index(title_match) - 1].end = title_match.end
                            titles.remove(title_match)
                        else:
                            title_match.name = self.alternative_match_name

                else:
                    titles = [hole]
                return titles, to_remove

    def when(self, matches, context):
        """
        Collect title matches to append and ignored matches to remove.

        :param matches: all current matches
        :param context: guessit context (options)
        :return: ``(titles, to_remove)`` or False when there is nothing to do
        """
        ret = []
        to_remove = []

        # An expected title was already matched: nothing to guess.
        if matches.named(self.match_name, lambda match: 'expected' in match.tags):
            return False

        fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
                     if not self.filepart_filter or self.filepart_filter(filepart, matches)]

        # Prioritize fileparts containing the year
        years_fileparts = []
        for filepart in fileparts:
            year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
            if year_match:
                years_fileparts.append(filepart)

        # Stop at the first filepart that yields a title. Fileparts visited
        # here are dropped from years_fileparts so they are not scanned twice.
        for filepart in fileparts:
            try:
                years_fileparts.remove(filepart)
            except ValueError:
                pass
            titles = self.check_titles_in_filepart(filepart, matches, context)
            if titles:
                titles, to_remove_c = titles
                ret.extend(titles)
                to_remove.extend(to_remove_c)
                break

        # Add title match in all fileparts containing the year.
        for filepart in years_fileparts:
            titles = self.check_titles_in_filepart(filepart, matches, context)
            if titles:
                # pylint:disable=unbalanced-tuple-unpacking
                titles, to_remove_c = titles
                ret.extend(titles)
                to_remove.extend(to_remove_c)

        if ret or to_remove:
            return ret, to_remove
        return False
|
||||
|
||||
|
||||
class TitleFromPosition(TitleBaseRule):
    """
    Add title match in existing matches.

    Creates ``title`` matches, and ``alternative_title`` matches for the
    additional parts of a split hole.
    """
    dependency = [
        FilmTitleRule,
        SubtitlePrefixLanguageRule,
        SubtitleSuffixLanguageRule,
        SubtitleExtensionRule,
    ]

    properties = {'title': [None], 'alternative_title': [None]}

    def __init__(self):
        super(TitleFromPosition, self).__init__(match_name='title',
                                                match_tags=['title'],
                                                alternative_match_name='alternative_title')

    def enabled(self, context):
        # The rule runs unless the 'alternative_title' property is disabled.
        return not is_disabled(context, 'alternative_title')
|
||||
|
||||
|
||||
class PreferTitleWithYear(Rule):
    """
    Prefer title where filepart contains year.

    Titles whose filepart holds a year inside a group marker win over titles
    whose filepart holds a year outside any group. Titles with a different
    value than the preferred ones are removed, and the preferred ones are
    tagged ``equivalent-ignore``.
    """
    dependency = TitleFromPosition
    consequence = [RemoveMatch, AppendTags(['equivalent-ignore'])]

    properties = {'title': [None]}

    def when(self, matches, context):
        titles = matches.named('title')
        year_in_group_titles = []
        year_titles = []

        for candidate in titles:
            filepart = matches.markers.at_match(candidate, lambda marker: marker.name == 'path', 0)
            if not filepart:
                continue
            year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
            if not year_match:
                continue
            if matches.markers.at_match(year_match, lambda m: m.name == 'group'):
                year_in_group_titles.append(candidate)
            else:
                year_titles.append(candidate)

        # Grouped years take precedence; fall back to plain years.
        preferred = year_in_group_titles or year_titles
        to_tag = list(preferred)

        # With no preferred title every value is kept, so nothing is removed.
        kept_values = {kept.value for kept in (preferred or titles)}
        to_remove = [candidate for candidate in titles if candidate.value not in kept_values]

        if to_remove or to_tag:
            return to_remove, to_tag
        return False
|
||||
83
lib/guessit/rules/properties/type.py
Normal file
83
lib/guessit/rules/properties/type.py
Normal file
@@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
type property
|
||||
"""
|
||||
from rebulk import CustomRule, Rebulk, POST_PROCESS
|
||||
from rebulk.match import Match
|
||||
|
||||
from ..common.pattern import is_disabled
|
||||
from ...rules.processors import Processors
|
||||
|
||||
|
||||
def _type(matches, value):
    """
    Append a zero-length ``type`` match holding the given value.

    The match is positioned at the end of the input string.

    :param matches: matches to append the type match to
    :param value: value of the type match
    :return: None
    """
    end_of_input = len(matches.input_string)
    type_match = Match(end_of_input, end_of_input, name='type', value=value)
    matches.append(type_match)
|
||||
|
||||
|
||||
def type_(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object in charge of the ``type`` property.

    :param config: rule configuration (not used by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def _type_disabled(context):
        return is_disabled(context, 'type')

    return Rebulk(disabled=_type_disabled).rules(TypeProcessor)
|
||||
|
||||
|
||||
class TypeProcessor(CustomRule):
    """
    Post processor to find file type based on all others found matches.
    """
    priority = POST_PROCESS

    dependency = Processors

    properties = {'type': ['episode', 'movie']}

    def when(self, matches, context):  # pylint:disable=too-many-return-statements
        # A type given in the options always wins.
        forced_type = context.get('type', None)
        if forced_type:
            return forced_type

        # Any episode-related property makes it an episode.
        episode_properties = ('episode', 'season', 'episode_details', 'absolute_episode')
        if any(matches.named(name) for name in episode_properties):
            return 'episode'

        if matches.named('film'):
            return 'movie'

        # Without a year, a date or a bonus points to an episode.
        if not matches.named('year'):
            if matches.named('date') or matches.named('bonus'):
                return 'episode'

        # A crc32 together with an anime-tagged release group is an episode.
        if matches.named('crc32') and \
                matches.named('release_group', lambda match: 'anime' in match.tags):
            return 'episode'

        return 'movie'

    def then(self, matches, when_response, context):
        _type(matches, when_response)
|
||||
126
lib/guessit/rules/properties/video_codec.py
Normal file
126
lib/guessit/rules/properties/video_codec.py
Normal file
@@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
video_codec and video_profile property
|
||||
"""
|
||||
from rebulk import Rebulk, Rule, RemoveMatch
|
||||
from rebulk.remodule import re
|
||||
|
||||
from ..common import dash
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_after, seps_before, seps_surround
|
||||
|
||||
|
||||
def video_codec(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers the patterns for the ``video_codec``, ``video_profile``,
    ``video_api`` and ``color_depth`` properties, then the
    ``ValidateVideoCodec`` and ``VideoProfileRule`` rules.

    :param config: rule configuration (not used by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    # --- video_codec patterns ---
    rebulk.defaults(name="video_codec",
                    tags=['source-suffix', 'streaming_service.suffix'],
                    disabled=lambda context: is_disabled(context, 'video_codec'))

    rebulk.regex(r'Rv\d{2}', value='RealVideo')
    rebulk.regex('Mpe?g-?2', '[hx]-?262', value='MPEG-2')
    rebulk.string("DVDivX", "DivX", value="DivX")
    rebulk.string('XviD', value='Xvid')
    rebulk.regex('VC-?1', value='VC-1')
    rebulk.string('VP7', value='VP7')
    rebulk.string('VP8', 'VP80', value='VP8')
    rebulk.string('VP9', value='VP9')
    rebulk.regex('[hx]-?263', value='H.263')
    rebulk.regex('[hx]-?264', '(MPEG-?4)?AVC(?:HD)?', value='H.264')
    rebulk.regex('[hx]-?265', 'HEVC', value='H.265')
    # "hevc10" yields two child matches: the codec and a 10-bit color depth.
    rebulk.regex('(?P<video_codec>hevc)(?P<color_depth>10)', value={'video_codec': 'H.265', 'color_depth': '10-bit'},
                 tags=['video-codec-suffix'], children=True)

    # --- video_profile patterns ---
    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
    rebulk.defaults(clear=True,
                    name="video_profile",
                    validator=seps_surround,
                    disabled=lambda context: is_disabled(context, 'video_profile'))

    # Patterns tagged 'video_profile.rule' are the ones VideoProfileRule checks.
    rebulk.string('BP', value='Baseline', tags='video_profile.rule')
    rebulk.string('XP', 'EP', value='Extended', tags='video_profile.rule')
    rebulk.string('MP', value='Main', tags='video_profile.rule')
    rebulk.string('HP', 'HiP', value='High', tags='video_profile.rule')

    # https://en.wikipedia.org/wiki/Scalable_Video_Coding
    rebulk.string('SC', 'SVC', value='Scalable Video Coding', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/AVCHD
    rebulk.regex('AVC(?:HD)?', value='Advanced Video Codec High Definition', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/H.265/HEVC
    rebulk.string('HEVC', value='High Efficiency Video Coding', tags='video_profile.rule')

    # These profiles carry no 'video_profile.rule' tag.
    rebulk.regex('Hi422P', value='High 4:2:2')
    rebulk.regex('Hi444PP', value='High 4:4:4 Predictive')
    rebulk.regex('Hi10P?', value='High 10')  # no profile validation is required

    # --- video_api pattern (name and disabled are overridden explicitly) ---
    rebulk.string('DXVA', value='DXVA', name='video_api',
                  disabled=lambda context: is_disabled(context, 'video_api'))

    # --- color_depth patterns ---
    rebulk.defaults(clear=True,
                    name='color_depth',
                    validator=seps_surround,
                    disabled=lambda context: is_disabled(context, 'color_depth'))
    rebulk.regex('12.?bits?', value='12-bit')
    rebulk.regex('10.?bits?', 'YUV420P10', 'Hi10P?', value='10-bit')
    rebulk.regex('8.?bits?', value='8-bit')

    rebulk.rules(ValidateVideoCodec, VideoProfileRule)

    return rebulk
|
||||
|
||||
|
||||
class ValidateVideoCodec(Rule):
    """
    Validate video_codec with source property or separated
    """
    priority = 64
    consequence = RemoveMatch

    def enabled(self, context):
        return not is_disabled(context, 'video_codec')

    def when(self, matches, context):
        def is_codec_prefix(match):
            return 'video-codec-prefix' in match.tags

        def is_codec_suffix(match):
            return 'video-codec-suffix' in match.tags

        rejected = []
        for codec in matches.named('video_codec'):
            # The codec needs a separator, or a prefix-tagged match, before it.
            if not (seps_before(codec) or matches.at_index(codec.start - 1, is_codec_prefix)):
                rejected.append(codec)
                continue
            # The same applies after it, with a suffix-tagged match.
            if not (seps_after(codec) or matches.at_index(codec.end + 1, is_codec_suffix)):
                rejected.append(codec)
        return rejected
|
||||
|
||||
|
||||
class VideoProfileRule(Rule):
    """
    Rule to validate video_profile

    A profile tagged ``video_profile.rule`` is removed when no
    ``video_codec`` match is found at its span, before it or after it.
    """
    consequence = RemoveMatch

    def enabled(self, context):
        return not is_disabled(context, 'video_profile')

    def when(self, matches, context):
        def is_codec(match):
            return match.name == 'video_codec'

        orphans = []
        for profile in matches.named('video_profile', lambda match: 'video_profile.rule' in match.tags):
            # Same span first, then previous matches, then next matches.
            related_codec = (matches.at_span(profile.span, is_codec, 0) or
                             matches.previous(profile, is_codec) or
                             matches.next(profile, is_codec))
            if not related_codec:
                orphans.append(profile)
        return orphans
|
||||
110
lib/guessit/rules/properties/website.py
Normal file
110
lib/guessit/rules/properties/website.py
Normal file
@@ -0,0 +1,110 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Website property.
|
||||
"""
|
||||
# from pkg_resources import resource_stream # @UnresolvedImport
|
||||
import os
|
||||
|
||||
from rebulk.remodule import re
|
||||
|
||||
from rebulk import Rebulk, Rule, RemoveMatch
|
||||
from ..common import seps
|
||||
from ..common.formatters import cleanup
|
||||
from ..common.pattern import is_disabled
|
||||
from ..common.validators import seps_surround
|
||||
from ...reutils import build_or_pattern
|
||||
|
||||
|
||||
def website(config):
    """
    Builder for rebulk object.

    Reads the list of top-level domains from ``tlds-alpha-by-domain.txt``,
    located three directory levels above this module, and registers the
    ``website`` patterns and rules.

    :param config: rule configuration; must provide the ``safe_tlds``,
        ``safe_subdomains``, ``safe_prefixes`` and ``prefixes`` keys
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'website'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name="website")

    tlds_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
                             'tlds-alpha-by-domain.txt')
    # Binary mode is required: the lines are filtered with a bytes literal and
    # decoded explicitly. In text mode on Python 3 the lines are str, so
    # `b'--' not in tld` raises TypeError.
    with open(tlds_path, 'rb') as tld_file:
        tlds = [
            tld.strip().decode('utf-8')
            for tld in tld_file.readlines()
            if b'--' not in tld
        ][1:]  # All registered domain extension (first remaining line is skipped)

    safe_tlds = config['safe_tlds']  # For sure a website extension
    safe_subdomains = config['safe_subdomains']  # For sure a website subdomain
    safe_prefix = config['safe_prefixes']  # Those words before a tlds are sure
    website_prefixes = config['prefixes']

    # <safe subdomain>.<anything>.<any tld>
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 children=True)
    # [<safe subdomain>.]<name>.<safe tld>
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
    # [<safe subdomain>.]<name>.<safe prefix>.<any tld>
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_prefix) +
                 r'\.)+(?:'+build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)

    rebulk.string(*website_prefixes,
                  validator=seps_surround, private=True, tags=['website.prefix'])

    class PreferTitleOverWebsite(Rule):
        """
        If found match is more likely a title, remove website.
        """
        consequence = RemoveMatch

        @staticmethod
        def valid_followers(match):
            """
            Validator for next website matches
            """
            return match.named('season', 'episode', 'year')

        def when(self, matches, context):
            to_remove = []
            for website_match in matches.named('website'):
                # A website starting with a safe subdomain or prefix is kept.
                safe = False
                for safe_start in safe_subdomains + safe_prefix:
                    if website_match.value.lower().startswith(safe_start):
                        safe = True
                        break
                if not safe:
                    # Otherwise remove it when a valid follower comes next and
                    # the website is not inside a group marker.
                    suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
                    if suffix:
                        group = matches.markers.at_match(website_match, lambda marker: marker.name == 'group', 0)
                        if not group:
                            to_remove.append(website_match)
            return to_remove

    rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)

    return rebulk
|
||||
|
||||
|
||||
class ValidateWebsitePrefix(Rule):
    """
    Validate website prefixes

    A prefix is removed unless a ``website`` match follows it with no
    non-empty hole in between.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        orphaned = []
        for prefix in matches.tagged('website.prefix'):
            following = matches.next(prefix, predicate=lambda match: match.name == 'website', index=0)
            if following:
                gap = matches.holes(prefix.end, following.start,
                                    formatter=cleanup, seps=seps, predicate=lambda match: match.value)
                if not gap:
                    # The prefix is directly followed by a website: keep it.
                    continue
            orphaned.append(prefix)
        return orphaned
|
||||
3
lib/guessit/test/__init__.py
Normal file
3
lib/guessit/test/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name
|
||||
1
lib/guessit/test/config/dummy.txt
Normal file
1
lib/guessit/test/config/dummy.txt
Normal file
@@ -0,0 +1 @@
|
||||
Not a configuration file
|
||||
4
lib/guessit/test/config/test.json
Normal file
4
lib/guessit/test/config/test.json
Normal file
@@ -0,0 +1,4 @@
|
||||
{
|
||||
"expected_title": ["The 100", "OSS 117"],
|
||||
"yaml": false
|
||||
}
|
||||
4
lib/guessit/test/config/test.yaml
Normal file
4
lib/guessit/test/config/test.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
expected_title:
|
||||
- The 100
|
||||
- OSS 117
|
||||
yaml: True
|
||||
4
lib/guessit/test/config/test.yml
Normal file
4
lib/guessit/test/config/test.yml
Normal file
@@ -0,0 +1,4 @@
|
||||
expected_title:
|
||||
- The 100
|
||||
- OSS 117
|
||||
yaml: True
|
||||
335
lib/guessit/test/enable_disable_properties.yml
Normal file
335
lib/guessit/test/enable_disable_properties.yml
Normal file
@@ -0,0 +1,335 @@
|
||||
? vorbis
|
||||
: options: --exclude audio_codec
|
||||
-audio_codec: Vorbis
|
||||
|
||||
? DTS-ES
|
||||
: options: --exclude audio_profile
|
||||
audio_codec: DTS
|
||||
-audio_profile: Extended Surround
|
||||
|
||||
? DTS.ES
|
||||
: options: --include audio_codec
|
||||
audio_codec: DTS
|
||||
-audio_profile: Extended Surround
|
||||
|
||||
? 5.1
|
||||
? 5ch
|
||||
? 6ch
|
||||
: options: --exclude audio_channels
|
||||
-audio_channels: '5.1'
|
||||
|
||||
? Movie Title-x01-Other Title.mkv
|
||||
? Movie Title-x01-Other Title
|
||||
? directory/Movie Title-x01-Other Title/file.mkv
|
||||
: options: --exclude bonus
|
||||
-bonus: 1
|
||||
-bonus_title: Other Title
|
||||
|
||||
? Title-x02-Bonus Title.mkv
|
||||
: options: --include bonus
|
||||
bonus: 2
|
||||
-bonus_title: Other Title
|
||||
|
||||
? cd 1of3
|
||||
: options: --exclude cd
|
||||
-cd: 1
|
||||
-cd_count: 3
|
||||
|
||||
? This.is.Us
|
||||
: options: --exclude country
|
||||
title: This is Us
|
||||
-country: US
|
||||
|
||||
? 2015.01.31
|
||||
: options: --exclude date
|
||||
year: 2015
|
||||
-date: 2015-01-31
|
||||
|
||||
? Something 2 mar 2013)
|
||||
: options: --exclude date
|
||||
-date: 2013-03-02
|
||||
|
||||
? 2012 2009 S01E02 2015 # If no year is marked, the second one is guessed.
|
||||
: options: --exclude year
|
||||
-year: 2009
|
||||
|
||||
? Director's cut
|
||||
: options: --exclude edition
|
||||
-edition: Director's Cut
|
||||
|
||||
? 2x5
|
||||
? 2X5
|
||||
? 02x05
|
||||
? 2X05
|
||||
? 02x5
|
||||
? S02E05
|
||||
? s02e05
|
||||
? s02e5
|
||||
? s2e05
|
||||
? s02ep05
|
||||
? s2EP5
|
||||
: options: --exclude season
|
||||
-season: 2
|
||||
-episode: 5
|
||||
|
||||
? 2x6
|
||||
? 2X6
|
||||
? 02x06
|
||||
? 2X06
|
||||
? 02x6
|
||||
? S02E06
|
||||
? s02e06
|
||||
? s02e6
|
||||
? s2e06
|
||||
? s02ep06
|
||||
? s2EP6
|
||||
: options: --exclude episode
|
||||
-season: 2
|
||||
-episode: 6
|
||||
|
||||
? serie Season 2 other
|
||||
: options: --exclude season
|
||||
-season: 2
|
||||
|
||||
? Some Dummy Directory/S02 Some Series/E01-Episode title.mkv
|
||||
: options: --exclude episode_title
|
||||
-episode_title: Episode title
|
||||
season: 2
|
||||
episode: 1
|
||||
|
||||
? Another Dummy Directory/S02 Some Series/E01-Episode title.mkv
|
||||
: options: --include season --include episode
|
||||
-episode_title: Episode title
|
||||
season: 2
|
||||
episode: 1
|
||||
|
||||
# The pattern contains both season and episode: it won't work when only one of them is enabled
|
||||
? Some Series S03E01E02
|
||||
: options: --include episode
|
||||
-season: 3
|
||||
-episode: [1, 2]
|
||||
|
||||
# The pattern contains both season and episode: it won't work when only one of them is enabled
|
||||
? Another Series S04E01E02
|
||||
: options: --include season
|
||||
-season: 4
|
||||
-episode: [1, 2]
|
||||
|
||||
? Show.Name.Season.4.Episode.1
|
||||
: options: --include episode
|
||||
-season: 4
|
||||
episode: 1
|
||||
|
||||
? Another.Show.Name.Season.4.Episode.1
|
||||
: options: --include season
|
||||
season: 4
|
||||
-episode: 1
|
||||
|
||||
? Some Series S01 02 03
|
||||
: options: --exclude season
|
||||
-season: [1, 2, 3]
|
||||
|
||||
? Some Series E01 02 04
|
||||
: options: --exclude episode
|
||||
-episode: [1, 2, 4]
|
||||
|
||||
? A very special episode s06 special
|
||||
: options: -t episode --exclude episode_details
|
||||
season: 6
|
||||
-episode_details: Special
|
||||
|
||||
? S01D02.3-5-GROUP
|
||||
: options: --exclude disc
|
||||
-season: 1
|
||||
-disc: [2, 3, 4, 5]
|
||||
-episode: [2, 3, 4, 5]
|
||||
|
||||
? S01D02&4-6&8
|
||||
: options: --exclude season
|
||||
-season: 1
|
||||
-disc: [2, 4, 5, 6, 8]
|
||||
-episode: [2, 4, 5, 6, 8]
|
||||
|
||||
? Film Title-f01-Series Title.mkv
|
||||
: options: --exclude film
|
||||
-film: 1
|
||||
-film_title: Film Title
|
||||
|
||||
? Another Film Title-f01-Series Title.mkv
|
||||
: options: --exclude film_title
|
||||
film: 1
|
||||
-film_title: Film Title
|
||||
|
||||
? English
|
||||
? .ENG.
|
||||
: options: --exclude language
|
||||
-language: English
|
||||
|
||||
? SubFrench
|
||||
? SubFr
|
||||
? STFr
|
||||
: options: --exclude subtitle_language
|
||||
-language: French
|
||||
-subtitle_language: French
|
||||
|
||||
? ST.FR
|
||||
: options: --exclude subtitle_language
|
||||
language: French
|
||||
-subtitle_language: French
|
||||
|
||||
? ENG.-.sub.FR
|
||||
? ENG.-.FR Sub
|
||||
: options: --include language
|
||||
language: [English, French]
|
||||
-subtitle_language: French
|
||||
|
||||
? ENG.-.SubFR
|
||||
: options: --include language
|
||||
language: English
|
||||
-subtitle_language: French
|
||||
|
||||
? ENG.-.FRSUB
|
||||
? ENG.-.FRSUBS
|
||||
? ENG.-.FR-SUBS
|
||||
: options: --include subtitle_language
|
||||
-language: English
|
||||
subtitle_language: French
|
||||
|
||||
? DVD.Real.XViD
|
||||
? DVD.fix.XViD
|
||||
: options: --exclude other
|
||||
-other: Fix
|
||||
-proper_count: 1
|
||||
|
||||
? Part 3
|
||||
? Part III
|
||||
? Part Three
|
||||
? Part Trois
|
||||
? Part3
|
||||
: options: --exclude part
|
||||
-part: 3
|
||||
|
||||
? Some.Title.XViD-by.Artik[SEDG].avi
|
||||
: options: --exclude release_group
|
||||
-release_group: Artik[SEDG]
|
||||
|
||||
? "[ABC] Some.Title.avi"
|
||||
? some/folder/[ABC]Some.Title.avi
|
||||
: options: --exclude release_group
|
||||
-release_group: ABC
|
||||
|
||||
? 360p
|
||||
? 360px
|
||||
? "360"
|
||||
? +500x360
|
||||
: options: --exclude screen_size
|
||||
-screen_size: 360p
|
||||
|
||||
? 640x360
|
||||
: options: --exclude aspect_ratio
|
||||
screen_size: 360p
|
||||
-aspect_ratio: 1.778
|
||||
|
||||
? 8196x4320
|
||||
: options: --exclude screen_size
|
||||
-screen_size: 4320p
|
||||
-aspect_ratio: 1.897
|
||||
|
||||
? 4.3gb
|
||||
: options: --exclude size
|
||||
-size: 4.3GB
|
||||
|
||||
? VhS_rip
|
||||
? VHS.RIP
|
||||
: options: --exclude source
|
||||
-source: VHS
|
||||
-other: Rip
|
||||
|
||||
? DVD.RIP
|
||||
: options: --include other
|
||||
-source: DVD
|
||||
-other: Rip
|
||||
|
||||
? Title Only.avi
|
||||
: options: --exclude title
|
||||
-title: Title Only
|
||||
|
||||
? h265
|
||||
? x265
|
||||
? h.265
|
||||
? x.265
|
||||
? hevc
|
||||
: options: --exclude video_codec
|
||||
-video_codec: H.265
|
||||
|
||||
? hevc10
|
||||
: options: --include color_depth
|
||||
-video_codec: H.265
|
||||
-color_depth: 10-bit
|
||||
|
||||
? HEVC-YUV420P10
|
||||
: options: --include color_depth
|
||||
-video_codec: H.265
|
||||
color_depth: 10-bit
|
||||
|
||||
? h265-HP
|
||||
: options: --exclude video_profile
|
||||
video_codec: H.265
|
||||
-video_profile: High
|
||||
|
||||
? House.of.Cards.2013.S02E03.1080p.NF.WEBRip.DD5.1.x264-NTb.mkv
|
||||
? House.of.Cards.2013.S02E03.1080p.Netflix.WEBRip.DD5.1.x264-NTb.mkv
|
||||
: options: --exclude streaming_service
|
||||
-streaming_service: Netflix
|
||||
|
||||
? wawa.co.uk
|
||||
: options: --exclude website
|
||||
-website: wawa.co.uk
|
||||
|
||||
? movie.mp4
|
||||
: options: --exclude mimetype
|
||||
-mimetype: video/mp4
|
||||
|
||||
? another movie.mkv
|
||||
: options: --exclude container
|
||||
-container: mkv
|
||||
|
||||
? series s02e01
|
||||
: options: --exclude type
|
||||
-type: episode
|
||||
|
||||
? series s02e01
|
||||
: options: --exclude type
|
||||
-type: episode
|
||||
|
||||
? Hotel.Hell.S01E01.720p.DD5.1.448kbps-ALANiS
|
||||
: options: --exclude audio_bit_rate
|
||||
-audio_bit_rate: 448Kbps
|
||||
|
||||
? Katy Perry - Pepsi & Billboard Summer Beats Concert Series 2012 1080i HDTV 20 Mbps DD2.0 MPEG2-TrollHD.ts
|
||||
: options: --exclude video_bit_rate
|
||||
-video_bit_rate: 20Mbps
|
||||
|
||||
? "[Figmentos] Monster 34 - At the End of Darkness [781219F1].mkv"
|
||||
: options: --exclude crc32
|
||||
-crc32: 781219F1
|
||||
|
||||
? 1080p25
|
||||
: options: --exclude frame_rate
|
||||
screen_size: 1080p
|
||||
-frame_rate: 25fps
|
||||
|
||||
? 1080p25
|
||||
: options: --exclude screen_size
|
||||
-screen_size: 1080p
|
||||
-frame_rate: 25fps
|
||||
|
||||
? 1080p25
|
||||
: options: --include frame_rate
|
||||
-screen_size: 1080p
|
||||
-frame_rate: 25fps
|
||||
|
||||
? 1080p 30fps
|
||||
: options: --exclude screen_size
|
||||
-screen_size: 1080p
|
||||
frame_rate: 30fps
|
||||
4693
lib/guessit/test/episodes.yml
Normal file
4693
lib/guessit/test/episodes.yml
Normal file
File diff suppressed because it is too large
Load Diff
1786
lib/guessit/test/movies.yml
Normal file
1786
lib/guessit/test/movies.yml
Normal file
File diff suppressed because it is too large
Load Diff
3
lib/guessit/test/rules/__init__.py
Normal file
3
lib/guessit/test/rules/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name
|
||||
134
lib/guessit/test/rules/audio_codec.yml
Normal file
134
lib/guessit/test/rules/audio_codec.yml
Normal file
@@ -0,0 +1,134 @@
|
||||
# Multiple input strings having same expected results can be chained.
|
||||
# Use - marker to check inputs that should not match results.
|
||||
|
||||
|
||||
? +MP3
|
||||
? +lame
|
||||
? +lame3.12
|
||||
? +lame3.100
|
||||
: audio_codec: MP3
|
||||
|
||||
? +MP2
|
||||
: audio_codec: MP2
|
||||
|
||||
? +DolbyDigital
|
||||
? +DD
|
||||
? +Dolby Digital
|
||||
? +AC3
|
||||
: audio_codec: Dolby Digital
|
||||
|
||||
? +DDP
|
||||
? +DD+
|
||||
? +EAC3
|
||||
: audio_codec: Dolby Digital Plus
|
||||
|
||||
? +DolbyAtmos
|
||||
? +Dolby Atmos
|
||||
? +Atmos
|
||||
? -Atmosphere
|
||||
: audio_codec: Dolby Atmos
|
||||
|
||||
? +AAC
|
||||
: audio_codec: AAC
|
||||
|
||||
? +Flac
|
||||
: audio_codec: FLAC
|
||||
|
||||
? +DTS
|
||||
: audio_codec: DTS
|
||||
|
||||
? +True-HD
|
||||
? +trueHD
|
||||
: audio_codec: Dolby TrueHD
|
||||
|
||||
? +True-HD51
|
||||
? +trueHD51
|
||||
: audio_codec: Dolby TrueHD
|
||||
audio_channels: '5.1'
|
||||
|
||||
? +DTSHD
|
||||
? +DTS HD
|
||||
? +DTS-HD
|
||||
: audio_codec: DTS-HD
|
||||
|
||||
? +DTS-HDma
|
||||
? +DTSMA
|
||||
: audio_codec: DTS-HD
|
||||
audio_profile: Master Audio
|
||||
|
||||
? +AC3-hq
|
||||
: audio_codec: Dolby Digital
|
||||
audio_profile: High Quality
|
||||
|
||||
? +AAC-HE
|
||||
: audio_codec: AAC
|
||||
audio_profile: High Efficiency
|
||||
|
||||
? +AAC-LC
|
||||
: audio_codec: AAC
|
||||
audio_profile: Low Complexity
|
||||
|
||||
? +AAC2.0
|
||||
? +AAC20
|
||||
: audio_codec: AAC
|
||||
audio_channels: '2.0'
|
||||
|
||||
? +7.1
|
||||
? +7ch
|
||||
? +8ch
|
||||
: audio_channels: '7.1'
|
||||
|
||||
? +5.1
|
||||
? +5ch
|
||||
? +6ch
|
||||
: audio_channels: '5.1'
|
||||
|
||||
? +2ch
|
||||
? +2.0
|
||||
? +stereo
|
||||
: audio_channels: '2.0'
|
||||
|
||||
? +1ch
|
||||
? +mono
|
||||
: audio_channels: '1.0'
|
||||
|
||||
? DD5.1
|
||||
? DD51
|
||||
: audio_codec: Dolby Digital
|
||||
audio_channels: '5.1'
|
||||
|
||||
? -51
|
||||
: audio_channels: '5.1'
|
||||
|
||||
? DTS-HD.HRA
|
||||
? DTSHD.HRA
|
||||
? DTS-HD.HR
|
||||
? DTSHD.HR
|
||||
? -HRA
|
||||
? -HR
|
||||
: audio_codec: DTS-HD
|
||||
audio_profile: High Resolution Audio
|
||||
|
||||
? DTSES
|
||||
? DTS-ES
|
||||
? -ES
|
||||
: audio_codec: DTS
|
||||
audio_profile: Extended Surround
|
||||
|
||||
? DD-EX
|
||||
? DDEX
|
||||
? -EX
|
||||
: audio_codec: Dolby Digital
|
||||
audio_profile: EX
|
||||
|
||||
? OPUS
|
||||
: audio_codec: Opus
|
||||
|
||||
? Vorbis
|
||||
: audio_codec: Vorbis
|
||||
|
||||
? PCM
|
||||
: audio_codec: PCM
|
||||
|
||||
? LPCM
|
||||
: audio_codec: LPCM
|
||||
9
lib/guessit/test/rules/bonus.yml
Normal file
9
lib/guessit/test/rules/bonus.yml
Normal file
@@ -0,0 +1,9 @@
|
||||
# Multiple input strings having same expected results can be chained.
|
||||
# Use - marker to check inputs that should not match results.
|
||||
? Movie Title-x01-Other Title.mkv
|
||||
? Movie Title-x01-Other Title
|
||||
? directory/Movie Title-x01-Other Title/file.mkv
|
||||
: title: Movie Title
|
||||
bonus_title: Other Title
|
||||
bonus: 1
|
||||
|
||||
10
lib/guessit/test/rules/cds.yml
Normal file
10
lib/guessit/test/rules/cds.yml
Normal file
@@ -0,0 +1,10 @@
|
||||
# Multiple input strings having same expected results can be chained.
|
||||
# Use - marker to check inputs that should not match results.
|
||||
? cd 1of3
|
||||
: cd: 1
|
||||
cd_count: 3
|
||||
|
||||
? Some.Title-DVDRIP-x264-CDP
|
||||
: cd: !!null
|
||||
release_group: CDP
|
||||
video_codec: H.264
|
||||
467
lib/guessit/test/rules/common_words.yml
Normal file
467
lib/guessit/test/rules/common_words.yml
Normal file
@@ -0,0 +1,467 @@
|
||||
? is
|
||||
: title: is
|
||||
|
||||
? it
|
||||
: title: it
|
||||
|
||||
? am
|
||||
: title: am
|
||||
|
||||
? mad
|
||||
: title: mad
|
||||
|
||||
? men
|
||||
: title: men
|
||||
|
||||
? man
|
||||
: title: man
|
||||
|
||||
? run
|
||||
: title: run
|
||||
|
||||
? sin
|
||||
: title: sin
|
||||
|
||||
? st
|
||||
: title: st
|
||||
|
||||
? to
|
||||
: title: to
|
||||
|
||||
? 'no'
|
||||
: title: 'no'
|
||||
|
||||
? non
|
||||
: title: non
|
||||
|
||||
? war
|
||||
: title: war
|
||||
|
||||
? min
|
||||
: title: min
|
||||
|
||||
? new
|
||||
: title: new
|
||||
|
||||
? car
|
||||
: title: car
|
||||
|
||||
? day
|
||||
: title: day
|
||||
|
||||
? bad
|
||||
: title: bad
|
||||
|
||||
? bat
|
||||
: title: bat
|
||||
|
||||
? fan
|
||||
: title: fan
|
||||
|
||||
? fry
|
||||
: title: fry
|
||||
|
||||
? cop
|
||||
: title: cop
|
||||
|
||||
? zen
|
||||
: title: zen
|
||||
|
||||
? gay
|
||||
: title: gay
|
||||
|
||||
? fat
|
||||
: title: fat
|
||||
|
||||
? one
|
||||
: title: one
|
||||
|
||||
? cherokee
|
||||
: title: cherokee
|
||||
|
||||
? got
|
||||
: title: got
|
||||
|
||||
? an
|
||||
: title: an
|
||||
|
||||
? as
|
||||
: title: as
|
||||
|
||||
? cat
|
||||
: title: cat
|
||||
|
||||
? her
|
||||
: title: her
|
||||
|
||||
? be
|
||||
: title: be
|
||||
|
||||
? hat
|
||||
: title: hat
|
||||
|
||||
? sun
|
||||
: title: sun
|
||||
|
||||
? may
|
||||
: title: may
|
||||
|
||||
? my
|
||||
: title: my
|
||||
|
||||
? mr
|
||||
: title: mr
|
||||
|
||||
? rum
|
||||
: title: rum
|
||||
|
||||
? pi
|
||||
: title: pi
|
||||
|
||||
? bb
|
||||
: title: bb
|
||||
|
||||
? bt
|
||||
: title: bt
|
||||
|
||||
? tv
|
||||
: title: tv
|
||||
|
||||
? aw
|
||||
: title: aw
|
||||
|
||||
? by
|
||||
: title: by
|
||||
|
||||
? md
|
||||
: other: Mic Dubbed
|
||||
|
||||
? mp
|
||||
: title: mp
|
||||
|
||||
? cd
|
||||
: title: cd
|
||||
|
||||
? in
|
||||
: title: in
|
||||
|
||||
? ad
|
||||
: title: ad
|
||||
|
||||
? ice
|
||||
: title: ice
|
||||
|
||||
? ay
|
||||
: title: ay
|
||||
|
||||
? at
|
||||
: title: at
|
||||
|
||||
? star
|
||||
: title: star
|
||||
|
||||
? so
|
||||
: title: so
|
||||
|
||||
? he
|
||||
: title: he
|
||||
|
||||
? do
|
||||
: title: do
|
||||
|
||||
? ax
|
||||
: title: ax
|
||||
|
||||
? mx
|
||||
: title: mx
|
||||
|
||||
? bas
|
||||
: title: bas
|
||||
|
||||
? de
|
||||
: title: de
|
||||
|
||||
? le
|
||||
: title: le
|
||||
|
||||
? son
|
||||
: title: son
|
||||
|
||||
? ne
|
||||
: title: ne
|
||||
|
||||
? ca
|
||||
: title: ca
|
||||
|
||||
? ce
|
||||
: title: ce
|
||||
|
||||
? et
|
||||
: title: et
|
||||
|
||||
? que
|
||||
: title: que
|
||||
|
||||
? mal
|
||||
: title: mal
|
||||
|
||||
? est
|
||||
: title: est
|
||||
|
||||
? vol
|
||||
: title: vol
|
||||
|
||||
? or
|
||||
: title: or
|
||||
|
||||
? mon
|
||||
: title: mon
|
||||
|
||||
? se
|
||||
: title: se
|
||||
|
||||
? je
|
||||
: title: je
|
||||
|
||||
? tu
|
||||
: title: tu
|
||||
|
||||
? me
|
||||
: title: me
|
||||
|
||||
? ma
|
||||
: title: ma
|
||||
|
||||
? va
|
||||
: title: va
|
||||
|
||||
? au
|
||||
: country: AU
|
||||
|
||||
? lu
|
||||
: title: lu
|
||||
|
||||
? wa
|
||||
: title: wa
|
||||
|
||||
? ga
|
||||
: title: ga
|
||||
|
||||
? ao
|
||||
: title: ao
|
||||
|
||||
? la
|
||||
: title: la
|
||||
|
||||
? el
|
||||
: title: el
|
||||
|
||||
? del
|
||||
: title: del
|
||||
|
||||
? por
|
||||
: title: por
|
||||
|
||||
? mar
|
||||
: title: mar
|
||||
|
||||
? al
|
||||
: title: al
|
||||
|
||||
? un
|
||||
: title: un
|
||||
|
||||
? ind
|
||||
: title: ind
|
||||
|
||||
? arw
|
||||
: title: arw
|
||||
|
||||
? ts
|
||||
: source: Telesync
|
||||
|
||||
? ii
|
||||
: title: ii
|
||||
|
||||
? bin
|
||||
: title: bin
|
||||
|
||||
? chan
|
||||
: title: chan
|
||||
|
||||
? ss
|
||||
: title: ss
|
||||
|
||||
? san
|
||||
: title: san
|
||||
|
||||
? oss
|
||||
: title: oss
|
||||
|
||||
? iii
|
||||
: title: iii
|
||||
|
||||
? vi
|
||||
: title: vi
|
||||
|
||||
? ben
|
||||
: title: ben
|
||||
|
||||
? da
|
||||
: title: da
|
||||
|
||||
? lt
|
||||
: title: lt
|
||||
|
||||
? ch
|
||||
: title: ch
|
||||
|
||||
? sr
|
||||
: title: sr
|
||||
|
||||
? ps
|
||||
: title: ps
|
||||
|
||||
? cx
|
||||
: title: cx
|
||||
|
||||
? vo
|
||||
: title: vo
|
||||
|
||||
? mkv
|
||||
: container: mkv
|
||||
|
||||
? avi
|
||||
: container: avi
|
||||
|
||||
? dmd
|
||||
: title: dmd
|
||||
|
||||
? the
|
||||
: title: the
|
||||
|
||||
? dis
|
||||
: title: dis
|
||||
|
||||
? cut
|
||||
: title: cut
|
||||
|
||||
? stv
|
||||
: title: stv
|
||||
|
||||
? des
|
||||
: title: des
|
||||
|
||||
? dia
|
||||
: title: dia
|
||||
|
||||
? and
|
||||
: title: and
|
||||
|
||||
? cab
|
||||
: title: cab
|
||||
|
||||
? sub
|
||||
: title: sub
|
||||
|
||||
? mia
|
||||
: title: mia
|
||||
|
||||
? rim
|
||||
: title: rim
|
||||
|
||||
? las
|
||||
: title: las
|
||||
|
||||
? une
|
||||
: title: une
|
||||
|
||||
? par
|
||||
: title: par
|
||||
|
||||
? srt
|
||||
: container: srt
|
||||
|
||||
? ano
|
||||
: title: ano
|
||||
|
||||
? toy
|
||||
: title: toy
|
||||
|
||||
? job
|
||||
: title: job
|
||||
|
||||
? gag
|
||||
: title: gag
|
||||
|
||||
? reel
|
||||
: title: reel
|
||||
|
||||
? www
|
||||
: title: www
|
||||
|
||||
? for
|
||||
: title: for
|
||||
|
||||
? ayu
|
||||
: title: ayu
|
||||
|
||||
? csi
|
||||
: title: csi
|
||||
|
||||
? ren
|
||||
: title: ren
|
||||
|
||||
? moi
|
||||
: title: moi
|
||||
|
||||
? sur
|
||||
: title: sur
|
||||
|
||||
? fer
|
||||
: title: fer
|
||||
|
||||
? fun
|
||||
: title: fun
|
||||
|
||||
? two
|
||||
: title: two
|
||||
|
||||
? big
|
||||
: title: big
|
||||
|
||||
? psy
|
||||
: title: psy
|
||||
|
||||
? air
|
||||
: title: air
|
||||
|
||||
? brazil
|
||||
: title: brazil
|
||||
|
||||
? jordan
|
||||
: title: jordan
|
||||
|
||||
? bs
|
||||
: title: bs
|
||||
|
||||
? kz
|
||||
: title: kz
|
||||
|
||||
? gt
|
||||
: title: gt
|
||||
|
||||
? im
|
||||
: title: im
|
||||
|
||||
? pt
|
||||
: language: pt
|
||||
|
||||
? scr
|
||||
: title: scr
|
||||
|
||||
? sd
|
||||
: title: sd
|
||||
|
||||
? hr
|
||||
: other: High Resolution
|
||||
13
lib/guessit/test/rules/country.yml
Normal file
13
lib/guessit/test/rules/country.yml
Normal file
@@ -0,0 +1,13 @@
|
||||
# Multiple input strings having same expected results can be chained.
|
||||
# Use - marker to check inputs that should not match results.
|
||||
? Us.this.is.title
|
||||
? this.is.title.US
|
||||
: country: US
|
||||
title: this is title
|
||||
|
||||
? This.is.Us
|
||||
: title: This is Us
|
||||
|
||||
? This.Is.Us
|
||||
: options: --no-default-config
|
||||
title: This Is Us
|
||||
50
lib/guessit/test/rules/date.yml
Normal file
50
lib/guessit/test/rules/date.yml
Normal file
@@ -0,0 +1,50 @@
|
||||
# Multiple input strings having same expected results can be chained.
|
||||
# Use - marker to check inputs that should not match results.
|
||||
? +09.03.08
|
||||
? +09.03.2008
|
||||
? +2008.03.09
|
||||
: date: 2008-03-09
|
||||
|
||||
? +31.01.15
|
||||
? +31.01.2015
|
||||
? +15.01.31
|
||||
? +2015.01.31
|
||||
: date: 2015-01-31
|
||||
|
||||
? +01.02.03
|
||||
: date: 2003-02-01
|
||||
|
||||
? +01.02.03
|
||||
: options: --date-year-first
|
||||
date: 2001-02-03
|
||||
|
||||
? +01.02.03
|
||||
: options: --date-day-first
|
||||
date: 2003-02-01
|
||||
|
||||
? 1919
|
||||
? 2030
|
||||
: !!map {}
|
||||
|
||||
? 2029
|
||||
: year: 2029
|
||||
|
||||
? (1920)
|
||||
: year: 1920
|
||||
|
||||
? 2012
|
||||
: year: 2012
|
||||
|
||||
? 2011 2013 (2012) (2015) # first marked year is guessed.
|
||||
: title: "2011 2013"
|
||||
year: 2012
|
||||
|
||||
? 2012 2009 S01E02 2015 # If no year is marked, the second one is guessed.
|
||||
: title: "2012"
|
||||
year: 2009
|
||||
episode_title: "2015"
|
||||
|
||||
? Something 2 mar 2013)
|
||||
: title: Something
|
||||
date: 2013-03-02
|
||||
type: episode
|
||||
63
lib/guessit/test/rules/edition.yml
Normal file
63
lib/guessit/test/rules/edition.yml
Normal file
@@ -0,0 +1,63 @@
|
||||
# Multiple input strings having same expected results can be chained.
|
||||
# Use - marker to check inputs that should not match results.
|
||||
? Director's cut
|
||||
? Edition Director's cut
|
||||
: edition: Director's Cut
|
||||
|
||||
? Collector
|
||||
? Collector Edition
|
||||
? Edition Collector
|
||||
: edition: Collector
|
||||
|
||||
? Special Edition
|
||||
? Edition Special
|
||||
? -Special
|
||||
: edition: Special
|
||||
|
||||
? Criterion Edition
|
||||
? Edition Criterion
|
||||
? CC
|
||||
? -Criterion
|
||||
: edition: Criterion
|
||||
|
||||
? Deluxe
|
||||
? Deluxe Edition
|
||||
? Edition Deluxe
|
||||
: edition: Deluxe
|
||||
|
||||
? Super Movie Alternate XViD
|
||||
? Super Movie Alternative XViD
|
||||
? Super Movie Alternate Cut XViD
|
||||
? Super Movie Alternative Cut XViD
|
||||
: edition: Alternative Cut
|
||||
|
||||
? ddc
|
||||
: edition: Director's Definitive Cut
|
||||
|
||||
? IMAX
|
||||
? IMAX Edition
|
||||
: edition: IMAX
|
||||
|
||||
? ultimate edition
|
||||
? -ultimate
|
||||
: edition: Ultimate
|
||||
|
||||
? ultimate collector edition
|
||||
? ultimate collector's edition
|
||||
? ultimate collectors edition
|
||||
? -collectors edition
|
||||
? -ultimate edition
|
||||
: edition: [Ultimate, Collector]
|
||||
|
||||
? ultimate collectors edition dc
|
||||
: edition: [Ultimate, Collector, Director's Cut]
|
||||
|
||||
? fan edit
|
||||
? fan edition
|
||||
? fan collection
|
||||
: edition: Fan
|
||||
|
||||
? ultimate fan edit
|
||||
? ultimate fan edition
|
||||
? ultimate fan collection
|
||||
: edition: [Ultimate, Fan]
|
||||
331
lib/guessit/test/rules/episodes.yml
Normal file
331
lib/guessit/test/rules/episodes.yml
Normal file
@@ -0,0 +1,331 @@
|
||||
# Multiple input strings having same expected results can be chained.
|
||||
# Use - marker to check inputs that should not match results.
|
||||
? +2x5
|
||||
? +2X5
|
||||
? +02x05
|
||||
? +2X05
|
||||
? +02x5
|
||||
? S02E05
|
||||
? s02e05
|
||||
? s02e5
|
||||
? s2e05
|
||||
? s02ep05
|
||||
? s2EP5
|
||||
? -s03e05
|
||||
? -s02e06
|
||||
? -3x05
|
||||
? -2x06
|
||||
: season: 2
|
||||
episode: 5
|
||||
|
||||
? "+0102"
|
||||
? "+102"
|
||||
: season: 1
|
||||
episode: 2
|
||||
|
||||
? "0102 S03E04"
|
||||
? "S03E04 102"
|
||||
: season: 3
|
||||
episode: 4
|
||||
|
||||
? +serie Saison 2 other
|
||||
? +serie Season 2 other
|
||||
? +serie Saisons 2 other
|
||||
? +serie Seasons 2 other
|
||||
? +serie Season Two other
|
||||
? +serie Season II other
|
||||
: season: 2
|
||||
|
||||
? Some Series.S02E01.Episode.title.mkv
|
||||
? Some Series/Season 02/E01-Episode title.mkv
|
||||
? Some Series/Season 02/Some Series-E01-Episode title.mkv
|
||||
? Some Dummy Directory/Season 02/Some Series-E01-Episode title.mkv
|
||||
? -Some Dummy Directory/Season 02/E01-Episode title.mkv
|
||||
? Some Series/Unsafe Season 02/Some Series-E01-Episode title.mkv
|
||||
? -Some Series/Unsafe Season 02/E01-Episode title.mkv
|
||||
? Some Series/Season 02/E01-Episode title.mkv
|
||||
? Some Series/ Season 02/E01-Episode title.mkv
|
||||
? Some Dummy Directory/Some Series S02/E01-Episode title.mkv
|
||||
? Some Dummy Directory/S02 Some Series/E01-Episode title.mkv
|
||||
: title: Some Series
|
||||
episode_title: Episode title
|
||||
season: 2
|
||||
episode: 1
|
||||
|
||||
? Some Series.S02E01.mkv
|
||||
? Some Series/Season 02/E01.mkv
|
||||
? Some Series/Season 02/Some Series-E01.mkv
|
||||
? Some Dummy Directory/Season 02/Some Series-E01.mkv
|
||||
? -Some Dummy Directory/Season 02/E01.mkv
|
||||
? Some Series/Unsafe Season 02/Some Series-E01.mkv
|
||||
? -Some Series/Unsafe Season 02/E01.mkv
|
||||
? Some Series/Season 02/E01.mkv
|
||||
? Some Series/ Season 02/E01.mkv
|
||||
? Some Dummy Directory/Some Series S02/E01-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA.mkv
|
||||
: title: Some Series
|
||||
season: 2
|
||||
episode: 1
|
||||
|
||||
? Some Series S03E01E02
|
||||
: title: Some Series
|
||||
season: 3
|
||||
episode: [1, 2]
|
||||
|
||||
? Some Series S01S02S03
|
||||
? Some Series S01-02-03
|
||||
? Some Series S01 S02 S03
|
||||
? Some Series S01 02 03
|
||||
: title: Some Series
|
||||
season: [1, 2, 3]
|
||||
|
||||
? Some Series E01E02E03
|
||||
? Some Series E01-02-03
|
||||
? Some Series E01-03
|
||||
? Some Series E01 E02 E03
|
||||
? Some Series E01 02 03
|
||||
: title: Some Series
|
||||
episode: [1, 2, 3]
|
||||
|
||||
? Some Series E01E02E04
|
||||
? Some Series E01 E02 E04
|
||||
? Some Series E01 02 04
|
||||
: title: Some Series
|
||||
episode: [1, 2, 4]
|
||||
|
||||
? Some Series E01-02-04
|
||||
? Some Series E01-04
|
||||
? Some Series E01-04
|
||||
: title: Some Series
|
||||
episode: [1, 2, 3, 4]
|
||||
|
||||
? Some Series E01-02-E04
|
||||
: title: Some Series
|
||||
episode: [1, 2, 3, 4]
|
||||
|
||||
? Episode 3
|
||||
? -Episode III
|
||||
: episode: 3
|
||||
|
||||
? Episode 3
|
||||
? Episode III
|
||||
: options: -t episode
|
||||
episode: 3
|
||||
|
||||
? -A very special movie
|
||||
: episode_details: Special
|
||||
|
||||
? -A very special episode
|
||||
: options: -t episode
|
||||
episode_details: Special
|
||||
|
||||
? A very special episode s06 special
|
||||
: options: -t episode
|
||||
title: A very special episode
|
||||
episode_details: Special
|
||||
|
||||
? 12 Monkeys\Season 01\Episode 05\12 Monkeys - S01E05 - The Night Room.mkv
|
||||
: container: mkv
|
||||
title: 12 Monkeys
|
||||
episode: 5
|
||||
season: 1
|
||||
|
||||
? S03E02.X.1080p
|
||||
: episode: 2
|
||||
screen_size: 1080p
|
||||
season: 3
|
||||
|
||||
? Something 1 x 2-FlexGet
|
||||
: options: -t episode
|
||||
title: Something
|
||||
season: 1
|
||||
episode: 2
|
||||
episode_title: FlexGet
|
||||
|
||||
? Show.Name.-.Season.1.to.3.-.Mp4.1080p
|
||||
? Show.Name.-.Season.1~3.-.Mp4.1080p
|
||||
? Show.Name.-.Saison.1.a.3.-.Mp4.1080p
|
||||
: container: mp4
|
||||
screen_size: 1080p
|
||||
season:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
title: Show Name
|
||||
|
||||
? Show.Name.Season.1.3&5.HDTV.XviD-GoodGroup[SomeTrash]
|
||||
? Show.Name.Season.1.3 and 5.HDTV.XviD-GoodGroup[SomeTrash]
|
||||
: source: HDTV
|
||||
release_group: GoodGroup[SomeTrash]
|
||||
season:
|
||||
- 1
|
||||
- 3
|
||||
- 5
|
||||
title: Show Name
|
||||
type: episode
|
||||
video_codec: Xvid
|
||||
|
||||
? Show.Name.Season.1.2.3-5.HDTV.XviD-GoodGroup[SomeTrash]
|
||||
? Show.Name.Season.1.2.3~5.HDTV.XviD-GoodGroup[SomeTrash]
|
||||
? Show.Name.Season.1.2.3 to 5.HDTV.XviD-GoodGroup[SomeTrash]
|
||||
: source: HDTV
|
||||
release_group: GoodGroup[SomeTrash]
|
||||
season:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
- 5
|
||||
title: Show Name
|
||||
type: episode
|
||||
video_codec: Xvid
|
||||
|
||||
? The.Get.Down.S01EP01.FRENCH.720p.WEBRIP.XVID-STR
|
||||
: episode: 1
|
||||
source: Web
|
||||
other: Rip
|
||||
language: fr
|
||||
release_group: STR
|
||||
screen_size: 720p
|
||||
season: 1
|
||||
title: The Get Down
|
||||
type: episode
|
||||
video_codec: Xvid
|
||||
|
||||
? My.Name.Is.Earl.S01E01-S01E21.SWE-SUB
|
||||
: episode:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
- 5
|
||||
- 6
|
||||
- 7
|
||||
- 8
|
||||
- 9
|
||||
- 10
|
||||
- 11
|
||||
- 12
|
||||
- 13
|
||||
- 14
|
||||
- 15
|
||||
- 16
|
||||
- 17
|
||||
- 18
|
||||
- 19
|
||||
- 20
|
||||
- 21
|
||||
season: 1
|
||||
subtitle_language: sv
|
||||
title: My Name Is Earl
|
||||
type: episode
|
||||
|
||||
? Show.Name.Season.4.Episodes.1-12
|
||||
: episode:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
- 5
|
||||
- 6
|
||||
- 7
|
||||
- 8
|
||||
- 9
|
||||
- 10
|
||||
- 11
|
||||
- 12
|
||||
season: 4
|
||||
title: Show Name
|
||||
type: episode
|
||||
|
||||
? show name s01.to.s04
|
||||
: season:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
title: show name
|
||||
type: episode
|
||||
|
||||
? epi
|
||||
: options: -t episode
|
||||
title: epi
|
||||
|
||||
? Episode20
|
||||
? Episode 20
|
||||
: episode: 20
|
||||
|
||||
? Episode50
|
||||
? Episode 50
|
||||
: episode: 50
|
||||
|
||||
? Episode51
|
||||
? Episode 51
|
||||
: episode: 51
|
||||
|
||||
? Episode70
|
||||
? Episode 70
|
||||
: episode: 70
|
||||
|
||||
? Episode71
|
||||
? Episode 71
|
||||
: episode: 71
|
||||
|
||||
? S01D02.3-5-GROUP
|
||||
: disc: [2, 3, 4, 5]
|
||||
|
||||
? S01D02&4-6&8
|
||||
: disc: [2, 4, 5, 6, 8]
|
||||
|
||||
? Something.4x05-06
|
||||
? Something - 4x05-06
|
||||
? Something:4x05-06
|
||||
? Something 4x05-06
|
||||
? Something-4x05-06
|
||||
: title: Something
|
||||
season: 4
|
||||
episode:
|
||||
- 5
|
||||
- 6
|
||||
|
||||
? Something.4x05-06
|
||||
? Something - 4x05-06
|
||||
? Something:4x05-06
|
||||
? Something 4x05-06
|
||||
? Something-4x05-06
|
||||
: options: -T something
|
||||
title: something
|
||||
season: 4
|
||||
episode:
|
||||
- 5
|
||||
- 6
|
||||
|
||||
? Colony 23/S01E01.Some.title.mkv
|
||||
: title: Colony 23
|
||||
season: 1
|
||||
episode: 1
|
||||
episode_title: Some title
|
||||
|
||||
? Show.Name.E02.2010.mkv
|
||||
: options: -t episode
|
||||
title: Show Name
|
||||
year: 2010
|
||||
episode: 2
|
||||
|
||||
? Show.Name.E02.S2010.mkv
|
||||
: options: -t episode
|
||||
title: Show Name
|
||||
year: 2010
|
||||
season: 2010
|
||||
episode: 2
|
||||
|
||||
|
||||
? Show.Name.E02.2010.mkv
|
||||
: title: Show Name
|
||||
year: 2010
|
||||
episode: 2
|
||||
|
||||
? Show.Name.E02.S2010.mkv
|
||||
: title: Show Name
|
||||
year: 2010
|
||||
season: 2010
|
||||
episode: 2
|
||||
9
lib/guessit/test/rules/film.yml
Normal file
9
lib/guessit/test/rules/film.yml
Normal file
@@ -0,0 +1,9 @@
|
||||
# Multiple input strings having same expected results can be chained.
|
||||
# Use - marker to check inputs that should not match results.
|
||||
? Film Title-f01-Series Title.mkv
|
||||
? Film Title-f01-Series Title
|
||||
? directory/Film Title-f01-Series Title/file.mkv
|
||||
: title: Series Title
|
||||
film_title: Film Title
|
||||
film: 1
|
||||
|
||||
47
lib/guessit/test/rules/language.yml
Normal file
47
lib/guessit/test/rules/language.yml
Normal file
@@ -0,0 +1,47 @@
|
||||
# Multiple input strings having same expected results can be chained.
|
||||
# Use - marker to check inputs that should not match results.
|
||||
? +English
|
||||
? .ENG.
|
||||
: language: English
|
||||
|
||||
? +French
|
||||
: language: French
|
||||
|
||||
? +SubFrench
|
||||
? +SubFr
|
||||
? +STFr
|
||||
? ST.FR
|
||||
: subtitle_language: French
|
||||
|
||||
? +ENG.-.sub.FR
|
||||
? ENG.-.FR Sub
|
||||
? +ENG.-.SubFR
|
||||
? +ENG.-.FRSUB
|
||||
? +ENG.-.FRSUBS
|
||||
? +ENG.-.FR-SUBS
|
||||
: language: English
|
||||
subtitle_language: French
|
||||
|
||||
? "{Fr-Eng}.St{Fr-Eng}"
|
||||
? "Le.Prestige[x264.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv"
|
||||
: language: [French, English]
|
||||
subtitle_language: [French, English]
|
||||
|
||||
? +ENG.-.sub.SWE
|
||||
? ENG.-.SWE Sub
|
||||
? +ENG.-.SubSWE
|
||||
? +ENG.-.SWESUB
|
||||
? +ENG.-.sub.SV
|
||||
? ENG.-.SV Sub
|
||||
? +ENG.-.SubSV
|
||||
? +ENG.-.SVSUB
|
||||
: language: English
|
||||
subtitle_language: Swedish
|
||||
|
||||
? The English Patient (1996)
|
||||
: title: The English Patient
|
||||
-language: english
|
||||
|
||||
? French.Kiss.1995.1080p
|
||||
: title: French Kiss
|
||||
-language: french
|
||||
169
lib/guessit/test/rules/other.yml
Normal file
169
lib/guessit/test/rules/other.yml
Normal file
@@ -0,0 +1,169 @@
|
||||
# Multiple input strings having same expected results can be chained.
|
||||
# Use - marker to check inputs that should not match results.
|
||||
? +DVDSCR
|
||||
? +DVDScreener
|
||||
? +DVD-SCR
|
||||
? +DVD Screener
|
||||
? +DVD AnythingElse Screener
|
||||
? -DVD AnythingElse SCR
|
||||
: other: Screener
|
||||
|
||||
? +AudioFix
|
||||
? +AudioFixed
|
||||
? +Audio Fix
|
||||
? +Audio Fixed
|
||||
: other: Audio Fixed
|
||||
|
||||
? +SyncFix
|
||||
? +SyncFixed
|
||||
? +Sync Fix
|
||||
? +Sync Fixed
|
||||
: other: Sync Fixed
|
||||
|
||||
? +DualAudio
|
||||
? +Dual Audio
|
||||
: other: Dual Audio
|
||||
|
||||
? +ws
|
||||
? +WideScreen
|
||||
? +Wide Screen
|
||||
: other: Widescreen
|
||||
|
||||
# Fix must be surrounded by other properties to be matched.
|
||||
? DVD.fix.XViD
|
||||
? -DVD.Fix
|
||||
? -Fix.XViD
|
||||
: other: Fix
|
||||
-proper_count: 1
|
||||
|
||||
? -DVD.BlablaBla.Fix.Blablabla.XVID
|
||||
? -DVD.BlablaBla.Fix.XVID
|
||||
? -DVD.Fix.Blablabla.XVID
|
||||
: other: Fix
|
||||
-proper_count: 1
|
||||
|
||||
|
||||
? DVD.Real.PROPER.REPACK
|
||||
: other: Proper
|
||||
proper_count: 3
|
||||
|
||||
|
||||
? Proper.720p
|
||||
? +Repack
|
||||
? +Rerip
|
||||
: other: Proper
|
||||
proper_count: 1
|
||||
|
||||
? XViD.Fansub
|
||||
: other: Fan Subtitled
|
||||
|
||||
? XViD.Fastsub
|
||||
: other: Fast Subtitled
|
||||
|
||||
? +Season Complete
|
||||
? -Complete
|
||||
: other: Complete
|
||||
|
||||
? R5
|
||||
: other: Region 5
|
||||
|
||||
? RC
|
||||
: other: Region C
|
||||
|
||||
? PreAir
|
||||
? Pre Air
|
||||
: other: Preair
|
||||
|
||||
? Screener
|
||||
: other: Screener
|
||||
|
||||
? Remux
|
||||
: other: Remux
|
||||
|
||||
? 3D.2019
|
||||
: other: 3D
|
||||
|
||||
? HD
|
||||
: other: HD
|
||||
|
||||
? FHD
|
||||
? FullHD
|
||||
? Full HD
|
||||
: other: Full HD
|
||||
|
||||
? UHD
|
||||
? Ultra
|
||||
? UltraHD
|
||||
? Ultra HD
|
||||
: other: Ultra HD
|
||||
|
||||
? mHD # ??
|
||||
? HDLight
|
||||
: other: Micro HD
|
||||
|
||||
? HQ
|
||||
: other: High Quality
|
||||
|
||||
? hr
|
||||
: other: High Resolution
|
||||
|
||||
? PAL
|
||||
: other: PAL
|
||||
|
||||
? SECAM
|
||||
: other: SECAM
|
||||
|
||||
? NTSC
|
||||
: other: NTSC
|
||||
|
||||
? LDTV
|
||||
: other: Low Definition
|
||||
|
||||
? LD
|
||||
: other: Line Dubbed
|
||||
|
||||
? MD
|
||||
: other: Mic Dubbed
|
||||
|
||||
? -The complete movie
|
||||
: other: Complete
|
||||
|
||||
? +The complete movie
|
||||
: title: The complete movie
|
||||
|
||||
? +AC3-HQ
|
||||
: audio_profile: High Quality
|
||||
|
||||
? Other-HQ
|
||||
: other: High Quality
|
||||
|
||||
? reenc
|
||||
? re-enc
|
||||
? re-encoded
|
||||
? reencoded
|
||||
: other: Reencoded
|
||||
|
||||
? CONVERT XViD
|
||||
: other: Converted
|
||||
|
||||
? +HDRIP # it's a rip from an unspecified HD source
|
||||
: other: [HD, Rip]
|
||||
|
||||
? SDR
|
||||
: other: Standard Dynamic Range
|
||||
|
||||
? HDR
|
||||
? HDR10
|
||||
? -HDR100
|
||||
: other: HDR10
|
||||
|
||||
? BT2020
|
||||
? BT.2020
|
||||
? -BT.20200
|
||||
? -BT.2021
|
||||
: other: BT.2020
|
||||
|
||||
? Upscaled
|
||||
? Upscale
|
||||
: other: Upscaled
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user