PTN 1.3
This commit is contained in:
+8
-10
@@ -7,9 +7,6 @@ import os
|
|||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import lib.babelfish
|
|
||||||
from lib.guessit import guessit
|
|
||||||
|
|
||||||
PY3 = False
|
PY3 = False
|
||||||
if sys.version_info[0] >= 3: PY3 = True; unicode = str; unichr = chr; long = int
|
if sys.version_info[0] >= 3: PY3 = True; unicode = str; unichr = chr; long = int
|
||||||
if PY3:
|
if PY3:
|
||||||
@@ -274,13 +271,14 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
|
|||||||
longtitle = title + (s if title and title2 else '') + title2 + '\n'
|
longtitle = title + (s if title and title2 else '') + title2 + '\n'
|
||||||
|
|
||||||
if sceneTitle:
|
if sceneTitle:
|
||||||
parsedTitle = guessit(title)
|
import lib.PTN.parse as parse
|
||||||
|
parsedTitle = parse(title)
|
||||||
title = longtitle = parsedTitle.get('title', '')
|
title = longtitle = parsedTitle.get('title', '')
|
||||||
log('TITOLO',title)
|
log('TITOLO',title)
|
||||||
if parsedTitle.get('source'):
|
if parsedTitle.get('quality'):
|
||||||
quality = str(parsedTitle.get('source'))
|
quality = str(parsedTitle.get('quality'))
|
||||||
if parsedTitle.get('screen_size'):
|
if parsedTitle.get('resolution'):
|
||||||
quality += ' ' + str(parsedTitle.get('screen_size', ''))
|
quality += ' ' + str(parsedTitle.get('resolution', ''))
|
||||||
if not scraped['year']:
|
if not scraped['year']:
|
||||||
infolabels['year'] = parsedTitle.get('year', '')
|
infolabels['year'] = parsedTitle.get('year', '')
|
||||||
if parsedTitle.get('episode') and parsedTitle.get('season'):
|
if parsedTitle.get('episode') and parsedTitle.get('season'):
|
||||||
@@ -299,8 +297,8 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
|
|||||||
longtitle += s + config.get_localized_string(30140) + " " +str(parsedTitle.get('season')[0]) + '-' + str(parsedTitle.get('season')[-1])
|
longtitle += s + config.get_localized_string(30140) + " " +str(parsedTitle.get('season')[0]) + '-' + str(parsedTitle.get('season')[-1])
|
||||||
elif parsedTitle.get('season'):
|
elif parsedTitle.get('season'):
|
||||||
longtitle += s + config.get_localized_string(60027) % str(parsedTitle.get('season'))
|
longtitle += s + config.get_localized_string(60027) % str(parsedTitle.get('season'))
|
||||||
if parsedTitle.get('episode_title'):
|
if parsedTitle.get('episodeName'):
|
||||||
longtitle += s + parsedTitle.get('episode_title')
|
longtitle += s + parsedTitle.get('episodeName')
|
||||||
|
|
||||||
longtitle = typo(longtitle, 'bold')
|
longtitle = typo(longtitle, 'bold')
|
||||||
lang1, longtitle = scrapeLang(scraped, lang, longtitle)
|
lang1, longtitle = scrapeLang(scraped, lang, longtitle)
|
||||||
|
|||||||
Executable
+15
@@ -0,0 +1,15 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# https://github.com/platelminto/parse-torrent-name
|
||||||
|
from .parse import PTN
|
||||||
|
|
||||||
|
__author__ = 'Giorgio Momigliano'
|
||||||
|
__email__ = 'gmomigliano@protonmail.com'
|
||||||
|
__version__ = '1.3'
|
||||||
|
__license__ = 'MIT'
|
||||||
|
|
||||||
|
ptn = PTN()
|
||||||
|
|
||||||
|
|
||||||
|
def parse(name):
    """Parse the release name ``name`` with the shared module-level parser.

    :param name: torrent/release name to analyse
    :return: whatever ``ptn.parse`` returns for ``name``
    """
    parsed = ptn.parse(name)
    return parsed
|
||||||
Executable
+197
@@ -0,0 +1,197 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import re
|
||||||
|
from .patterns import patterns, types, exceptions, delimiters, episode_pattern
|
||||||
|
|
||||||
|
|
||||||
|
class PTN(object):
    """Stateful parser that splits a torrent/release name into named parts.

    ``parse`` walks the ``patterns`` table, stores every recognised field in
    ``self.parts`` and derives the title, group and leftover "excess" text
    from whatever the patterns did not consume.  Scratch state lives on the
    instance and is reset at the start of every ``parse`` call.
    """

    def _escape_regex(self, string):
        """Return ``string`` with regex metacharacters and whitespace escaped."""
        # ``\g<0>`` re-inserts the matched character after a backslash.  The
        # previous template used JavaScript's ``$&``, which Python's ``re``
        # emits literally, so nothing was actually escaped.
        return re.sub(r'[\-\[\]{}()*+?.,\\^$|#\s]', r'\\\g<0>', string)

    def __init__(self):
        self.torrent = None
        self.excess_raw = None
        self.group_raw = None
        self.start = None
        self.end = None
        self.title_raw = None
        self.parts = None

    def _part(self, name, match, raw, clean):
        """Store one parsed part and update the title bounds and excess text.

        :param name: key under which ``clean`` is stored in ``self.parts``
        :param match: sequence whose first item is the matched text; an empty
            sequence leaves the title bounds untouched
        :param raw: text to remove from the excess string (``None`` to skip)
        :param clean: value stored for ``name``
        """
        self.parts[name] = clean

        if match:
            # A match at position 0 pushes the title start forward; any other
            # match can only pull the title end backward.
            index = self.torrent['name'].find(match[0])
            if index == 0:
                self.start = len(match[0])
            elif self.end is None or index < self.end:
                self.end = index

        if name != 'excess':
            if name == 'group':
                self.group_raw = raw
            if raw is not None:
                self.excess_raw = self.excess_raw.replace(raw, '')

    @staticmethod
    def _get_pattern(pattern):
        """Return the regex registered under the key ``pattern`` in ``patterns``.

        Raises ``IndexError`` when no entry has that key.
        """
        return [p[1] for p in patterns if p[0] == pattern][0]

    @staticmethod
    def _clean_string(string):
        """Normalise separators in ``string`` and trim surrounding punctuation."""
        clean = re.sub(r'^ -', '', string)
        # Dots are treated as word separators only when there are no spaces.
        if clean.find(' ') == -1 and clean.find('.') != -1:
            clean = clean.replace('.', ' ')
        clean = clean.replace('_', ' ')
        clean = re.sub(r'([\[\(_]|- )$', '', clean).strip()
        clean = clean.strip(' _-')

        return clean

    def parse(self, name):
        """Parse ``name`` and return the dict of recognised parts.

        :param name: torrent/release name (surrounding whitespace is ignored)
        :return: ``self.parts``, a dict that always contains ``'title'`` and
            may contain the keys of ``patterns`` plus ``'episodeName'``,
            ``'group'``, ``'encoder'`` and ``'excess'``
        """
        name = name.strip()
        self.parts = {}
        self.torrent = {'name': name}
        self.excess_raw = name
        self.group_raw = ''
        self.start = 0
        self.end = None
        self.title_raw = None

        # Loop-invariant: the name with underscores turned into spaces.
        clean_name = name.replace('_', ' ')

        for key, pattern in patterns:
            if key not in ('season', 'episode', 'episodeName', 'website'):
                pattern = r'\b%s\b' % pattern

            found = re.findall(pattern, clean_name, re.IGNORECASE)
            if not found:
                continue

            # With multiple matches, we will usually want to use the first match.
            # For 'year', we instead use the last instance of a year match since,
            # if a title includes a year, we don't want to use this for the year field.
            match_index = -1 if key == 'year' else 0
            selected = found[match_index]
            if isinstance(selected, tuple):
                # Several capture groups: item 0 is the raw text, the first
                # non-empty later group (if any) holds the clean value.
                match = list(selected)
            else:
                # Zero or one capture group: keep only the selected occurrence
                # so the clean value cannot come from a later occurrence.
                match = [selected]

            raw_index = 0
            clean_index = 0
            # for season we might have it in index 1 or index 2
            # i.e. "5x09"
            for i in range(1, len(match)):
                if match[i]:
                    clean_index = i
                    break

            # patterns for multiseason/episode make the range, and only the range, appear in match[0]
            if key in ('season', 'episode') and clean_index == 0:
                # handle multi season/episode
                # i.e. S01-S09
                numbers = re.findall(r'[0-9]+', match[0])
                if len(numbers) >= 2:
                    clean = list(range(int(numbers[0]), int(numbers[1]) + 1))
                elif numbers:
                    clean = int(numbers[0])
                else:
                    # Nothing numeric to record; never reuse a stale value.
                    continue
            elif key == 'language':
                # handle multi language
                pieces = re.split('{}+'.format(delimiters), match[0])
                clean = [piece for piece in pieces if piece]
                if len(clean) == 1:
                    clean = clean[0]
            elif types.get(key) == 'boolean':
                clean = True
            else:
                clean = match[clean_index]
                if types.get(key) == 'integer':
                    clean = int(clean)

            # Codec, quality and subtitles matches can interfere with group matching,
            # so we do this later as a special case.
            if key == 'group':
                if (re.search(self._get_pattern('codec'), clean, re.IGNORECASE) or
                        re.search(self._get_pattern('quality'), clean, re.IGNORECASE) or
                        re.search(self._get_pattern('subtitles'), clean, re.IGNORECASE)):
                    continue

            self._part(key, match, match[raw_index], clean)

        # Start process for title
        raw = self.torrent['name']
        if self.end is not None:
            raw = raw[self.start:self.end].split('(')[0]
        clean = self._clean_string(raw)

        self._part('title', [], raw, clean)

        # Considerations for results that are known to cause issues, such
        # as media with years in them but without a release year.
        for exception in exceptions:
            incorrect_key, incorrect_value = exception['incorrect_parse']
            # ``get`` instead of indexing: the offending key may be absent
            # (e.g. an empty title with no year), which used to raise KeyError.
            if self.parts['title'] == exception['parsed_title'] \
                    and self.parts.get(incorrect_key) == incorrect_value:
                self.parts.pop(incorrect_key)
                self.parts['title'] = exception['actual_title']

        # Start process for end
        clean = re.sub(r'(^[-\. ()]+)|([-\. ]+$)', '', self.excess_raw)
        clean = re.sub(r'[\(\)\/]', ' ', clean)

        words = re.findall(r'((?:(?:[A-Za-z][a-z]+|[A-Za-z])(?:[\.\ \-\+\_]|$))+)', clean)
        if words:
            # Leftover words count as the episode name only when they directly
            # follow an episode marker in the original name.
            match = re.findall(episode_pattern + r'[\.\_\-\s\+]*(' + re.escape(words[0]) + ')',
                               self.torrent['name'], re.IGNORECASE)
            if match:
                self._part('episodeName', match, match[0], self._clean_string(match[0]))
                clean = clean.replace(match[0], '')

        clean = re.sub(r'(^[-_\. ()]+)|([-\. ]+$)', '', clean)
        clean = re.sub(r'[\(\)\/]', ' ', clean)
        tokens = re.split(r'\.\.+| +', clean)

        clean = [token.strip('-') for token in tokens if token and token != '-']

        if clean:
            group = clean.pop() + self.group_raw
            self._part('group', [], group, group)

        # clean group name from having a container name
        if 'group' in self.parts and 'container' in self.parts:
            group = self.parts['group']
            container = self.parts['container']
            if group.lower().endswith('.' + container.lower()):
                group = group[:-(len(container) + 1)]
                self.parts['group'] = group

        # split group name and encoder, adding the latter to self.parts
        if 'group' in self.parts:
            group = self.parts['group']
            bracketed = re.findall(r'(\[(.*)\])', group, flags=re.IGNORECASE)
            if bracketed:
                match = bracketed[0]  # (text with brackets, text inside brackets)
                raw = match[0]
                self._part('encoder', match, raw, match[1])
                self.parts['group'] = group.replace(raw, '')
                if not self.parts['group'].strip():
                    self.parts.pop('group')

        if clean:
            if len(clean) == 1:
                clean = clean[0]  # Avoids making a list if it only has 1 element
            self._part('excess', [], self.excess_raw, clean)
        return self.parts
|
||||||
Executable
+86
@@ -0,0 +1,86 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Regex building blocks and lookup tables used by the PTN parser.
# All regex fragments are raw strings so that backslash sequences such as
# ``\.`` or ``\s`` reach the regex engine without invalid-escape warnings.

delimiters = r'[\.\s\-\+_\/]'
langs = (r'rus|(?:True)?fr(?:ench)?|e?n(?:g(?:lish)?)?|vost(?:fr)?|ita(?:liano)?|castellano|swedish|'
         r'spanish|dk|german|multi|nordic|exyu|chs|hindi|polish|mandarin')
producers = r'ATVP|AMZN|NF|NICK|RED|DSNP'

season_range_pattern = (r'(?:Complete' + delimiters + r'*)?(?:' + delimiters + r'*)?(?:s(?:easons?)?)?'
                        + delimiters + r'?(?:s?[0-9]{1,2}[\s]*(?:\-|(?:\s*to\s*))[\s]*s?[0-9]{1,2})(?:'
                        + delimiters + r'*Complete)?')

# Used when matching episodeName in parse.py, when actually matching episodes we use a slightly
# modified version that has a capture group on the episode number (as seen below).
episode_pattern = r'(?:(?:[ex]|ep)(?:[0-9]{1,2}(?:-(?:[ex]|ep)?(?:[0-9]{1,2})))|(?:[ex]|ep)(?:[0-9]{1,2}))'

year_pattern = r'(?:19[0-9]|20[0-2])[0-9]'
month_pattern = r'0[1-9]|1[0-2]'
day_pattern = r'[0-2][0-9]|3[01]'

# Ordered list of (part name, regex) pairs; the parser tries them in this order.
patterns = [
    ('season', (delimiters + r'('  # Season description can't be at the beginning, must be after this pattern
                + season_range_pattern + r'|'  # Describes season ranges
                + r'(?:Complete' + delimiters + r')?s([0-9]{1,2})(?:' + episode_pattern + r')?|'  # Describes season, optionally with complete or episode
                + r'([0-9]{1,2})x[0-9]{2}|'  # Describes 5x02, 12x15 type descriptions
                + r'(?:Complete' + delimiters + r')?Season[\. -]([0-9]{1,2})'  # Describes Season.15 type descriptions
                + r')(?:' + delimiters + r'|$)')),
    ('episode', r'((?:[ex]|ep)(?:[0-9]{1,2}(?:-(?:[ex]|ep)?(?:[0-9]{1,2})))|(?:[ex]|ep)([0-9]{1,2}))(?:[^0-9]|$)'),
    ('year', r'([\[\(]?(' + year_pattern + r')[\]\)]?)'),
    ('month', r'(?:' + year_pattern + r')' + delimiters + r'(' + month_pattern + r')' + delimiters + r'(?:' + day_pattern + r')'),
    ('day', r'(?:' + year_pattern + r')' + delimiters + r'(?:' + month_pattern + r')' + delimiters + r'(' + day_pattern + r')'),
    ('resolution', r'([0-9]{3,4}p|1280x720)'),
    ('quality', (r'((?:PPV\.)?[HP]DTV|(?:HD)?CAM-?(?:Rip)?|B[DR]Rip|(?:HD-?)?TS|'
                 r'HDRip|HDTVRip|DVDRip|DVDRIP|'
                 r'(?:(?:' + producers + r')' + delimiters + r'?)?(?:PPV )?W[EB]B(?:-?DL(?:Mux)?)?(?:Rip| DVDRip)?|BluRay|DvDScr|hdtv|telesync)')),
    ('codec', r'(xvid|[hx]\.?26[45])'),
    ('audio', (r'(MP3|DD5\.?1|Dual[\- ]Audio|LiNE|DTS|DTS5\.1|'
               r'AAC[ \.-]LC|AAC(?:(?:\.?2(?:\.0)?)?|(?:\.?5(?:\.1)?)?)|'
               r'(?:E-?)?AC-?3(?:' + delimiters + r'*?(?:2\.0|5\.1))?)')),
    ('region', r'R[0-9]'),
    # ``(?:...)`` is a non-capturing group; the previous ``(:?...)`` was a
    # typo that captured an optional colon instead.
    ('extended', r'(EXTENDED(?:.CUT)?)'),
    ('hardcoded', r'HC'),
    ('proper', r'PROPER'),
    ('repack', r'REPACK'),
    ('container', r'(MKV|AVI|MP4)'),
    ('widescreen', r'WS'),
    ('website', r'^(\[ ?([^\]]+?) ?\])'),
    ('subtitles', r'((?:(?:' + langs + r'|e-?)[\-\s.]*)*subs?)'),
    ('language', r'((?:(?:' + langs + r')' + delimiters + r'*)+)(?!(?:[\-\s.]*(?:' + langs + r')*)+[\-\s.]?subs)'),
    ('sbs', r'(?:Half-)?SBS'),
    ('unrated', r'UNRATED'),
    ('size', r'(\d+(?:\.\d+)?(?:GB|MB))'),
    ('bitDepth', r'(?:8|10)bit'),
    ('3d', r'3D'),
    ('internal', r'iNTERNAL'),
    ('readnfo', r'READNFO'),
]

# Value type of a part; parts not listed here keep the matched string.
types = {
    'season': 'integer',
    'episode': 'integer',
    'year': 'integer',
    'month': 'integer',
    'day': 'integer',
    'extended': 'boolean',
    'hardcoded': 'boolean',
    'proper': 'boolean',
    'repack': 'boolean',
    'widescreen': 'boolean',
    'unrated': 'boolean',
    '3d': 'boolean',
    'internal': 'boolean',
    'readnfo': 'boolean',
}

# Known mis-parses: when the parsed title equals 'parsed_title' and the part
# named in 'incorrect_parse' has the given value, that part is dropped and the
# title is replaced by 'actual_title'.
exceptions = [
    {
        'parsed_title': '',
        'incorrect_parse': ('year', 1983),
        'actual_title': '1983',
    },
    {
        'parsed_title': "Marvel's Agents of S H I E L D",
        'incorrect_parse': ('title', "Marvel's Agents of S H I E L D"),
        'actual_title': "Marvel's Agents of S.H.I.E.L.D.",
    },
]
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
__title__ = 'babelfish'
|
|
||||||
__version__ = '0.5.5-dev'
|
|
||||||
__author__ = 'Antoine Bertin'
|
|
||||||
__license__ = 'BSD'
|
|
||||||
__copyright__ = 'Copyright 2015 the BabelFish authors'
|
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
# Base string type of the running interpreter: ``str`` on Python 3,
# ``basestring`` on Python 2 (only evaluated there).
basestr = str if sys.version_info[0] >= 3 else basestring
|
|
||||||
|
|
||||||
from .converters import (LanguageConverter, LanguageReverseConverter, LanguageEquivalenceConverter, CountryConverter,
|
|
||||||
CountryReverseConverter)
|
|
||||||
from .country import country_converters, COUNTRIES, COUNTRY_MATRIX, Country
|
|
||||||
from .exceptions import Error, LanguageConvertError, LanguageReverseError, CountryConvertError, CountryReverseError
|
|
||||||
from .language import language_converters, LANGUAGES, LANGUAGE_MATRIX, Language
|
|
||||||
from .script import SCRIPTS, SCRIPT_MATRIX, Script
|
|
||||||
@@ -1,287 +0,0 @@
|
|||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
import collections
|
|
||||||
from pkg_resources import iter_entry_points, EntryPoint
|
|
||||||
from ..exceptions import LanguageConvertError, LanguageReverseError
|
|
||||||
|
|
||||||
|
|
||||||
# from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
|
|
||||||
try:  # Python 3.3+; the aliases in ``collections`` were removed in 3.10
    from collections.abc import Mapping, MutableMapping
except ImportError:  # Python 2
    from collections import Mapping, MutableMapping


class CaseInsensitiveDict(MutableMapping):
    """A case-insensitive ``dict``-like object.

    Implements all methods and operations of ``MutableMapping`` as well as
    dict's ``copy``. Also provides ``lower_items``.

    All keys are expected to be strings. The structure remembers the
    case of the last key to be set, and ``iter(instance)``,
    ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
    will contain case-sensitive keys. However, querying and contains
    testing is case insensitive:

        cid = CaseInsensitiveDict()
        cid['English'] = 'eng'
        cid['ENGLISH'] == 'eng'  # True
        list(cid) == ['English']  # True

    If the constructor, ``.update``, or equality comparison
    operations are given keys that have equal ``.lower()``s, the
    behavior is undefined.

    """
    def __init__(self, data=None, **kwargs):
        # Maps lowercased key -> (key as last set, value).
        self._store = dict()
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def __setitem__(self, key, value):
        # Use the lowercased key for lookups, but store the actual
        # key alongside the value.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        return self._store[key.lower()][1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        return (casedkey for casedkey, mappedvalue in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval)
            in self._store.items()
        )

    def __eq__(self, other):
        if isinstance(other, Mapping):
            other = CaseInsensitiveDict(other)
        else:
            return NotImplemented
        # Compare insensitively
        return dict(self.lower_items()) == dict(other.lower_items())

    # Copy is required
    def copy(self):
        """Return a new ``CaseInsensitiveDict`` with the same cased keys and values."""
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
|
|
||||||
|
|
||||||
|
|
||||||
class LanguageConverter(object):
    """Base class for converters that turn an alpha3 language code, with an
    optional alpha2 country code and script code, into a custom code.

    .. attribute:: codes

        Set of possible custom codes

    """
    def convert(self, alpha3, country=None, script=None):
        """Produce the custom code for a language; subclasses must override.

        :param string alpha3: ISO-639-3 language code
        :param country: ISO-3166 country code, if any
        :type country: string or None
        :param script: ISO-15924 script code, if any
        :type script: string or None
        :return: the corresponding custom code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.LanguageConvertError`

        """
        raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
class LanguageReverseConverter(LanguageConverter):
    """A :class:`LanguageConverter` that can also map a custom code back to an
    alpha3 ISO-639-3 language code, alpha2 ISO-3166-1 country code and
    ISO-15924 script code

    """
    def reverse(self, code):
        """Map a custom code back to language, country and script; subclasses must override.

        :param string code: custom code to reverse
        :return: the corresponding alpha3 ISO-639-3 language code, alpha2 ISO-3166-1 country code and ISO-15924 script code
        :rtype: tuple
        :raise: :class:`~babelfish.exceptions.LanguageReverseError`

        """
        raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
class LanguageEquivalenceConverter(LanguageReverseConverter):
    """Utility base class that builds a :class:`LanguageReverseConverter` from
    a single dict mapping alpha3 codes to their corresponding symbols.

    Subclasses must provide that dict as a class variable named SYMBOLS.

    Setting the class variable CASE_SENSITIVE to ``True`` makes the reverse
    conversion case-sensitive (it is case-insensitive by default).

    Example::

        class MyCodeConverter(babelfish.LanguageEquivalenceConverter):
            CASE_SENSITIVE = True
            SYMBOLS = {'fra': 'mycode1', 'eng': 'mycode2'}

    """
    CASE_SENSITIVE = False

    def __init__(self):
        self.codes = set()
        self.to_symbol = {}
        # A plain dict gives case-sensitive reverse lookups.
        self.from_symbol = {} if self.CASE_SENSITIVE else CaseInsensitiveDict()

        for language_code, custom_code in self.SYMBOLS.items():
            self.to_symbol[language_code] = custom_code
            self.from_symbol[custom_code] = (language_code, None, None)
            self.codes.add(custom_code)

    def convert(self, alpha3, country=None, script=None):
        """Return the symbol for ``alpha3`` or raise ``LanguageConvertError``."""
        try:
            symbol = self.to_symbol[alpha3]
        except KeyError:
            raise LanguageConvertError(alpha3, country, script)
        return symbol

    def reverse(self, code):
        """Return the ``(alpha3, None, None)`` tuple for ``code`` or raise ``LanguageReverseError``."""
        try:
            language = self.from_symbol[code]
        except KeyError:
            raise LanguageReverseError(code)
        return language
|
|
||||||
|
|
||||||
|
|
||||||
class CountryConverter(object):
    """Base class for converters that turn an alpha2 country code into a
    custom code

    .. attribute:: codes

        Set of possible custom codes

    """
    def convert(self, alpha2):
        """Produce the custom code for a country; subclasses must override.

        :param string alpha2: ISO-3166-1 language code
        :return: the corresponding custom code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.CountryConvertError`

        """
        raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
class CountryReverseConverter(CountryConverter):
    """A :class:`CountryConverter` that can also map a custom code back to an
    alpha2 ISO-3166-1 country code

    """
    def reverse(self, code):
        """Map a custom code back to an alpha2 code; subclasses must override.

        :param string code: custom code to reverse
        :return: the corresponding alpha2 ISO-3166-1 country code
        :rtype: string
        :raise: :class:`~babelfish.exceptions.CountryReverseError`

        """
        raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
class ConverterManager(object):
    """Dict-like registry of babelfish converters that loads them on first use

    Lookup order when a converter is not loaded yet:

        * Entry point converters
        * Registered converters
        * Internal converters

    .. attribute:: entry_point

        The entry point where to look for converters

    .. attribute:: internal_converters

        Internal converters with entry point syntax

    """
    entry_point = ''
    internal_converters = []

    def __init__(self):
        #: Registered converters with entry point syntax
        self.registered_converters = []

        #: Loaded converters
        self.converters = {}

    def __getitem__(self, name):
        """Get a converter, lazy loading it if necessary"""
        try:
            return self.converters[name]
        except KeyError:
            pass

        # Installed entry points take precedence over declared ones.
        for installed in iter_entry_points(self.entry_point):
            if installed.name != name:
                continue
            self.converters[installed.name] = installed.load()()
            return self.converters[installed.name]

        for spec in self.registered_converters + self.internal_converters:
            declared = EntryPoint.parse(spec)
            if declared.name != name:
                continue
            # `require` argument of ep.load() is deprecated in newer versions of setuptools
            if hasattr(declared, 'resolve'):
                factory = declared.resolve()
            elif hasattr(declared, '_load'):
                factory = declared._load()
            else:
                factory = declared.load(require=False)
            self.converters[declared.name] = factory()
            return self.converters[declared.name]

        raise KeyError(name)

    def __setitem__(self, name, converter):
        """Load a converter"""
        self.converters[name] = converter

    def __delitem__(self, name):
        """Unload a converter"""
        del self.converters[name]

    def __iter__(self):
        """Iterator over loaded converters"""
        return iter(self.converters)

    def register(self, entry_point):
        """Register a converter

        :param string entry_point: converter to register (entry point syntax)
        :raise: ValueError if already registered

        """
        if entry_point in self.registered_converters:
            raise ValueError('Already registered')
        # Newest registration goes first so it is found first on lookup.
        self.registered_converters.insert(0, entry_point)

    def unregister(self, entry_point):
        """Unregister a converter

        :param string entry_point: converter to unregister (entry point syntax)

        """
        self.registered_converters.remove(entry_point)

    def __contains__(self, name):
        return name in self.converters
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from . import LanguageEquivalenceConverter
|
|
||||||
from ..language import LANGUAGE_MATRIX
|
|
||||||
|
|
||||||
|
|
||||||
class Alpha2Converter(LanguageEquivalenceConverter):
    """Case-sensitive converter between alpha3 codes and the ``alpha2``
    field of the ``LANGUAGE_MATRIX`` entries; entries with an empty
    ``alpha2`` are left out.
    """
    CASE_SENSITIVE = True
    # A comprehension keeps the loop variable out of the class namespace
    # (a class-body ``for`` loop would leave it behind as a class attribute).
    SYMBOLS = {
        language.alpha3: language.alpha2
        for language in LANGUAGE_MATRIX
        if language.alpha2
    }
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from . import LanguageEquivalenceConverter
|
|
||||||
from ..language import LANGUAGE_MATRIX
|
|
||||||
|
|
||||||
|
|
||||||
class Alpha3BConverter(LanguageEquivalenceConverter):
    """Case-sensitive converter between alpha3 codes and the ``alpha3b``
    field of the ``LANGUAGE_MATRIX`` entries; entries with an empty
    ``alpha3b`` are left out.
    """
    CASE_SENSITIVE = True
    # A comprehension keeps the loop variable out of the class namespace
    # (a class-body ``for`` loop would leave it behind as a class attribute).
    SYMBOLS = {
        language.alpha3: language.alpha3b
        for language in LANGUAGE_MATRIX
        if language.alpha3b
    }
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from . import LanguageEquivalenceConverter
|
|
||||||
from ..language import LANGUAGE_MATRIX
|
|
||||||
|
|
||||||
|
|
||||||
class Alpha3TConverter(LanguageEquivalenceConverter):
    """Case-sensitive converter between alpha3 codes and the ``alpha3t``
    field of the ``LANGUAGE_MATRIX`` entries; entries with an empty
    ``alpha3t`` are left out.
    """
    CASE_SENSITIVE = True
    # A comprehension keeps the loop variable out of the class namespace
    # (a class-body ``for`` loop would leave it behind as a class attribute).
    SYMBOLS = {
        language.alpha3: language.alpha3t
        for language in LANGUAGE_MATRIX
        if language.alpha3t
    }
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from . import CountryReverseConverter, CaseInsensitiveDict
|
|
||||||
from ..country import COUNTRY_MATRIX
|
|
||||||
from ..exceptions import CountryConvertError, CountryReverseError
|
|
||||||
|
|
||||||
|
|
||||||
class CountryNameConverter(CountryReverseConverter):
    """Converter between alpha2 country codes and the ``name`` field of the
    ``COUNTRY_MATRIX`` entries; name lookups go through a
    ``CaseInsensitiveDict``.
    """
    def __init__(self):
        self.codes = set()
        self.to_name = {}
        self.from_name = CaseInsensitiveDict()
        for entry in COUNTRY_MATRIX:
            self.codes.add(entry.name)
            self.to_name[entry.alpha2] = entry.name
            self.from_name[entry.name] = entry.alpha2

    def convert(self, alpha2):
        """Return the name stored for ``alpha2`` or raise ``CountryConvertError``."""
        if alpha2 in self.to_name:
            return self.to_name[alpha2]
        raise CountryConvertError(alpha2)

    def reverse(self, name):
        """Return the alpha2 code stored for ``name`` or raise ``CountryReverseError``."""
        if name in self.from_name:
            return self.from_name[name]
        raise CountryReverseError(name)
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from . import LanguageEquivalenceConverter
|
|
||||||
from ..language import LANGUAGE_MATRIX
|
|
||||||
|
|
||||||
|
|
||||||
class NameConverter(LanguageEquivalenceConverter):
    """Case-insensitive converter between alpha3 codes and the ``name``
    field of the ``LANGUAGE_MATRIX`` entries; entries with an empty
    ``name`` are left out.
    """
    CASE_SENSITIVE = False
    # A comprehension keeps the loop variable out of the class namespace
    # (a class-body ``for`` loop would leave it behind as a class attribute).
    SYMBOLS = {
        language.alpha3: language.name
        for language in LANGUAGE_MATRIX
        if language.name
    }
|
|
||||||
@@ -1,36 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from . import LanguageReverseConverter, CaseInsensitiveDict
|
|
||||||
from ..exceptions import LanguageReverseError
|
|
||||||
from ..language import language_converters
|
|
||||||
|
|
||||||
|
|
||||||
class OpenSubtitlesConverter(LanguageReverseConverter):
    """Convert languages to and from OpenSubtitles codes.

    The conversion delegates to the registered ``alpha3b`` and ``alpha2``
    converters and layers a few hard-coded overrides on top of them.
    """

    def __init__(self):
        self.alpha3b_converter = language_converters['alpha3b']
        self.alpha2_converter = language_converters['alpha2']
        # (alpha3b, country) -> OpenSubtitles code, for the codes that differ
        # from plain alpha3b.
        self.to_opensubtitles = {
            ('por', 'BR'): 'pob',
            ('gre', None): 'ell',
            ('srp', None): 'scc',
            ('srp', 'ME'): 'mne',
        }
        # OpenSubtitles code -> (alpha3, country) for the overrides above,
        # plus the 'pb' alias.
        self.from_opensubtitles = CaseInsensitiveDict({
            'pob': ('por', 'BR'),
            'pb': ('por', 'BR'),
            'ell': ('ell', None),
            'scc': ('srp', None),
            'mne': ('srp', 'ME'),
        })
        override_codes = set(self.from_opensubtitles.keys())
        self.codes = self.alpha2_converter.codes | self.alpha3b_converter.codes | override_codes

    def convert(self, alpha3, country=None, script=None):
        """Return the OpenSubtitles code for the given language.

        The alpha3b code is computed first; it is returned unchanged unless
        an override exists for the ``(alpha3b, country)`` pair.

        """
        alpha3b = self.alpha3b_converter.convert(alpha3, country, script)
        return self.to_opensubtitles.get((alpha3b, country), alpha3b)

    def reverse(self, opensubtitles):
        """Return the language tuple for the `opensubtitles` code.

        Overrides are looked up first, then the alpha3b converter, then the
        alpha2 converter.

        :raise: LanguageReverseError if none of them knows the code

        """
        if opensubtitles in self.from_opensubtitles:
            return self.from_opensubtitles[opensubtitles]
        fallbacks = (self.alpha3b_converter, self.alpha2_converter)
        for fallback in fallbacks:
            try:
                return fallback.reverse(opensubtitles)
            except LanguageReverseError:
                # Not known to this converter; try the next one.
                continue
        raise LanguageReverseError(opensubtitles)
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from . import LanguageConverter
|
|
||||||
from ..exceptions import LanguageConvertError
|
|
||||||
from ..language import LANGUAGE_MATRIX
|
|
||||||
|
|
||||||
|
|
||||||
class ScopeConverter(LanguageConverter):
    """Language converter exposing the scope of a language as a full word.

    ``SYMBOLS`` maps each alpha3 code to the one-letter scope found in
    ``LANGUAGE_MATRIX``; ``FULLNAME`` spells those letters out.
    """
    FULLNAME = {
        'I': 'individual',
        'M': 'macrolanguage',
        'S': 'special',
    }
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        SYMBOLS[iso_language.alpha3] = iso_language.scope
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        """Return the full scope name of the language `alpha3`.

        An `alpha3` missing from ``SYMBOLS`` raises ``KeyError`` because the
        table is indexed directly.

        :raise: LanguageConvertError if the scope letter has no full name

        """
        letter = self.SYMBOLS[alpha3]
        if letter not in self.FULLNAME:
            raise LanguageConvertError(alpha3, country, script)
        return self.FULLNAME[letter]
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from . import LanguageConverter
|
|
||||||
from ..exceptions import LanguageConvertError
|
|
||||||
from ..language import LANGUAGE_MATRIX
|
|
||||||
|
|
||||||
|
|
||||||
class LanguageTypeConverter(LanguageConverter):
    """Language converter exposing the type of a language as a full word.

    ``SYMBOLS`` maps each alpha3 code to the one-letter type found in
    ``LANGUAGE_MATRIX``; ``FULLNAME`` spells those letters out.
    """
    FULLNAME = {
        'A': 'ancient',
        'C': 'constructed',
        'E': 'extinct',
        'H': 'historical',
        'L': 'living',
        'S': 'special',
    }
    SYMBOLS = {}
    for iso_language in LANGUAGE_MATRIX:
        SYMBOLS[iso_language.alpha3] = iso_language.type
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        """Return the full type name of the language `alpha3`.

        An `alpha3` missing from ``SYMBOLS`` raises ``KeyError`` because the
        table is indexed directly.

        :raise: LanguageConvertError if the type letter has no full name

        """
        letter = self.SYMBOLS[alpha3]
        if letter not in self.FULLNAME:
            raise LanguageConvertError(alpha3, country, script)
        return self.FULLNAME[letter]
|
|
||||||
@@ -1,107 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from collections import namedtuple
|
|
||||||
from functools import partial
|
|
||||||
from pkg_resources import resource_stream # @UnresolvedImport
|
|
||||||
from .converters import ConverterManager
|
|
||||||
from . import basestr
|
|
||||||
|
|
||||||
|
|
||||||
#: Mapping of alpha2 country code to country name
COUNTRIES = {}

#: List of :class:`IsoCountry` rows, in file order
COUNTRY_MATRIX = []

#: The namedtuple used in the :data:`COUNTRY_MATRIX`
IsoCountry = namedtuple('IsoCountry', ['name', 'alpha2'])

# Fill both tables from the bundled ``name;alpha2`` data file.  The stream
# yields bytes, so every line is decoded before it is split.
f = resource_stream('babelfish', 'data/iso-3166-1.txt')
try:
    f.readline()  # skip the column-header row
    for l in f:
        iso_country = IsoCountry(*l.decode('utf-8').strip().split(';'))
        COUNTRIES[iso_country.alpha2] = iso_country.name
        COUNTRY_MATRIX.append(iso_country)
finally:
    # Release the stream even when a line fails to decode or to parse.
    f.close()
|
|
||||||
|
|
||||||
|
|
||||||
class CountryConverterManager(ConverterManager):
    """:class:`~babelfish.converters.ConverterManager` for country converters"""
    #: Entry point group under which country converters are registered
    entry_point = 'babelfish.country_converters'
    #: Converters declared by the package itself, in entry point syntax
    internal_converters = [
        'name = babelfish.converters.countryname:CountryNameConverter',
    ]


#: Module-level manager instance used to look up country converters by name
country_converters = CountryConverterManager()
|
|
||||||
|
|
||||||
|
|
||||||
class CountryMeta(type):
    """The :class:`Country` metaclass

    Dynamically redirect :meth:`Country.frommycode` to :meth:`Country.fromcode` with the ``mycode`` `converter`

    """
    def __getattr__(cls, name):
        """Resolve a class attribute that normal lookup did not find.

        Any ``fromXXX`` name is turned into ``cls.fromcode`` with
        ``converter='XXX'`` pre-bound.  ``fromcode`` itself is excluded from
        the redirect: when a class does not define it, redirecting would look
        up ``cls.fromcode`` again and recurse until ``RecursionError``
        instead of raising :class:`AttributeError`.

        :param string name: the missing attribute name
        :raise: AttributeError if the name cannot be resolved

        """
        if name.startswith('from') and name != 'fromcode':
            return partial(cls.fromcode, converter=name[4:])
        return type.__getattribute__(cls, name)
|
|
||||||
|
|
||||||
|
|
||||||
class Country(CountryMeta(str('CountryBase'), (object,), {})):
    """A country on Earth

    A country is represented by a 2-letter code from the ISO-3166 standard

    :param string country: 2-letter ISO-3166 country code
    :raise: ValueError if `country` is not a key of ``COUNTRIES``

    """
    def __init__(self, country):
        if country not in COUNTRIES:
            raise ValueError('%r is not a valid country' % country)

        #: ISO-3166 2-letter country code
        self.alpha2 = country

    @classmethod
    def fromcode(cls, code, converter):
        """Create a :class:`Country` by its `code` using `converter` to
        :meth:`~babelfish.converters.CountryReverseConverter.reverse` it

        :param string code: the code to reverse
        :param string converter: name of the :class:`~babelfish.converters.CountryReverseConverter` to use
        :return: the corresponding :class:`Country` instance
        :rtype: :class:`Country`

        """
        alpha2 = country_converters[converter].reverse(code)
        return cls(alpha2)

    def __getstate__(self):
        # The alpha2 code alone is the pickled state.
        return self.alpha2

    def __setstate__(self, state):
        self.alpha2 = state

    def __getattr__(self, name):
        # Unknown attributes are treated as converter names.  A KeyError
        # raised anywhere in the lookup or the conversion is reported as a
        # missing attribute.
        try:
            converter = country_converters[name]
            return converter.convert(self.alpha2)
        except KeyError:
            raise AttributeError(name)

    def __hash__(self):
        return hash(self.alpha2)

    def __eq__(self, other):
        # Strings compare against the alpha2 code; any other foreign type is
        # simply unequal.
        if isinstance(other, basestr):
            return str(self) == other
        if isinstance(other, Country):
            return self.alpha2 == other.alpha2
        return False

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        return '<Country [%s]>' % self

    def __str__(self):
        return self.alpha2
|
|
||||||
@@ -1,45 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
import os.path
|
|
||||||
import tempfile
|
|
||||||
import zipfile
|
|
||||||
import requests
|
|
||||||
|
|
||||||
|
|
||||||
# Directory of this script; the downloaded data files are written next to it.
# Made absolute so the target does not depend on how the script was invoked.
DATA_DIR = os.path.dirname(os.path.abspath(__file__))


def _fetch(url):
    """Return the body of `url` as bytes, raising on an HTTP error status."""
    response = requests.get(url)
    # Without this check an error page would be saved as if it were data.
    response.raise_for_status()
    return response.content


def _save(filename, payload):
    """Write the bytes `payload` to `filename` inside DATA_DIR."""
    # Binary mode: the payload is bytes, which a text-mode file rejects on
    # Python 3.
    with open(os.path.join(DATA_DIR, filename), 'wb') as f:
        f.write(payload)


def _extract_from_zip(url, member):
    """Download the zip archive at `url` and extract `member` into DATA_DIR."""
    payload = _fetch(url)
    with tempfile.TemporaryFile() as f:
        f.write(payload)
        with zipfile.ZipFile(f) as z:
            z.extract(member, DATA_DIR)


# Every download completes before its target file is opened, so a failed
# request leaves the existing data file untouched.

# iso-3166-1.txt
print('Downloading ISO-3166-1 standard (ISO country codes)...')
_save('iso-3166-1.txt',
      _fetch('http://www.iso.org/iso/home/standards/country_codes/country_names_and_code_elements_txt.htm').strip())

# iso-639-3.tab
print('Downloading ISO-639-3 standard (ISO language codes)...')
_extract_from_zip('http://www-01.sil.org/iso639-3/iso-639-3_Code_Tables_20130531.zip', 'iso-639-3.tab')

# iso-15924
print('Downloading ISO-15924 standard (ISO script codes)...')
_extract_from_zip('http://www.unicode.org/iso15924/iso15924.txt.zip', 'iso15924-utf8-20131012.txt')

# opensubtitles supported languages
print('Downloading OpenSubtitles supported languages...')
_save('opensubtitles_languages.txt', _fetch('http://www.opensubtitles.org/addons/export_languages.php'))

print('Done!')
|
|
||||||
@@ -1,250 +0,0 @@
|
|||||||
Country Name;ISO 3166-1-alpha-2 code
|
|
||||||
AFGHANISTAN;AF
|
|
||||||
ÅLAND ISLANDS;AX
|
|
||||||
ALBANIA;AL
|
|
||||||
ALGERIA;DZ
|
|
||||||
AMERICAN SAMOA;AS
|
|
||||||
ANDORRA;AD
|
|
||||||
ANGOLA;AO
|
|
||||||
ANGUILLA;AI
|
|
||||||
ANTARCTICA;AQ
|
|
||||||
ANTIGUA AND BARBUDA;AG
|
|
||||||
ARGENTINA;AR
|
|
||||||
ARMENIA;AM
|
|
||||||
ARUBA;AW
|
|
||||||
AUSTRALIA;AU
|
|
||||||
AUSTRIA;AT
|
|
||||||
AZERBAIJAN;AZ
|
|
||||||
BAHAMAS;BS
|
|
||||||
BAHRAIN;BH
|
|
||||||
BANGLADESH;BD
|
|
||||||
BARBADOS;BB
|
|
||||||
BELARUS;BY
|
|
||||||
BELGIUM;BE
|
|
||||||
BELIZE;BZ
|
|
||||||
BENIN;BJ
|
|
||||||
BERMUDA;BM
|
|
||||||
BHUTAN;BT
|
|
||||||
BOLIVIA, PLURINATIONAL STATE OF;BO
|
|
||||||
BONAIRE, SINT EUSTATIUS AND SABA;BQ
|
|
||||||
BOSNIA AND HERZEGOVINA;BA
|
|
||||||
BOTSWANA;BW
|
|
||||||
BOUVET ISLAND;BV
|
|
||||||
BRAZIL;BR
|
|
||||||
BRITISH INDIAN OCEAN TERRITORY;IO
|
|
||||||
BRUNEI DARUSSALAM;BN
|
|
||||||
BULGARIA;BG
|
|
||||||
BURKINA FASO;BF
|
|
||||||
BURUNDI;BI
|
|
||||||
CAMBODIA;KH
|
|
||||||
CAMEROON;CM
|
|
||||||
CANADA;CA
|
|
||||||
CAPE VERDE;CV
|
|
||||||
CAYMAN ISLANDS;KY
|
|
||||||
CENTRAL AFRICAN REPUBLIC;CF
|
|
||||||
CHAD;TD
|
|
||||||
CHILE;CL
|
|
||||||
CHINA;CN
|
|
||||||
CHRISTMAS ISLAND;CX
|
|
||||||
COCOS (KEELING) ISLANDS;CC
|
|
||||||
COLOMBIA;CO
|
|
||||||
COMOROS;KM
|
|
||||||
CONGO;CG
|
|
||||||
CONGO, THE DEMOCRATIC REPUBLIC OF THE;CD
|
|
||||||
COOK ISLANDS;CK
|
|
||||||
COSTA RICA;CR
|
|
||||||
CÔTE D'IVOIRE;CI
|
|
||||||
CROATIA;HR
|
|
||||||
CUBA;CU
|
|
||||||
CURAÇAO;CW
|
|
||||||
CYPRUS;CY
|
|
||||||
CZECH REPUBLIC;CZ
|
|
||||||
DENMARK;DK
|
|
||||||
DJIBOUTI;DJ
|
|
||||||
DOMINICA;DM
|
|
||||||
DOMINICAN REPUBLIC;DO
|
|
||||||
ECUADOR;EC
|
|
||||||
EGYPT;EG
|
|
||||||
EL SALVADOR;SV
|
|
||||||
EQUATORIAL GUINEA;GQ
|
|
||||||
ERITREA;ER
|
|
||||||
ESTONIA;EE
|
|
||||||
ETHIOPIA;ET
|
|
||||||
FALKLAND ISLANDS (MALVINAS);FK
|
|
||||||
FAROE ISLANDS;FO
|
|
||||||
FIJI;FJ
|
|
||||||
FINLAND;FI
|
|
||||||
FRANCE;FR
|
|
||||||
FRENCH GUIANA;GF
|
|
||||||
FRENCH POLYNESIA;PF
|
|
||||||
FRENCH SOUTHERN TERRITORIES;TF
|
|
||||||
GABON;GA
|
|
||||||
GAMBIA;GM
|
|
||||||
GEORGIA;GE
|
|
||||||
GERMANY;DE
|
|
||||||
GHANA;GH
|
|
||||||
GIBRALTAR;GI
|
|
||||||
GREECE;GR
|
|
||||||
GREENLAND;GL
|
|
||||||
GRENADA;GD
|
|
||||||
GUADELOUPE;GP
|
|
||||||
GUAM;GU
|
|
||||||
GUATEMALA;GT
|
|
||||||
GUERNSEY;GG
|
|
||||||
GUINEA;GN
|
|
||||||
GUINEA-BISSAU;GW
|
|
||||||
GUYANA;GY
|
|
||||||
HAITI;HT
|
|
||||||
HEARD ISLAND AND MCDONALD ISLANDS;HM
|
|
||||||
HOLY SEE (VATICAN CITY STATE);VA
|
|
||||||
HONDURAS;HN
|
|
||||||
HONG KONG;HK
|
|
||||||
HUNGARY;HU
|
|
||||||
ICELAND;IS
|
|
||||||
INDIA;IN
|
|
||||||
INDONESIA;ID
|
|
||||||
IRAN, ISLAMIC REPUBLIC OF;IR
|
|
||||||
IRAQ;IQ
|
|
||||||
IRELAND;IE
|
|
||||||
ISLE OF MAN;IM
|
|
||||||
ISRAEL;IL
|
|
||||||
ITALY;IT
|
|
||||||
JAMAICA;JM
|
|
||||||
JAPAN;JP
|
|
||||||
JERSEY;JE
|
|
||||||
JORDAN;JO
|
|
||||||
KAZAKHSTAN;KZ
|
|
||||||
KENYA;KE
|
|
||||||
KIRIBATI;KI
|
|
||||||
KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF;KP
|
|
||||||
KOREA, REPUBLIC OF;KR
|
|
||||||
KUWAIT;KW
|
|
||||||
KYRGYZSTAN;KG
|
|
||||||
LAO PEOPLE'S DEMOCRATIC REPUBLIC;LA
|
|
||||||
LATVIA;LV
|
|
||||||
LEBANON;LB
|
|
||||||
LESOTHO;LS
|
|
||||||
LIBERIA;LR
|
|
||||||
LIBYA;LY
|
|
||||||
LIECHTENSTEIN;LI
|
|
||||||
LITHUANIA;LT
|
|
||||||
LUXEMBOURG;LU
|
|
||||||
MACAO;MO
|
|
||||||
MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF;MK
|
|
||||||
MADAGASCAR;MG
|
|
||||||
MALAWI;MW
|
|
||||||
MALAYSIA;MY
|
|
||||||
MALDIVES;MV
|
|
||||||
MALI;ML
|
|
||||||
MALTA;MT
|
|
||||||
MARSHALL ISLANDS;MH
|
|
||||||
MARTINIQUE;MQ
|
|
||||||
MAURITANIA;MR
|
|
||||||
MAURITIUS;MU
|
|
||||||
MAYOTTE;YT
|
|
||||||
MEXICO;MX
|
|
||||||
MICRONESIA, FEDERATED STATES OF;FM
|
|
||||||
MOLDOVA, REPUBLIC OF;MD
|
|
||||||
MONACO;MC
|
|
||||||
MONGOLIA;MN
|
|
||||||
MONTENEGRO;ME
|
|
||||||
MONTSERRAT;MS
|
|
||||||
MOROCCO;MA
|
|
||||||
MOZAMBIQUE;MZ
|
|
||||||
MYANMAR;MM
|
|
||||||
NAMIBIA;NA
|
|
||||||
NAURU;NR
|
|
||||||
NEPAL;NP
|
|
||||||
NETHERLANDS;NL
|
|
||||||
NEW CALEDONIA;NC
|
|
||||||
NEW ZEALAND;NZ
|
|
||||||
NICARAGUA;NI
|
|
||||||
NIGER;NE
|
|
||||||
NIGERIA;NG
|
|
||||||
NIUE;NU
|
|
||||||
NORFOLK ISLAND;NF
|
|
||||||
NORTHERN MARIANA ISLANDS;MP
|
|
||||||
NORWAY;NO
|
|
||||||
OMAN;OM
|
|
||||||
PAKISTAN;PK
|
|
||||||
PALAU;PW
|
|
||||||
PALESTINE, STATE OF;PS
|
|
||||||
PANAMA;PA
|
|
||||||
PAPUA NEW GUINEA;PG
|
|
||||||
PARAGUAY;PY
|
|
||||||
PERU;PE
|
|
||||||
PHILIPPINES;PH
|
|
||||||
PITCAIRN;PN
|
|
||||||
POLAND;PL
|
|
||||||
PORTUGAL;PT
|
|
||||||
PUERTO RICO;PR
|
|
||||||
QATAR;QA
|
|
||||||
RÉUNION;RE
|
|
||||||
ROMANIA;RO
|
|
||||||
RUSSIAN FEDERATION;RU
|
|
||||||
RWANDA;RW
|
|
||||||
SAINT BARTHÉLEMY;BL
|
|
||||||
SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA;SH
|
|
||||||
SAINT KITTS AND NEVIS;KN
|
|
||||||
SAINT LUCIA;LC
|
|
||||||
SAINT MARTIN (FRENCH PART);MF
|
|
||||||
SAINT PIERRE AND MIQUELON;PM
|
|
||||||
SAINT VINCENT AND THE GRENADINES;VC
|
|
||||||
SAMOA;WS
|
|
||||||
SAN MARINO;SM
|
|
||||||
SAO TOME AND PRINCIPE;ST
|
|
||||||
SAUDI ARABIA;SA
|
|
||||||
SENEGAL;SN
|
|
||||||
SERBIA;RS
|
|
||||||
SEYCHELLES;SC
|
|
||||||
SIERRA LEONE;SL
|
|
||||||
SINGAPORE;SG
|
|
||||||
SINT MAARTEN (DUTCH PART);SX
|
|
||||||
SLOVAKIA;SK
|
|
||||||
SLOVENIA;SI
|
|
||||||
SOLOMON ISLANDS;SB
|
|
||||||
SOMALIA;SO
|
|
||||||
SOUTH AFRICA;ZA
|
|
||||||
SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS;GS
|
|
||||||
SOUTH SUDAN;SS
|
|
||||||
SPAIN;ES
|
|
||||||
SRI LANKA;LK
|
|
||||||
SUDAN;SD
|
|
||||||
SURINAME;SR
|
|
||||||
SVALBARD AND JAN MAYEN;SJ
|
|
||||||
SWAZILAND;SZ
|
|
||||||
SWEDEN;SE
|
|
||||||
SWITZERLAND;CH
|
|
||||||
SYRIAN ARAB REPUBLIC;SY
|
|
||||||
TAIWAN, PROVINCE OF CHINA;TW
|
|
||||||
TAJIKISTAN;TJ
|
|
||||||
TANZANIA, UNITED REPUBLIC OF;TZ
|
|
||||||
THAILAND;TH
|
|
||||||
TIMOR-LESTE;TL
|
|
||||||
TOGO;TG
|
|
||||||
TOKELAU;TK
|
|
||||||
TONGA;TO
|
|
||||||
TRINIDAD AND TOBAGO;TT
|
|
||||||
TUNISIA;TN
|
|
||||||
TURKEY;TR
|
|
||||||
TURKMENISTAN;TM
|
|
||||||
TURKS AND CAICOS ISLANDS;TC
|
|
||||||
TUVALU;TV
|
|
||||||
UGANDA;UG
|
|
||||||
UKRAINE;UA
|
|
||||||
UNITED ARAB EMIRATES;AE
|
|
||||||
UNITED KINGDOM;GB
|
|
||||||
UNITED STATES;US
|
|
||||||
UNITED STATES MINOR OUTLYING ISLANDS;UM
|
|
||||||
URUGUAY;UY
|
|
||||||
UZBEKISTAN;UZ
|
|
||||||
VANUATU;VU
|
|
||||||
VENEZUELA, BOLIVARIAN REPUBLIC OF;VE
|
|
||||||
VIET NAM;VN
|
|
||||||
VIRGIN ISLANDS, BRITISH;VG
|
|
||||||
VIRGIN ISLANDS, U.S.;VI
|
|
||||||
WALLIS AND FUTUNA;WF
|
|
||||||
WESTERN SAHARA;EH
|
|
||||||
YEMEN;YE
|
|
||||||
ZAMBIA;ZM
|
|
||||||
ZIMBABWE;ZW
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,176 +0,0 @@
|
|||||||
#
|
|
||||||
# ISO 15924 - Codes for the representation of names of scripts
|
|
||||||
# Codes pour la représentation des noms d’écritures
|
|
||||||
# Format:
|
|
||||||
# Code;N°;English Name;Nom français;PVA;Date
|
|
||||||
#
|
|
||||||
|
|
||||||
Afak;439;Afaka;afaka;;2010-12-21
|
|
||||||
Aghb;239;Caucasian Albanian;aghbanien;;2012-10-16
|
|
||||||
Ahom;338;Ahom, Tai Ahom;âhom;;2012-11-01
|
|
||||||
Arab;160;Arabic;arabe;Arabic;2004-05-01
|
|
||||||
Armi;124;Imperial Aramaic;araméen impérial;Imperial_Aramaic;2009-06-01
|
|
||||||
Armn;230;Armenian;arménien;Armenian;2004-05-01
|
|
||||||
Avst;134;Avestan;avestique;Avestan;2009-06-01
|
|
||||||
Bali;360;Balinese;balinais;Balinese;2006-10-10
|
|
||||||
Bamu;435;Bamum;bamoum;Bamum;2009-06-01
|
|
||||||
Bass;259;Bassa Vah;bassa;;2010-03-26
|
|
||||||
Batk;365;Batak;batik;Batak;2010-07-23
|
|
||||||
Beng;325;Bengali;bengalî;Bengali;2004-05-01
|
|
||||||
Blis;550;Blissymbols;symboles Bliss;;2004-05-01
|
|
||||||
Bopo;285;Bopomofo;bopomofo;Bopomofo;2004-05-01
|
|
||||||
Brah;300;Brahmi;brahma;Brahmi;2010-07-23
|
|
||||||
Brai;570;Braille;braille;Braille;2004-05-01
|
|
||||||
Bugi;367;Buginese;bouguis;Buginese;2006-06-21
|
|
||||||
Buhd;372;Buhid;bouhide;Buhid;2004-05-01
|
|
||||||
Cakm;349;Chakma;chakma;Chakma;2012-02-06
|
|
||||||
Cans;440;Unified Canadian Aboriginal Syllabics;syllabaire autochtone canadien unifié;Canadian_Aboriginal;2004-05-29
|
|
||||||
Cari;201;Carian;carien;Carian;2007-07-02
|
|
||||||
Cham;358;Cham;cham (čam, tcham);Cham;2009-11-11
|
|
||||||
Cher;445;Cherokee;tchérokî;Cherokee;2004-05-01
|
|
||||||
Cirt;291;Cirth;cirth;;2004-05-01
|
|
||||||
Copt;204;Coptic;copte;Coptic;2006-06-21
|
|
||||||
Cprt;403;Cypriot;syllabaire chypriote;Cypriot;2004-05-01
|
|
||||||
Cyrl;220;Cyrillic;cyrillique;Cyrillic;2004-05-01
|
|
||||||
Cyrs;221;Cyrillic (Old Church Slavonic variant);cyrillique (variante slavonne);;2004-05-01
|
|
||||||
Deva;315;Devanagari (Nagari);dévanâgarî;Devanagari;2004-05-01
|
|
||||||
Dsrt;250;Deseret (Mormon);déseret (mormon);Deseret;2004-05-01
|
|
||||||
Dupl;755;Duployan shorthand, Duployan stenography;sténographie Duployé;;2010-07-18
|
|
||||||
Egyd;070;Egyptian demotic;démotique égyptien;;2004-05-01
|
|
||||||
Egyh;060;Egyptian hieratic;hiératique égyptien;;2004-05-01
|
|
||||||
Egyp;050;Egyptian hieroglyphs;hiéroglyphes égyptiens;Egyptian_Hieroglyphs;2009-06-01
|
|
||||||
Elba;226;Elbasan;elbasan;;2010-07-18
|
|
||||||
Ethi;430;Ethiopic (Geʻez);éthiopien (geʻez, guèze);Ethiopic;2004-10-25
|
|
||||||
Geor;240;Georgian (Mkhedruli);géorgien (mkhédrouli);Georgian;2004-05-29
|
|
||||||
Geok;241;Khutsuri (Asomtavruli and Nuskhuri);khoutsouri (assomtavrouli et nouskhouri);Georgian;2012-10-16
|
|
||||||
Glag;225;Glagolitic;glagolitique;Glagolitic;2006-06-21
|
|
||||||
Goth;206;Gothic;gotique;Gothic;2004-05-01
|
|
||||||
Gran;343;Grantha;grantha;;2009-11-11
|
|
||||||
Grek;200;Greek;grec;Greek;2004-05-01
|
|
||||||
Gujr;320;Gujarati;goudjarâtî (gujrâtî);Gujarati;2004-05-01
|
|
||||||
Guru;310;Gurmukhi;gourmoukhî;Gurmukhi;2004-05-01
|
|
||||||
Hang;286;Hangul (Hangŭl, Hangeul);hangûl (hangŭl, hangeul);Hangul;2004-05-29
|
|
||||||
Hani;500;Han (Hanzi, Kanji, Hanja);idéogrammes han (sinogrammes);Han;2009-02-23
|
|
||||||
Hano;371;Hanunoo (Hanunóo);hanounóo;Hanunoo;2004-05-29
|
|
||||||
Hans;501;Han (Simplified variant);idéogrammes han (variante simplifiée);;2004-05-29
|
|
||||||
Hant;502;Han (Traditional variant);idéogrammes han (variante traditionnelle);;2004-05-29
|
|
||||||
Hatr;127;Hatran;hatrénien;;2012-11-01
|
|
||||||
Hebr;125;Hebrew;hébreu;Hebrew;2004-05-01
|
|
||||||
Hira;410;Hiragana;hiragana;Hiragana;2004-05-01
|
|
||||||
Hluw;080;Anatolian Hieroglyphs (Luwian Hieroglyphs, Hittite Hieroglyphs);hiéroglyphes anatoliens (hiéroglyphes louvites, hiéroglyphes hittites);;2011-12-09
|
|
||||||
Hmng;450;Pahawh Hmong;pahawh hmong;;2004-05-01
|
|
||||||
Hrkt;412;Japanese syllabaries (alias for Hiragana + Katakana);syllabaires japonais (alias pour hiragana + katakana);Katakana_Or_Hiragana;2011-06-21
|
|
||||||
Hung;176;Old Hungarian (Hungarian Runic);runes hongroises (ancien hongrois);;2012-10-16
|
|
||||||
Inds;610;Indus (Harappan);indus;;2004-05-01
|
|
||||||
Ital;210;Old Italic (Etruscan, Oscan, etc.);ancien italique (étrusque, osque, etc.);Old_Italic;2004-05-29
|
|
||||||
Java;361;Javanese;javanais;Javanese;2009-06-01
|
|
||||||
Jpan;413;Japanese (alias for Han + Hiragana + Katakana);japonais (alias pour han + hiragana + katakana);;2006-06-21
|
|
||||||
Jurc;510;Jurchen;jurchen;;2010-12-21
|
|
||||||
Kali;357;Kayah Li;kayah li;Kayah_Li;2007-07-02
|
|
||||||
Kana;411;Katakana;katakana;Katakana;2004-05-01
|
|
||||||
Khar;305;Kharoshthi;kharochthî;Kharoshthi;2006-06-21
|
|
||||||
Khmr;355;Khmer;khmer;Khmer;2004-05-29
|
|
||||||
Khoj;322;Khojki;khojkî;;2011-06-21
|
|
||||||
Knda;345;Kannada;kannara (canara);Kannada;2004-05-29
|
|
||||||
Kore;287;Korean (alias for Hangul + Han);coréen (alias pour hangûl + han);;2007-06-13
|
|
||||||
Kpel;436;Kpelle;kpèllé;;2010-03-26
|
|
||||||
Kthi;317;Kaithi;kaithî;Kaithi;2009-06-01
|
|
||||||
Lana;351;Tai Tham (Lanna);taï tham (lanna);Tai_Tham;2009-06-01
|
|
||||||
Laoo;356;Lao;laotien;Lao;2004-05-01
|
|
||||||
Latf;217;Latin (Fraktur variant);latin (variante brisée);;2004-05-01
|
|
||||||
Latg;216;Latin (Gaelic variant);latin (variante gaélique);;2004-05-01
|
|
||||||
Latn;215;Latin;latin;Latin;2004-05-01
|
|
||||||
Lepc;335;Lepcha (Róng);lepcha (róng);Lepcha;2007-07-02
|
|
||||||
Limb;336;Limbu;limbou;Limbu;2004-05-29
|
|
||||||
Lina;400;Linear A;linéaire A;;2004-05-01
|
|
||||||
Linb;401;Linear B;linéaire B;Linear_B;2004-05-29
|
|
||||||
Lisu;399;Lisu (Fraser);lisu (Fraser);Lisu;2009-06-01
|
|
||||||
Loma;437;Loma;loma;;2010-03-26
|
|
||||||
Lyci;202;Lycian;lycien;Lycian;2007-07-02
|
|
||||||
Lydi;116;Lydian;lydien;Lydian;2007-07-02
|
|
||||||
Mahj;314;Mahajani;mahâjanî;;2012-10-16
|
|
||||||
Mand;140;Mandaic, Mandaean;mandéen;Mandaic;2010-07-23
|
|
||||||
Mani;139;Manichaean;manichéen;;2007-07-15
|
|
||||||
Maya;090;Mayan hieroglyphs;hiéroglyphes mayas;;2004-05-01
|
|
||||||
Mend;438;Mende Kikakui;mendé kikakui;;2013-10-12
|
|
||||||
Merc;101;Meroitic Cursive;cursif méroïtique;Meroitic_Cursive;2012-02-06
|
|
||||||
Mero;100;Meroitic Hieroglyphs;hiéroglyphes méroïtiques;Meroitic_Hieroglyphs;2012-02-06
|
|
||||||
Mlym;347;Malayalam;malayâlam;Malayalam;2004-05-01
|
|
||||||
Modi;323;Modi, Moḍī;modî;;2013-10-12
|
|
||||||
Moon;218;Moon (Moon code, Moon script, Moon type);écriture Moon;;2006-12-11
|
|
||||||
Mong;145;Mongolian;mongol;Mongolian;2004-05-01
|
|
||||||
Mroo;199;Mro, Mru;mro;;2010-12-21
|
|
||||||
Mtei;337;Meitei Mayek (Meithei, Meetei);meitei mayek;Meetei_Mayek;2009-06-01
|
|
||||||
Mult;323; Multani;multanî;;2012-11-01
|
|
||||||
Mymr;350;Myanmar (Burmese);birman;Myanmar;2004-05-01
|
|
||||||
Narb;106;Old North Arabian (Ancient North Arabian);nord-arabique;;2010-03-26
|
|
||||||
Nbat;159;Nabataean;nabatéen;;2010-03-26
|
|
||||||
Nkgb;420;Nakhi Geba ('Na-'Khi ²Ggŏ-¹baw, Naxi Geba);nakhi géba;;2009-02-23
|
|
||||||
Nkoo;165;N’Ko;n’ko;Nko;2006-10-10
|
|
||||||
Nshu;499;Nüshu;nüshu;;2010-12-21
|
|
||||||
Ogam;212;Ogham;ogam;Ogham;2004-05-01
|
|
||||||
Olck;261;Ol Chiki (Ol Cemet’, Ol, Santali);ol tchiki;Ol_Chiki;2007-07-02
|
|
||||||
Orkh;175;Old Turkic, Orkhon Runic;orkhon;Old_Turkic;2009-06-01
|
|
||||||
Orya;327;Oriya;oriyâ;Oriya;2004-05-01
|
|
||||||
Osma;260;Osmanya;osmanais;Osmanya;2004-05-01
|
|
||||||
Palm;126;Palmyrene;palmyrénien;;2010-03-26
|
|
||||||
Pauc;263;Pau Cin Hau;paou chin haou;;2013-10-12
|
|
||||||
Perm;227;Old Permic;ancien permien;;2004-05-01
|
|
||||||
Phag;331;Phags-pa;’phags pa;Phags_Pa;2006-10-10
|
|
||||||
Phli;131;Inscriptional Pahlavi;pehlevi des inscriptions;Inscriptional_Pahlavi;2009-06-01
|
|
||||||
Phlp;132;Psalter Pahlavi;pehlevi des psautiers;;2007-11-26
|
|
||||||
Phlv;133;Book Pahlavi;pehlevi des livres;;2007-07-15
|
|
||||||
Phnx;115;Phoenician;phénicien;Phoenician;2006-10-10
|
|
||||||
Plrd;282;Miao (Pollard);miao (Pollard);Miao;2012-02-06
|
|
||||||
Prti;130;Inscriptional Parthian;parthe des inscriptions;Inscriptional_Parthian;2009-06-01
|
|
||||||
Qaaa;900;Reserved for private use (start);réservé à l’usage privé (début);;2004-05-29
|
|
||||||
Qabx;949;Reserved for private use (end);réservé à l’usage privé (fin);;2004-05-29
|
|
||||||
Rjng;363;Rejang (Redjang, Kaganga);redjang (kaganga);Rejang;2009-02-23
|
|
||||||
Roro;620;Rongorongo;rongorongo;;2004-05-01
|
|
||||||
Runr;211;Runic;runique;Runic;2004-05-01
|
|
||||||
Samr;123;Samaritan;samaritain;Samaritan;2009-06-01
|
|
||||||
Sara;292;Sarati;sarati;;2004-05-29
|
|
||||||
Sarb;105;Old South Arabian;sud-arabique, himyarite;Old_South_Arabian;2009-06-01
|
|
||||||
Saur;344;Saurashtra;saurachtra;Saurashtra;2007-07-02
|
|
||||||
Sgnw;095;SignWriting;SignÉcriture, SignWriting;;2006-10-10
|
|
||||||
Shaw;281;Shavian (Shaw);shavien (Shaw);Shavian;2004-05-01
|
|
||||||
Shrd;319;Sharada, Śāradā;charada, shard;Sharada;2012-02-06
|
|
||||||
Sidd;302;Siddham, Siddhaṃ, Siddhamātṛkā;siddham;;2013-10-12
|
|
||||||
Sind;318;Khudawadi, Sindhi;khoudawadî, sindhî;;2010-12-21
|
|
||||||
Sinh;348;Sinhala;singhalais;Sinhala;2004-05-01
|
|
||||||
Sora;398;Sora Sompeng;sora sompeng;Sora_Sompeng;2012-02-06
|
|
||||||
Sund;362;Sundanese;sundanais;Sundanese;2007-07-02
|
|
||||||
Sylo;316;Syloti Nagri;sylotî nâgrî;Syloti_Nagri;2006-06-21
|
|
||||||
Syrc;135;Syriac;syriaque;Syriac;2004-05-01
|
|
||||||
Syre;138;Syriac (Estrangelo variant);syriaque (variante estranghélo);;2004-05-01
|
|
||||||
Syrj;137;Syriac (Western variant);syriaque (variante occidentale);;2004-05-01
|
|
||||||
Syrn;136;Syriac (Eastern variant);syriaque (variante orientale);;2004-05-01
|
|
||||||
Tagb;373;Tagbanwa;tagbanoua;Tagbanwa;2004-05-01
|
|
||||||
Takr;321;Takri, Ṭākrī, Ṭāṅkrī;tâkrî;Takri;2012-02-06
|
|
||||||
Tale;353;Tai Le;taï-le;Tai_Le;2004-10-25
|
|
||||||
Talu;354;New Tai Lue;nouveau taï-lue;New_Tai_Lue;2006-06-21
|
|
||||||
Taml;346;Tamil;tamoul;Tamil;2004-05-01
|
|
||||||
Tang;520;Tangut;tangoute;;2010-12-21
|
|
||||||
Tavt;359;Tai Viet;taï viêt;Tai_Viet;2009-06-01
|
|
||||||
Telu;340;Telugu;télougou;Telugu;2004-05-01
|
|
||||||
Teng;290;Tengwar;tengwar;;2004-05-01
|
|
||||||
Tfng;120;Tifinagh (Berber);tifinagh (berbère);Tifinagh;2006-06-21
|
|
||||||
Tglg;370;Tagalog (Baybayin, Alibata);tagal (baybayin, alibata);Tagalog;2009-02-23
|
|
||||||
Thaa;170;Thaana;thâna;Thaana;2004-05-01
|
|
||||||
Thai;352;Thai;thaï;Thai;2004-05-01
|
|
||||||
Tibt;330;Tibetan;tibétain;Tibetan;2004-05-01
|
|
||||||
Tirh;326;Tirhuta;tirhouta;;2011-12-09
|
|
||||||
Ugar;040;Ugaritic;ougaritique;Ugaritic;2004-05-01
|
|
||||||
Vaii;470;Vai;vaï;Vai;2007-07-02
|
|
||||||
Visp;280;Visible Speech;parole visible;;2004-05-01
|
|
||||||
Wara;262;Warang Citi (Varang Kshiti);warang citi;;2009-11-11
|
|
||||||
Wole;480;Woleai;woléaï;;2010-12-21
|
|
||||||
Xpeo;030;Old Persian;cunéiforme persépolitain;Old_Persian;2006-06-21
|
|
||||||
Xsux;020;Cuneiform, Sumero-Akkadian;cunéiforme suméro-akkadien;Cuneiform;2006-10-10
|
|
||||||
Yiii;460;Yi;yi;Yi;2004-05-01
|
|
||||||
Zinh;994;Code for inherited script;codet pour écriture héritée;Inherited;2009-02-23
|
|
||||||
Zmth;995;Mathematical notation;notation mathématique;;2007-11-26
|
|
||||||
Zsym;996;Symbols;symboles;;2007-11-26
|
|
||||||
Zxxx;997;Code for unwritten documents;codet pour les documents non écrits;;2011-06-21
|
|
||||||
Zyyy;998;Code for undetermined script;codet pour écriture indéterminée;Common;2004-05-29
|
|
||||||
Zzzz;999;Code for uncoded script;codet pour écriture non codée;Unknown;2006-10-10
|
|
||||||
@@ -1,474 +0,0 @@
|
|||||||
IdSubLanguage ISO639 LanguageName UploadEnabled WebEnabled
|
|
||||||
aar aa Afar, afar 0 0
|
|
||||||
abk ab Abkhazian 0 0
|
|
||||||
ace Achinese 0 0
|
|
||||||
ach Acoli 0 0
|
|
||||||
ada Adangme 0 0
|
|
||||||
ady adyghé 0 0
|
|
||||||
afa Afro-Asiatic (Other) 0 0
|
|
||||||
afh Afrihili 0 0
|
|
||||||
afr af Afrikaans 1 0
|
|
||||||
ain Ainu 0 0
|
|
||||||
aka ak Akan 0 0
|
|
||||||
akk Akkadian 0 0
|
|
||||||
alb sq Albanian 1 1
|
|
||||||
ale Aleut 0 0
|
|
||||||
alg Algonquian languages 0 0
|
|
||||||
alt Southern Altai 0 0
|
|
||||||
amh am Amharic 0 0
|
|
||||||
ang English, Old (ca.450-1100) 0 0
|
|
||||||
apa Apache languages 0 0
|
|
||||||
ara ar Arabic 1 1
|
|
||||||
arc Aramaic 0 0
|
|
||||||
arg an Aragonese 0 0
|
|
||||||
arm hy Armenian 1 0
|
|
||||||
arn Araucanian 0 0
|
|
||||||
arp Arapaho 0 0
|
|
||||||
art Artificial (Other) 0 0
|
|
||||||
arw Arawak 0 0
|
|
||||||
asm as Assamese 0 0
|
|
||||||
ast Asturian, Bable 0 0
|
|
||||||
ath Athapascan languages 0 0
|
|
||||||
aus Australian languages 0 0
|
|
||||||
ava av Avaric 0 0
|
|
||||||
ave ae Avestan 0 0
|
|
||||||
awa Awadhi 0 0
|
|
||||||
aym ay Aymara 0 0
|
|
||||||
aze az Azerbaijani 0 0
|
|
||||||
bad Banda 0 0
|
|
||||||
bai Bamileke languages 0 0
|
|
||||||
bak ba Bashkir 0 0
|
|
||||||
bal Baluchi 0 0
|
|
||||||
bam bm Bambara 0 0
|
|
||||||
ban Balinese 0 0
|
|
||||||
baq eu Basque 1 1
|
|
||||||
bas Basa 0 0
|
|
||||||
bat Baltic (Other) 0 0
|
|
||||||
bej Beja 0 0
|
|
||||||
bel be Belarusian 0 0
|
|
||||||
bem Bemba 0 0
|
|
||||||
ben bn Bengali 1 0
|
|
||||||
ber Berber (Other) 0 0
|
|
||||||
bho Bhojpuri 0 0
|
|
||||||
bih bh Bihari 0 0
|
|
||||||
bik Bikol 0 0
|
|
||||||
bin Bini 0 0
|
|
||||||
bis bi Bislama 0 0
|
|
||||||
bla Siksika 0 0
|
|
||||||
bnt Bantu (Other) 0 0
|
|
||||||
bos bs Bosnian 1 0
|
|
||||||
bra Braj 0 0
|
|
||||||
bre br Breton 1 0
|
|
||||||
btk Batak (Indonesia) 0 0
|
|
||||||
bua Buriat 0 0
|
|
||||||
bug Buginese 0 0
|
|
||||||
bul bg Bulgarian 1 1
|
|
||||||
bur my Burmese 1 0
|
|
||||||
byn Blin 0 0
|
|
||||||
cad Caddo 0 0
|
|
||||||
cai Central American Indian (Other) 0 0
|
|
||||||
car Carib 0 0
|
|
||||||
cat ca Catalan 1 1
|
|
||||||
cau Caucasian (Other) 0 0
|
|
||||||
ceb Cebuano 0 0
|
|
||||||
cel Celtic (Other) 0 0
|
|
||||||
cha ch Chamorro 0 0
|
|
||||||
chb Chibcha 0 0
|
|
||||||
che ce Chechen 0 0
|
|
||||||
chg Chagatai 0 0
|
|
||||||
chi zh Chinese 1 1
|
|
||||||
chk Chuukese 0 0
|
|
||||||
chm Mari 0 0
|
|
||||||
chn Chinook jargon 0 0
|
|
||||||
cho Choctaw 0 0
|
|
||||||
chp Chipewyan 0 0
|
|
||||||
chr Cherokee 0 0
|
|
||||||
chu cu Church Slavic 0 0
|
|
||||||
chv cv Chuvash 0 0
|
|
||||||
chy Cheyenne 0 0
|
|
||||||
cmc Chamic languages 0 0
|
|
||||||
cop Coptic 0 0
|
|
||||||
cor kw Cornish 0 0
|
|
||||||
cos co Corsican 0 0
|
|
||||||
cpe Creoles and pidgins, English based (Other) 0 0
|
|
||||||
cpf Creoles and pidgins, French-based (Other) 0 0
|
|
||||||
cpp Creoles and pidgins, Portuguese-based (Other) 0 0
|
|
||||||
cre cr Cree 0 0
|
|
||||||
crh Crimean Tatar 0 0
|
|
||||||
crp Creoles and pidgins (Other) 0 0
|
|
||||||
csb Kashubian 0 0
|
|
||||||
cus Cushitic (Other)' couchitiques, autres langues 0 0
|
|
||||||
cze cs Czech 1 1
|
|
||||||
dak Dakota 0 0
|
|
||||||
dan da Danish 1 1
|
|
||||||
dar Dargwa 0 0
|
|
||||||
day Dayak 0 0
|
|
||||||
del Delaware 0 0
|
|
||||||
den Slave (Athapascan) 0 0
|
|
||||||
dgr Dogrib 0 0
|
|
||||||
din Dinka 0 0
|
|
||||||
div dv Divehi 0 0
|
|
||||||
doi Dogri 0 0
|
|
||||||
dra Dravidian (Other) 0 0
|
|
||||||
dua Duala 0 0
|
|
||||||
dum Dutch, Middle (ca.1050-1350) 0 0
|
|
||||||
dut nl Dutch 1 1
|
|
||||||
dyu Dyula 0 0
|
|
||||||
dzo dz Dzongkha 0 0
|
|
||||||
efi Efik 0 0
|
|
||||||
egy Egyptian (Ancient) 0 0
|
|
||||||
eka Ekajuk 0 0
|
|
||||||
elx Elamite 0 0
|
|
||||||
eng en English 1 1
|
|
||||||
enm English, Middle (1100-1500) 0 0
|
|
||||||
epo eo Esperanto 1 0
|
|
||||||
est et Estonian 1 1
|
|
||||||
ewe ee Ewe 0 0
|
|
||||||
ewo Ewondo 0 0
|
|
||||||
fan Fang 0 0
|
|
||||||
fao fo Faroese 0 0
|
|
||||||
fat Fanti 0 0
|
|
||||||
fij fj Fijian 0 0
|
|
||||||
fil Filipino 0 0
|
|
||||||
fin fi Finnish 1 1
|
|
||||||
fiu Finno-Ugrian (Other) 0 0
|
|
||||||
fon Fon 0 0
|
|
||||||
fre fr French 1 1
|
|
||||||
frm French, Middle (ca.1400-1600) 0 0
|
|
||||||
fro French, Old (842-ca.1400) 0 0
|
|
||||||
fry fy Frisian 0 0
|
|
||||||
ful ff Fulah 0 0
|
|
||||||
fur Friulian 0 0
|
|
||||||
gaa Ga 0 0
|
|
||||||
gay Gayo 0 0
|
|
||||||
gba Gbaya 0 0
|
|
||||||
gem Germanic (Other) 0 0
|
|
||||||
geo ka Georgian 1 1
|
|
||||||
ger de German 1 1
|
|
||||||
gez Geez 0 0
|
|
||||||
gil Gilbertese 0 0
|
|
||||||
gla gd Gaelic 0 0
|
|
||||||
gle ga Irish 0 0
|
|
||||||
glg gl Galician 1 1
|
|
||||||
glv gv Manx 0 0
|
|
||||||
gmh German, Middle High (ca.1050-1500) 0 0
|
|
||||||
goh German, Old High (ca.750-1050) 0 0
|
|
||||||
gon Gondi 0 0
|
|
||||||
gor Gorontalo 0 0
|
|
||||||
got Gothic 0 0
|
|
||||||
grb Grebo 0 0
|
|
||||||
grc Greek, Ancient (to 1453) 0 0
|
|
||||||
ell el Greek 1 1
|
|
||||||
grn gn Guarani 0 0
|
|
||||||
guj gu Gujarati 0 0
|
|
||||||
gwi Gwich´in 0 0
|
|
||||||
hai Haida 0 0
|
|
||||||
hat ht Haitian 0 0
|
|
||||||
hau ha Hausa 0 0
|
|
||||||
haw Hawaiian 0 0
|
|
||||||
heb he Hebrew 1 1
|
|
||||||
her hz Herero 0 0
|
|
||||||
hil Hiligaynon 0 0
|
|
||||||
him Himachali 0 0
|
|
||||||
hin hi Hindi 1 1
|
|
||||||
hit Hittite 0 0
|
|
||||||
hmn Hmong 0 0
|
|
||||||
hmo ho Hiri Motu 0 0
|
|
||||||
hrv hr Croatian 1 1
|
|
||||||
hun hu Hungarian 1 1
|
|
||||||
hup Hupa 0 0
|
|
||||||
iba Iban 0 0
|
|
||||||
ibo ig Igbo 0 0
|
|
||||||
ice is Icelandic 1 1
|
|
||||||
ido io Ido 0 0
|
|
||||||
iii ii Sichuan Yi 0 0
|
|
||||||
ijo Ijo 0 0
|
|
||||||
iku iu Inuktitut 0 0
|
|
||||||
ile ie Interlingue 0 0
|
|
||||||
ilo Iloko 0 0
|
|
||||||
ina ia Interlingua (International Auxiliary Language Asso 0 0
|
|
||||||
inc Indic (Other) 0 0
|
|
||||||
ind id Indonesian 1 1
|
|
||||||
ine Indo-European (Other) 0 0
|
|
||||||
inh Ingush 0 0
|
|
||||||
ipk ik Inupiaq 0 0
|
|
||||||
ira Iranian (Other) 0 0
|
|
||||||
iro Iroquoian languages 0 0
|
|
||||||
ita it Italian 1 1
|
|
||||||
jav jv Javanese 0 0
|
|
||||||
jpn ja Japanese 1 1
|
|
||||||
jpr Judeo-Persian 0 0
|
|
||||||
jrb Judeo-Arabic 0 0
|
|
||||||
kaa Kara-Kalpak 0 0
|
|
||||||
kab Kabyle 0 0
|
|
||||||
kac Kachin 0 0
|
|
||||||
kal kl Kalaallisut 0 0
|
|
||||||
kam Kamba 0 0
|
|
||||||
kan kn Kannada 0 0
|
|
||||||
kar Karen 0 0
|
|
||||||
kas ks Kashmiri 0 0
|
|
||||||
kau kr Kanuri 0 0
|
|
||||||
kaw Kawi 0 0
|
|
||||||
kaz kk Kazakh 1 0
|
|
||||||
kbd Kabardian 0 0
|
|
||||||
kha Khasi 0 0
|
|
||||||
khi Khoisan (Other) 0 0
|
|
||||||
khm km Khmer 1 1
|
|
||||||
kho Khotanese 0 0
|
|
||||||
kik ki Kikuyu 0 0
|
|
||||||
kin rw Kinyarwanda 0 0
|
|
||||||
kir ky Kirghiz 0 0
|
|
||||||
kmb Kimbundu 0 0
|
|
||||||
kok Konkani 0 0
|
|
||||||
kom kv Komi 0 0
|
|
||||||
kon kg Kongo 0 0
|
|
||||||
kor ko Korean 1 1
|
|
||||||
kos Kosraean 0 0
|
|
||||||
kpe Kpelle 0 0
|
|
||||||
krc Karachay-Balkar 0 0
|
|
||||||
kro Kru 0 0
|
|
||||||
kru Kurukh 0 0
|
|
||||||
kua kj Kuanyama 0 0
|
|
||||||
kum Kumyk 0 0
|
|
||||||
kur ku Kurdish 0 0
|
|
||||||
kut Kutenai 0 0
|
|
||||||
lad Ladino 0 0
|
|
||||||
lah Lahnda 0 0
|
|
||||||
lam Lamba 0 0
|
|
||||||
lao lo Lao 0 0
|
|
||||||
lat la Latin 0 0
|
|
||||||
lav lv Latvian 1 0
|
|
||||||
lez Lezghian 0 0
|
|
||||||
lim li Limburgan 0 0
|
|
||||||
lin ln Lingala 0 0
|
|
||||||
lit lt Lithuanian 1 0
|
|
||||||
lol Mongo 0 0
|
|
||||||
loz Lozi 0 0
|
|
||||||
ltz lb Luxembourgish 1 0
|
|
||||||
lua Luba-Lulua 0 0
|
|
||||||
lub lu Luba-Katanga 0 0
|
|
||||||
lug lg Ganda 0 0
|
|
||||||
lui Luiseno 0 0
|
|
||||||
lun Lunda 0 0
|
|
||||||
luo Luo (Kenya and Tanzania) 0 0
|
|
||||||
lus lushai 0 0
|
|
||||||
mac mk Macedonian 1 1
|
|
||||||
mad Madurese 0 0
|
|
||||||
mag Magahi 0 0
|
|
||||||
mah mh Marshallese 0 0
|
|
||||||
mai Maithili 0 0
|
|
||||||
mak Makasar 0 0
|
|
||||||
mal ml Malayalam 1 0
|
|
||||||
man Mandingo 0 0
|
|
||||||
mao mi Maori 0 0
|
|
||||||
map Austronesian (Other) 0 0
|
|
||||||
mar mr Marathi 0 0
|
|
||||||
mas Masai 0 0
|
|
||||||
may ms Malay 1 1
|
|
||||||
mdf Moksha 0 0
|
|
||||||
mdr Mandar 0 0
|
|
||||||
men Mende 0 0
|
|
||||||
mga Irish, Middle (900-1200) 0 0
|
|
||||||
mic Mi'kmaq 0 0
|
|
||||||
min Minangkabau 0 0
|
|
||||||
mis Miscellaneous languages 0 0
|
|
||||||
mkh Mon-Khmer (Other) 0 0
|
|
||||||
mlg mg Malagasy 0 0
|
|
||||||
mlt mt Maltese 0 0
|
|
||||||
mnc Manchu 0 0
|
|
||||||
mni Manipuri 0 0
|
|
||||||
mno Manobo languages 0 0
|
|
||||||
moh Mohawk 0 0
|
|
||||||
mol mo Moldavian 0 0
|
|
||||||
mon mn Mongolian 1 0
|
|
||||||
mos Mossi 0 0
|
|
||||||
mwl Mirandese 0 0
|
|
||||||
mul Multiple languages 0 0
|
|
||||||
mun Munda languages 0 0
|
|
||||||
mus Creek 0 0
|
|
||||||
mwr Marwari 0 0
|
|
||||||
myn Mayan languages 0 0
|
|
||||||
myv Erzya 0 0
|
|
||||||
nah Nahuatl 0 0
|
|
||||||
nai North American Indian 0 0
|
|
||||||
nap Neapolitan 0 0
|
|
||||||
nau na Nauru 0 0
|
|
||||||
nav nv Navajo 0 0
|
|
||||||
nbl nr Ndebele, South 0 0
|
|
||||||
nde nd Ndebele, North 0 0
|
|
||||||
ndo ng Ndonga 0 0
|
|
||||||
nds Low German 0 0
|
|
||||||
nep ne Nepali 0 0
|
|
||||||
new Nepal Bhasa 0 0
|
|
||||||
nia Nias 0 0
|
|
||||||
nic Niger-Kordofanian (Other) 0 0
|
|
||||||
niu Niuean 0 0
|
|
||||||
nno nn Norwegian Nynorsk 0 0
|
|
||||||
nob nb Norwegian Bokmal 0 0
|
|
||||||
nog Nogai 0 0
|
|
||||||
non Norse, Old 0 0
|
|
||||||
nor no Norwegian 1 1
|
|
||||||
nso Northern Sotho 0 0
|
|
||||||
nub Nubian languages 0 0
|
|
||||||
nwc Classical Newari 0 0
|
|
||||||
nya ny Chichewa 0 0
|
|
||||||
nym Nyamwezi 0 0
|
|
||||||
nyn Nyankole 0 0
|
|
||||||
nyo Nyoro 0 0
|
|
||||||
nzi Nzima 0 0
|
|
||||||
oci oc Occitan 1 1
|
|
||||||
oji oj Ojibwa 0 0
|
|
||||||
ori or Oriya 0 0
|
|
||||||
orm om Oromo 0 0
|
|
||||||
osa Osage 0 0
|
|
||||||
oss os Ossetian 0 0
|
|
||||||
ota Turkish, Ottoman (1500-1928) 0 0
|
|
||||||
oto Otomian languages 0 0
|
|
||||||
paa Papuan (Other) 0 0
|
|
||||||
pag Pangasinan 0 0
|
|
||||||
pal Pahlavi 0 0
|
|
||||||
pam Pampanga 0 0
|
|
||||||
pan pa Panjabi 0 0
|
|
||||||
pap Papiamento 0 0
|
|
||||||
pau Palauan 0 0
|
|
||||||
peo Persian, Old (ca.600-400 B.C.) 0 0
|
|
||||||
per fa Persian 1 1
|
|
||||||
phi Philippine (Other) 0 0
|
|
||||||
phn Phoenician 0 0
|
|
||||||
pli pi Pali 0 0
|
|
||||||
pol pl Polish 1 1
|
|
||||||
pon Pohnpeian 0 0
|
|
||||||
por pt Portuguese 1 1
|
|
||||||
pra Prakrit languages 0 0
|
|
||||||
pro Provençal, Old (to 1500) 0 0
|
|
||||||
pus ps Pushto 0 0
|
|
||||||
que qu Quechua 0 0
|
|
||||||
raj Rajasthani 0 0
|
|
||||||
rap Rapanui 0 0
|
|
||||||
rar Rarotongan 0 0
|
|
||||||
roa Romance (Other) 0 0
|
|
||||||
roh rm Raeto-Romance 0 0
|
|
||||||
rom Romany 0 0
|
|
||||||
run rn Rundi 0 0
|
|
||||||
rup Aromanian 0 0
|
|
||||||
rus ru Russian 1 1
|
|
||||||
sad Sandawe 0 0
|
|
||||||
sag sg Sango 0 0
|
|
||||||
sah Yakut 0 0
|
|
||||||
sai South American Indian (Other) 0 0
|
|
||||||
sal Salishan languages 0 0
|
|
||||||
sam Samaritan Aramaic 0 0
|
|
||||||
san sa Sanskrit 0 0
|
|
||||||
sas Sasak 0 0
|
|
||||||
sat Santali 0 0
|
|
||||||
scc sr Serbian 1 1
|
|
||||||
scn Sicilian 0 0
|
|
||||||
sco Scots 0 0
|
|
||||||
sel Selkup 0 0
|
|
||||||
sem Semitic (Other) 0 0
|
|
||||||
sga Irish, Old (to 900) 0 0
|
|
||||||
sgn Sign Languages 0 0
|
|
||||||
shn Shan 0 0
|
|
||||||
sid Sidamo 0 0
|
|
||||||
sin si Sinhalese 1 1
|
|
||||||
sio Siouan languages 0 0
|
|
||||||
sit Sino-Tibetan (Other) 0 0
|
|
||||||
sla Slavic (Other) 0 0
|
|
||||||
slo sk Slovak 1 1
|
|
||||||
slv sl Slovenian 1 1
|
|
||||||
sma Southern Sami 0 0
|
|
||||||
sme se Northern Sami 0 0
|
|
||||||
smi Sami languages (Other) 0 0
|
|
||||||
smj Lule Sami 0 0
|
|
||||||
smn Inari Sami 0 0
|
|
||||||
smo sm Samoan 0 0
|
|
||||||
sms Skolt Sami 0 0
|
|
||||||
sna sn Shona 0 0
|
|
||||||
snd sd Sindhi 0 0
|
|
||||||
snk Soninke 0 0
|
|
||||||
sog Sogdian 0 0
|
|
||||||
som so Somali 0 0
|
|
||||||
son Songhai 0 0
|
|
||||||
sot st Sotho, Southern 0 0
|
|
||||||
spa es Spanish 1 1
|
|
||||||
srd sc Sardinian 0 0
|
|
||||||
srr Serer 0 0
|
|
||||||
ssa Nilo-Saharan (Other) 0 0
|
|
||||||
ssw ss Swati 0 0
|
|
||||||
suk Sukuma 0 0
|
|
||||||
sun su Sundanese 0 0
|
|
||||||
sus Susu 0 0
|
|
||||||
sux Sumerian 0 0
|
|
||||||
swa sw Swahili 1 0
|
|
||||||
swe sv Swedish 1 1
|
|
||||||
syr Syriac 1 0
|
|
||||||
tah ty Tahitian 0 0
|
|
||||||
tai Tai (Other) 0 0
|
|
||||||
tam ta Tamil 1 0
|
|
||||||
tat tt Tatar 0 0
|
|
||||||
tel te Telugu 1 0
|
|
||||||
tem Timne 0 0
|
|
||||||
ter Tereno 0 0
|
|
||||||
tet Tetum 0 0
|
|
||||||
tgk tg Tajik 0 0
|
|
||||||
tgl tl Tagalog 1 1
|
|
||||||
tha th Thai 1 1
|
|
||||||
tib bo Tibetan 0 0
|
|
||||||
tig Tigre 0 0
|
|
||||||
tir ti Tigrinya 0 0
|
|
||||||
tiv Tiv 0 0
|
|
||||||
tkl Tokelau 0 0
|
|
||||||
tlh Klingon 0 0
|
|
||||||
tli Tlingit 0 0
|
|
||||||
tmh Tamashek 0 0
|
|
||||||
tog Tonga (Nyasa) 0 0
|
|
||||||
ton to Tonga (Tonga Islands) 0 0
|
|
||||||
tpi Tok Pisin 0 0
|
|
||||||
tsi Tsimshian 0 0
|
|
||||||
tsn tn Tswana 0 0
|
|
||||||
tso ts Tsonga 0 0
|
|
||||||
tuk tk Turkmen 0 0
|
|
||||||
tum Tumbuka 0 0
|
|
||||||
tup Tupi languages 0 0
|
|
||||||
tur tr Turkish 1 1
|
|
||||||
tut Altaic (Other) 0 0
|
|
||||||
tvl Tuvalu 0 0
|
|
||||||
twi tw Twi 0 0
|
|
||||||
tyv Tuvinian 0 0
|
|
||||||
udm Udmurt 0 0
|
|
||||||
uga Ugaritic 0 0
|
|
||||||
uig ug Uighur 0 0
|
|
||||||
ukr uk Ukrainian 1 1
|
|
||||||
umb Umbundu 0 0
|
|
||||||
und Undetermined 0 0
|
|
||||||
urd ur Urdu 1 0
|
|
||||||
uzb uz Uzbek 0 0
|
|
||||||
vai Vai 0 0
|
|
||||||
ven ve Venda 0 0
|
|
||||||
vie vi Vietnamese 1 1
|
|
||||||
vol vo Volapük 0 0
|
|
||||||
vot Votic 0 0
|
|
||||||
wak Wakashan languages 0 0
|
|
||||||
wal Walamo 0 0
|
|
||||||
war Waray 0 0
|
|
||||||
was Washo 0 0
|
|
||||||
wel cy Welsh 0 0
|
|
||||||
wen Sorbian languages 0 0
|
|
||||||
wln wa Walloon 0 0
|
|
||||||
wol wo Wolof 0 0
|
|
||||||
xal Kalmyk 0 0
|
|
||||||
xho xh Xhosa 0 0
|
|
||||||
yao Yao 0 0
|
|
||||||
yap Yapese 0 0
|
|
||||||
yid yi Yiddish 0 0
|
|
||||||
yor yo Yoruba 0 0
|
|
||||||
ypk Yupik languages 0 0
|
|
||||||
zap Zapotec 0 0
|
|
||||||
zen Zenaga 0 0
|
|
||||||
zha za Zhuang 0 0
|
|
||||||
znd Zande 0 0
|
|
||||||
zul zu Zulu 0 0
|
|
||||||
zun Zuni 0 0
|
|
||||||
rum ro Romanian 1 1
|
|
||||||
pob pb Brazilian 1 1
|
|
||||||
mne Montenegrin 1 0
|
|
||||||
@@ -1,85 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
|
|
||||||
class Error(Exception):
    """Root of the babelfish exception hierarchy.

    Every exception defined by babelfish derives from this class.
    """
|
|
||||||
|
|
||||||
|
|
||||||
class LanguageError(Error, AttributeError):
    """Common ancestor of all language exceptions in babelfish.

    It also derives from :class:`AttributeError`, so handlers written for
    ``AttributeError`` catch it as well.
    """
|
|
||||||
|
|
||||||
|
|
||||||
class LanguageConvertError(LanguageError):
    """Exception raised by converters when :meth:`~babelfish.converters.LanguageConverter.convert` fails

    :param string alpha3: alpha3 code that failed conversion
    :param country: country code that failed conversion, if any
    :type country: string or None
    :param script: script code that failed conversion, if any
    :type script: string or None

    """
    def __init__(self, alpha3, country=None, script=None):
        # Forward the values to the base exception so ``args`` is always
        # populated, even when the caller used keyword arguments. Pickling and
        # copying rebuild an exception from ``args``; an empty tuple would make
        # the round-trip fail on the required ``alpha3`` parameter.
        super(LanguageConvertError, self).__init__(alpha3, country, script)
        self.alpha3 = alpha3
        self.country = country
        self.script = script

    def __str__(self):
        """Return ``alpha3`` followed by ``-country`` and ``-script`` when they are set."""
        s = self.alpha3
        if self.country is not None:
            s += '-' + self.country
        if self.script is not None:
            s += '-' + self.script
        return s
|
|
||||||
|
|
||||||
|
|
||||||
class LanguageReverseError(LanguageError):
    """Exception raised by converters when :meth:`~babelfish.converters.LanguageReverseConverter.reverse` fails

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        # Populate ``args`` explicitly so the exception can be rebuilt by
        # pickle/copy even when ``code`` was passed as a keyword argument.
        super(LanguageReverseError, self).__init__(code)
        self.code = code

    def __str__(self):
        """Return the ``repr`` of the code that could not be reversed."""
        return repr(self.code)
|
|
||||||
|
|
||||||
|
|
||||||
class CountryError(Error, AttributeError):
    """Common ancestor of all country exceptions in babelfish.

    It also derives from :class:`AttributeError`, so handlers written for
    ``AttributeError`` catch it as well.
    """
|
|
||||||
|
|
||||||
|
|
||||||
class CountryConvertError(CountryError):
    """Exception raised by converters when :meth:`~babelfish.converters.CountryConverter.convert` fails

    :param string alpha2: alpha2 code that failed conversion

    """
    def __init__(self, alpha2):
        # Populate ``args`` explicitly so the exception can be rebuilt by
        # pickle/copy even when ``alpha2`` was passed as a keyword argument.
        super(CountryConvertError, self).__init__(alpha2)
        self.alpha2 = alpha2

    def __str__(self):
        """Return the alpha2 code that could not be converted."""
        return self.alpha2
|
|
||||||
|
|
||||||
|
|
||||||
class CountryReverseError(CountryError):
    """Exception raised by converters when :meth:`~babelfish.converters.CountryReverseConverter.reverse` fails

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        # Populate ``args`` explicitly so the exception can be rebuilt by
        # pickle/copy even when ``code`` was passed as a keyword argument.
        super(CountryReverseError, self).__init__(code)
        self.code = code

    def __str__(self):
        """Return the ``repr`` of the code that could not be reversed."""
        return repr(self.code)
|
|
||||||
@@ -1,185 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from collections import namedtuple
|
|
||||||
from functools import partial
|
|
||||||
from pkg_resources import resource_stream # @UnresolvedImport
|
|
||||||
from .converters import ConverterManager
|
|
||||||
from .country import Country
|
|
||||||
from .exceptions import LanguageConvertError
|
|
||||||
from .script import Script
|
|
||||||
from . import basestr
|
|
||||||
|
|
||||||
|
|
||||||
#: Set of the ``alpha3`` values read from ``data/iso-639-3.tab``
LANGUAGES = set()

#: One :class:`IsoLanguage` per data row of ``data/iso-639-3.tab``, in file order
LANGUAGE_MATRIX = []

#: The namedtuple used in the :data:`LANGUAGE_MATRIX`
IsoLanguage = namedtuple('IsoLanguage', ['alpha3', 'alpha3b', 'alpha3t', 'alpha2', 'scope', 'type', 'name', 'comment'])

f = resource_stream('babelfish', 'data/iso-639-3.tab')
try:
    # The first line is discarded (presumably the column header row).
    f.readline()
    for l in f:
        # NOTE(review): the decoded line is split without being stripped, so
        # the last field (``comment``) keeps its line terminator -- confirm
        # whether callers rely on that before changing it.
        iso_language = IsoLanguage(*l.decode('utf-8').split('\t'))
        LANGUAGES.add(iso_language.alpha3)
        LANGUAGE_MATRIX.append(iso_language)
finally:
    # Release the stream even when a row has the wrong number of fields or
    # cannot be decoded.
    f.close()
|
|
||||||
|
|
||||||
|
|
||||||
class LanguageConverterManager(ConverterManager):
    """:class:`~babelfish.converters.ConverterManager` specialised for language converters"""
    #: Entry point group name used for language converters
    entry_point = 'babelfish.language_converters'
    #: Converters declared by babelfish itself, as ``name = module:Class`` strings
    internal_converters = [
        'alpha2 = babelfish.converters.alpha2:Alpha2Converter',
        'alpha3b = babelfish.converters.alpha3b:Alpha3BConverter',
        'alpha3t = babelfish.converters.alpha3t:Alpha3TConverter',
        'name = babelfish.converters.name:NameConverter',
        'scope = babelfish.converters.scope:ScopeConverter',
        'type = babelfish.converters.type:LanguageTypeConverter',
        'opensubtitles = babelfish.converters.opensubtitles:OpenSubtitlesConverter',
    ]


#: Module-wide manager instance, indexed by converter name elsewhere in this module
language_converters = LanguageConverterManager()
|
|
||||||
|
|
||||||
|
|
||||||
class LanguageMeta(type):
    """Metaclass of :class:`Language`

    A class attribute that is not found by the normal lookup and whose name
    begins with ``from`` is resolved to :meth:`Language.fromcode` with the
    remainder of the name pre-bound as `converter`. For example
    ``Language.frommycode(code)`` behaves like
    ``Language.fromcode(code, converter='mycode')``.

    """
    def __getattr__(cls, name):
        prefix = 'from'
        if not name.startswith(prefix):
            # Defer to the regular class lookup, which raises AttributeError
            # for names that really do not exist.
            return type.__getattribute__(cls, name)
        converter_name = name[len(prefix):]
        return partial(cls.fromcode, converter=converter_name)
|
|
||||||
|
|
||||||
|
|
||||||
class Language(LanguageMeta(str('LanguageBase'), (object,), {})):
    """A human language

    A human language is composed of a language part following the ISO-639
    standard and can be country-specific when a :class:`~babelfish.country.Country`
    is specified.

    The :class:`Language` is extensible with custom converters (see :ref:`custom_converters`)

    :param string language: the language as a 3-letter ISO-639-3 code
    :param country: the country (if any) as a 2-letter ISO-3166 code or :class:`~babelfish.country.Country` instance
    :type country: string or :class:`~babelfish.country.Country` or None
    :param script: the script (if any) as a 4-letter ISO-15924 code or :class:`~babelfish.script.Script` instance
    :type script: string or :class:`~babelfish.script.Script` or None
    :param unknown: the unknown language as a three-letters ISO-639-3 code to use as fallback
    :type unknown: string or None
    :raise: ValueError if the language could not be recognized and `unknown` is ``None``

    """
    def __init__(self, language, country=None, script=None, unknown=None):
        # Substitute the fallback only when one was given and the requested
        # code is unknown; the fallback itself is validated just below.
        if unknown is not None and language not in LANGUAGES:
            language = unknown
        if language not in LANGUAGES:
            raise ValueError('%r is not a valid language' % language)
        self.alpha3 = language
        # ``None`` and ready-made instances are stored as they are; any other
        # value is handed to the corresponding constructor.
        if country is None or isinstance(country, Country):
            self.country = country
        else:
            self.country = Country(country)
        if script is None or isinstance(script, Script):
            self.script = script
        else:
            self.script = Script(script)

    @classmethod
    def fromcode(cls, code, converter):
        """Create a :class:`Language` by its `code` using `converter` to
        :meth:`~babelfish.converters.LanguageReverseConverter.reverse` it

        :param string code: the code to reverse
        :param string converter: name of the :class:`~babelfish.converters.LanguageReverseConverter` to use
        :return: the corresponding :class:`Language` instance
        :rtype: :class:`Language`

        """
        return cls(*language_converters[converter].reverse(code))

    @classmethod
    def fromietf(cls, ietf):
        """Create a :class:`Language` from an IETF language code

        :param string ietf: the ietf code
        :return: the corresponding :class:`Language` instance
        :rtype: :class:`Language`
        :raise: ValueError if subtags remain after a script subtag

        """
        subtags = ietf.split('-')
        language_subtag = subtags.pop(0).lower()
        # A 2-letter language subtag goes through the ``alpha2`` converter
        # (resolved by the metaclass); anything else is passed to the
        # constructor unchanged.
        if len(language_subtag) == 2:
            language = cls.fromalpha2(language_subtag)
        else:
            language = cls(language_subtag)
        while subtags:
            subtag = subtags.pop(0)
            # 2-letter subtags are read as a country, all others as a script.
            if len(subtag) == 2:
                language.country = Country(subtag.upper())
            else:
                language.script = Script(subtag.capitalize())
            # Once a script is set nothing may follow it.
            if language.script is not None:
                if subtags:
                    raise ValueError('Wrong IETF format. Unmatched subtags: %r' % subtags)
                break
        return language

    def __getstate__(self):
        return self.alpha3, self.country, self.script

    def __setstate__(self, state):
        self.alpha3, self.country, self.script = state

    def __getattr__(self, name):
        """Resolve unknown attributes through the converter named `name`

        :raise: AttributeError if no converter is registered under `name`

        """
        # __getattr__ only runs after the normal lookup failed. If one of the
        # core attributes is itself missing (an instance created without
        # __init__ or __setstate__), reading it below would re-enter this
        # method without end, so fail with a plain AttributeError instead.
        if name in ('alpha3', 'country', 'script'):
            raise AttributeError(name)
        alpha3 = self.alpha3
        country = self.country.alpha2 if self.country is not None else None
        script = self.script.code if self.script is not None else None
        try:
            return language_converters[name].convert(alpha3, country, script)
        except KeyError:
            raise AttributeError(name)

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        # Strings are compared against the textual form of the language.
        if isinstance(other, basestr):
            return str(self) == other
        if not isinstance(other, Language):
            return False
        return (self.alpha3 == other.alpha3 and
                self.country == other.country and
                self.script == other.script)

    def __ne__(self, other):
        return not self == other

    def __bool__(self):
        # Only the ``und`` code is falsy.
        return self.alpha3 != 'und'
    __nonzero__ = __bool__

    def __repr__(self):
        return '<Language [%s]>' % self

    def __str__(self):
        # Use the alpha2 code when the converter can produce one, otherwise
        # fall back to alpha3; country and script are appended when set.
        try:
            s = self.alpha2
        except LanguageConvertError:
            s = self.alpha3
        if self.country is not None:
            s += '-' + str(self.country)
        if self.script is not None:
            s += '-' + str(self.script)
        return s
|
|
||||||
@@ -1,76 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
from collections import namedtuple
|
|
||||||
from pkg_resources import resource_stream # @UnresolvedImport
|
|
||||||
from . import basestr
|
|
||||||
|
|
||||||
#: Script code to script name mapping
SCRIPTS = {}

#: List of scripts in the ISO-15924 as namedtuple of code, number, name, french_name, pva and date
SCRIPT_MATRIX = []

#: The namedtuple used in the :data:`SCRIPT_MATRIX`
IsoScript = namedtuple('IsoScript', ['code', 'number', 'name', 'french_name', 'pva', 'date'])

f = resource_stream('babelfish', 'data/iso15924-utf8-20131012.txt')
try:
    # The first line is discarded unconditionally.
    f.readline()
    for l in f:
        l = l.decode('utf-8').strip()
        # Blank lines and ``#`` comment lines carry no record.
        if not l or l.startswith('#'):
            continue
        script = IsoScript._make(l.split(';'))
        SCRIPT_MATRIX.append(script)
        SCRIPTS[script.code] = script.name
finally:
    # Release the stream even when a row has the wrong number of fields or
    # cannot be decoded.
    f.close()
|
|
||||||
|
|
||||||
|
|
||||||
class Script(object):
    """A human writing system

    Instances wrap a single 4-letter ISO-15924 code and compare equal to other
    instances, or to plain strings, carrying the same code.

    :param string script: 4-letter ISO-15924 script code
    :raise: ValueError if `script` is not a key of :data:`SCRIPTS`

    """
    def __init__(self, script):
        if script in SCRIPTS:
            #: ISO-15924 4-letter script code
            self.code = script
        else:
            raise ValueError('%r is not a valid script' % script)

    @property
    def name(self):
        """English name of the script, looked up in :data:`SCRIPTS`"""
        return SCRIPTS[self.code]

    def __getstate__(self):
        # The code alone is the whole pickled state.
        return self.code

    def __setstate__(self, state):
        self.code = state

    def __hash__(self):
        # Hashes like the bare code string.
        return hash(self.code)

    def __eq__(self, other):
        if isinstance(other, basestr):
            other_code = other
        elif isinstance(other, Script):
            other_code = other.code
        else:
            return False
        return self.code == other_code

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        return '<Script [%s]>' % self

    def __str__(self):
        return self.code
|
|
||||||
@@ -1,377 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
|
|
||||||
# Use of this source code is governed by the 3-clause BSD license
|
|
||||||
# that can be found in the LICENSE file.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
import pickle
|
|
||||||
from unittest import TestCase, TestSuite, TestLoader, TextTestRunner
|
|
||||||
from pkg_resources import resource_stream # @UnresolvedImport
|
|
||||||
from babelfish import (LANGUAGES, Language, Country, Script, language_converters, country_converters,
|
|
||||||
LanguageReverseConverter, LanguageConvertError, LanguageReverseError, CountryReverseError)
|
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info[:2] <= (2, 6):
    # Python <= 2.6 only: local copies of helpers and assertion methods that
    # the test cases below rely on.
    _MAX_LENGTH = 80

    def safe_repr(obj, short=False):
        """Return ``repr(obj)``, falling back to ``object.__repr__`` on error.

        With `short` set, results of `_MAX_LENGTH` characters or more are cut
        and suffixed with a truncation marker.
        """
        try:
            text = repr(obj)
        except Exception:
            text = object.__repr__(obj)
        if short and len(text) >= _MAX_LENGTH:
            return text[:_MAX_LENGTH] + ' [truncated]...'
        return text

    class _AssertRaisesContext(object):
        """A context manager used to implement TestCase.assertRaises* methods."""

        def __init__(self, expected, test_case, expected_regexp=None):
            self.expected = expected
            self.failureException = test_case.failureException
            self.expected_regexp = expected_regexp

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_value, tb):
            if exc_type is None:
                # Nothing was raised inside the ``with`` body: report a failure.
                try:
                    exc_name = self.expected.__name__
                except AttributeError:
                    exc_name = str(self.expected)
                raise self.failureException(
                    "{0} not raised".format(exc_name))
            if not issubclass(exc_type, self.expected):
                # let unexpected exceptions pass through
                return False
            self.exception = exc_value  # store for later retrieval
            pattern = self.expected_regexp
            if pattern is None:
                return True

            # Plain strings are compiled on the fly.
            if isinstance(pattern, basestring):
                pattern = re.compile(pattern)
            message = str(exc_value)
            if not pattern.search(message):
                raise self.failureException('"%s" does not match "%s"' %
                                            (pattern.pattern, str(exc_value)))
            return True

    class _Py26FixTestCase(object):
        """Mixin supplying assertion helpers on Python <= 2.6."""

        def assertIsNone(self, obj, msg=None):
            """Same as self.assertTrue(obj is None), with a nicer default message."""
            if obj is None:
                return
            default_msg = '%s is not None' % (safe_repr(obj),)
            self.fail(self._formatMessage(msg, default_msg))

        def assertIsNotNone(self, obj, msg=None):
            """Included for symmetry with assertIsNone."""
            if obj is not None:
                return
            default_msg = 'unexpectedly None'
            self.fail(self._formatMessage(msg, default_msg))

        def assertIn(self, member, container, msg=None):
            """Just like self.assertTrue(a in b), but with a nicer default message."""
            if member in container:
                return
            default_msg = '%s not found in %s' % (safe_repr(member),
                                                  safe_repr(container))
            self.fail(self._formatMessage(msg, default_msg))

        def assertNotIn(self, member, container, msg=None):
            """Just like self.assertTrue(a not in b), but with a nicer default message."""
            if member not in container:
                return
            default_msg = '%s unexpectedly found in %s' % (safe_repr(member),
                                                           safe_repr(container))
            self.fail(self._formatMessage(msg, default_msg))

        def assertIs(self, expr1, expr2, msg=None):
            """Just like self.assertTrue(a is b), but with a nicer default message."""
            if expr1 is expr2:
                return
            default_msg = '%s is not %s' % (safe_repr(expr1),
                                            safe_repr(expr2))
            self.fail(self._formatMessage(msg, default_msg))

        def assertIsNot(self, expr1, expr2, msg=None):
            """Just like self.assertTrue(a is not b), but with a nicer default message."""
            if expr1 is not expr2:
                return
            default_msg = 'unexpectedly identical: %s' % (safe_repr(expr1),)
            self.fail(self._formatMessage(msg, default_msg))

else:
    class _Py26FixTestCase(object):
        # On newer interpreters the mixin adds nothing.
        pass
|
|
||||||
|
|
||||||
|
|
||||||
class TestScript(TestCase, _Py26FixTestCase):
    """Checks construction, comparison, hashing and pickling of ``Script``."""

    def test_wrong_script(self):
        # 'Azer' must be rejected as a script code.
        self.assertRaises(ValueError, Script, 'Azer')

    def test_eq(self):
        first = Script('Latn')
        second = Script('Latn')
        self.assertEqual(first, second)

    def test_ne(self):
        cyrillic = Script('Cyrl')
        latin = Script('Latn')
        self.assertNotEqual(cyrillic, latin)

    def test_hash(self):
        # A Script hashes like its code string.
        expected = hash('Hira')
        self.assertEqual(hash(Script('Hira')), expected)

    def test_pickle(self):
        restored = pickle.loads(pickle.dumps(Script('Latn')))
        self.assertEqual(restored, Script('Latn'))
|
|
||||||
|
|
||||||
|
|
||||||
class TestCountry(TestCase, _Py26FixTestCase):
    """Checks ``Country`` construction, comparison, hashing, pickling and the name converter."""

    def test_wrong_country(self):
        # 'ZZ' must be rejected as a country code.
        self.assertRaises(ValueError, Country, 'ZZ')

    def test_eq(self):
        first = Country('US')
        second = Country('US')
        self.assertEqual(first, second)

    def test_ne(self):
        self.assertNotEqual(Country('GB'), Country('US'))
        self.assertIsNotNone(Country('US'))

    def test_hash(self):
        # A Country hashes like its code string.
        expected = hash('US')
        self.assertEqual(hash(Country('US')), expected)

    def test_pickle(self):
        for code in ('GB', 'US'):
            country = Country(code)
            restored = pickle.loads(pickle.dumps(country))
            self.assertEqual(restored, country)

    def test_converter_name(self):
        united_states = Country('US')
        self.assertEqual(Country('US').name, 'UNITED STATES')
        self.assertEqual(Country.fromname('UNITED STATES'), united_states)
        self.assertEqual(Country.fromcode('UNITED STATES', 'name'), united_states)
        self.assertRaises(CountryReverseError, lambda: Country.fromname('ZZZZZ'))
        known_names = country_converters['name'].codes
        self.assertEqual(len(known_names), 249)
|
|
||||||
|
|
||||||
|
|
||||||
class TestLanguage(TestCase, _Py26FixTestCase):
    """Tests for ``Language``.

    Covers construction, the registered converters (alpha2, alpha3b, alpha3t,
    name, scope, type, opensubtitles), IETF tag parsing, comparison, hashing,
    pickling, string round-tripping and runtime converter registration.
    """

    def test_languages(self):
        self.assertEqual(len(LANGUAGES), 7874)

    def test_wrong_language(self):
        self.assertRaises(ValueError, lambda: Language('zzz'))

    def test_unknown_language(self):
        # An unrecognised code falls back to the language given as ``unknown``.
        self.assertEqual(Language('zzzz', unknown='und'), Language('und'))

    def test_converter_alpha2(self):
        self.assertEqual(Language('eng').alpha2, 'en')
        self.assertEqual(Language.fromalpha2('en'), Language('eng'))
        self.assertEqual(Language.fromcode('en', 'alpha2'), Language('eng'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha2('zz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha2)
        self.assertEqual(len(language_converters['alpha2'].codes), 184)

    def test_converter_alpha3b(self):
        self.assertEqual(Language('fra').alpha3b, 'fre')
        self.assertEqual(Language.fromalpha3b('fre'), Language('fra'))
        self.assertEqual(Language.fromcode('fre', 'alpha3b'), Language('fra'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3b('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3b)
        self.assertEqual(len(language_converters['alpha3b'].codes), 418)

    def test_converter_alpha3t(self):
        self.assertEqual(Language('fra').alpha3t, 'fra')
        self.assertEqual(Language.fromalpha3t('fra'), Language('fra'))
        self.assertEqual(Language.fromcode('fra', 'alpha3t'), Language('fra'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromalpha3t('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').alpha3t)
        self.assertEqual(len(language_converters['alpha3t'].codes), 418)

    def test_converter_name(self):
        self.assertEqual(Language('eng').name, 'English')
        self.assertEqual(Language.fromname('English'), Language('eng'))
        self.assertEqual(Language.fromcode('English', 'name'), Language('eng'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromname('Zzzzzzzzz'))
        self.assertEqual(len(language_converters['name'].codes), 7874)

    def test_converter_scope(self):
        self.assertEqual(language_converters['scope'].codes, set(['I', 'S', 'M']))
        self.assertEqual(Language('eng').scope, 'individual')
        self.assertEqual(Language('und').scope, 'special')

    def test_converter_type(self):
        self.assertEqual(language_converters['type'].codes, set(['A', 'C', 'E', 'H', 'L', 'S']))
        self.assertEqual(Language('eng').type, 'living')
        self.assertEqual(Language('und').type, 'special')

    def test_converter_opensubtitles(self):
        self.assertEqual(Language('fra').opensubtitles, Language('fra').alpha3b)
        self.assertEqual(Language('por', 'BR').opensubtitles, 'pob')
        self.assertEqual(Language.fromopensubtitles('fre'), Language('fra'))
        self.assertEqual(Language.fromopensubtitles('pob'), Language('por', 'BR'))
        self.assertEqual(Language.fromopensubtitles('pb'), Language('por', 'BR'))
        # Montenegrin is not recognized as an ISO language (yet?) but for now it is
        # unofficially accepted as Serbian from Montenegro
        self.assertEqual(Language.fromopensubtitles('mne'), Language('srp', 'ME'))
        self.assertEqual(Language.fromcode('pob', 'opensubtitles'), Language('por', 'BR'))
        self.assertRaises(LanguageReverseError, lambda: Language.fromopensubtitles('zzz'))
        self.assertRaises(LanguageConvertError, lambda: Language('aaa').opensubtitles)
        self.assertEqual(len(language_converters['opensubtitles'].codes), 607)

        # test with all the LANGUAGES from the opensubtitles api
        # downloaded from: http://www.opensubtitles.org/addons/export_languages.php
        stream = resource_stream('babelfish', 'data/opensubtitles_languages.txt')
        try:
            stream.readline()  # skip the first line; the loop starts from the second
            for line in stream:
                idlang, alpha2, _, upload_enabled, web_enabled = line.decode('utf-8').strip().split('\t')
                if not int(upload_enabled) and not int(web_enabled):
                    # do not test LANGUAGES that are too esoteric / not widely available
                    continue
                self.assertEqual(Language.fromopensubtitles(idlang).opensubtitles, idlang)
                if alpha2:
                    self.assertEqual(Language.fromopensubtitles(idlang), Language.fromopensubtitles(alpha2))
        finally:
            # Close the stream even when an assertion above fails.
            stream.close()

    def test_converter_opensubtitles_codes(self):
        for code in language_converters['opensubtitles'].from_opensubtitles.keys():
            self.assertIn(code, language_converters['opensubtitles'].codes)

    def test_fromietf_country_script(self):
        language = Language.fromietf('fra-FR-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertEqual(language.country, Country('FR'))
        self.assertEqual(language.script, Script('Latn'))

    def test_fromietf_country_no_script(self):
        language = Language.fromietf('fra-FR')
        self.assertEqual(language.alpha3, 'fra')
        self.assertEqual(language.country, Country('FR'))
        self.assertIsNone(language.script)

    def test_fromietf_no_country_no_script(self):
        # A bare language tag: neither a country nor a script subtag is present.
        language = Language.fromietf('fra')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertIsNone(language.script)

    def test_fromietf_no_country_script(self):
        language = Language.fromietf('fra-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertEqual(language.script, Script('Latn'))

    def test_fromietf_alpha2_language(self):
        language = Language.fromietf('fr-Latn')
        self.assertEqual(language.alpha3, 'fra')
        self.assertIsNone(language.country)
        self.assertEqual(language.script, Script('Latn'))

    def test_fromietf_wrong_language(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('xyz-FR'))

    def test_fromietf_wrong_country(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('fra-YZ'))

    def test_fromietf_wrong_script(self):
        self.assertRaises(ValueError, lambda: Language.fromietf('fra-FR-Wxyz'))

    def test_eq(self):
        self.assertEqual(Language('eng'), Language('eng'))

    def test_ne(self):
        self.assertNotEqual(Language('fra'), Language('eng'))
        self.assertIsNotNone(Language('fra'))

    def test_nonzero(self):
        # The undetermined language is falsy, any real language is truthy.
        self.assertFalse(bool(Language('und')))
        self.assertTrue(bool(Language('eng')))

    def test_language_hasattr(self):
        self.assertTrue(hasattr(Language('fra'), 'alpha3'))
        self.assertTrue(hasattr(Language('fra'), 'alpha2'))
        self.assertFalse(hasattr(Language('bej'), 'alpha2'))

    def test_country_hasattr(self):
        self.assertTrue(hasattr(Country('US'), 'name'))
        self.assertTrue(hasattr(Country('FR'), 'alpha2'))
        self.assertFalse(hasattr(Country('BE'), 'none'))

    def test_country(self):
        self.assertEqual(Language('por', 'BR').country, Country('BR'))
        self.assertEqual(Language('eng', Country('US')).country, Country('US'))

    def test_eq_with_country(self):
        self.assertEqual(Language('eng', 'US'), Language('eng', Country('US')))

    def test_ne_with_country(self):
        self.assertNotEqual(Language('eng', 'US'), Language('eng', Country('GB')))

    def test_script(self):
        self.assertEqual(Language('srp', script='Latn').script, Script('Latn'))
        self.assertEqual(Language('srp', script=Script('Cyrl')).script, Script('Cyrl'))

    def test_eq_with_script(self):
        self.assertEqual(Language('srp', script='Latn'), Language('srp', script=Script('Latn')))

    def test_ne_with_script(self):
        self.assertNotEqual(Language('srp', script='Latn'), Language('srp', script=Script('Cyrl')))

    def test_eq_with_country_and_script(self):
        self.assertEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Latn')))

    def test_ne_with_country_and_script(self):
        self.assertNotEqual(Language('srp', 'SR', 'Latn'), Language('srp', Country('SR'), Script('Cyrl')))

    def test_hash(self):
        # A Language hashes like the tag strings asserted below.
        self.assertEqual(hash(Language('fra')), hash('fr'))
        self.assertEqual(hash(Language('ace')), hash('ace'))
        self.assertEqual(hash(Language('por', 'BR')), hash('pt-BR'))
        self.assertEqual(hash(Language('srp', script='Cyrl')), hash('sr-Cyrl'))
        self.assertEqual(hash(Language('eng', 'US', 'Latn')), hash('en-US-Latn'))

    def test_pickle(self):
        for lang in [Language('fra'),
                     Language('eng', 'US'),
                     Language('srp', script='Latn'),
                     Language('eng', 'US', 'Latn')]:
            self.assertEqual(pickle.loads(pickle.dumps(lang)), lang)

    def test_str(self):
        # str() must produce a tag that fromietf() parses back to an equal Language.
        self.assertEqual(Language.fromietf(str(Language('eng', 'US', 'Latn'))), Language('eng', 'US', 'Latn'))
        self.assertEqual(Language.fromietf(str(Language('fra', 'FR'))), Language('fra', 'FR'))
        self.assertEqual(Language.fromietf(str(Language('bel'))), Language('bel'))

    def test_register_converter(self):
        class TestConverter(LanguageReverseConverter):
            """Minimal two-language converter used only by this test."""

            def __init__(self):
                self.to_test = {'fra': 'test1', 'eng': 'test2'}
                self.from_test = {'test1': 'fra', 'test2': 'eng'}

            def convert(self, alpha3, country=None, script=None):
                if alpha3 not in self.to_test:
                    raise LanguageConvertError(alpha3, country, script)
                return self.to_test[alpha3]

            def reverse(self, test):
                if test not in self.from_test:
                    raise LanguageReverseError(test)
                return (self.from_test[test], None)

        language = Language('fra')
        self.assertFalse(hasattr(language, 'test'))
        # Registering the converter exposes it on existing and new instances.
        language_converters['test'] = TestConverter()
        self.assertTrue(hasattr(language, 'test'))
        self.assertIn('test', language_converters)
        self.assertEqual(Language('fra').test, 'test1')
        self.assertEqual(Language.fromtest('test2').alpha3, 'eng')
        # Unregistering removes both the attribute and the ``from`` constructor.
        del language_converters['test']
        self.assertNotIn('test', language_converters)
        self.assertRaises(KeyError, lambda: Language.fromtest('test1'))
        self.assertRaises(AttributeError, lambda: Language('fra').test)
|
|
||||||
|
|
||||||
|
|
||||||
def suite():
    """Build a ``TestSuite`` holding the Script, Country and Language test cases, in that order."""
    collected = TestSuite()
    for case_class in (TestScript, TestCountry, TestLanguage):
        # A fresh loader per test case, as each case is loaded independently.
        collected.addTest(TestLoader().loadTestsFromTestCase(case_class))
    return collected
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
TextTestRunner().run(suite())
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Extracts as much information as possible from a video file.
|
|
||||||
"""
|
|
||||||
from . import monkeypatch as _monkeypatch
|
|
||||||
|
|
||||||
from .api import guessit, GuessItApi
|
|
||||||
from .options import ConfigurationException
|
|
||||||
from .rules.common.quantity import Size
|
|
||||||
|
|
||||||
from .__version__ import __version__
|
|
||||||
|
|
||||||
_monkeypatch.monkeypatch_rebulk()
|
|
||||||
@@ -1,180 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Entry point module
|
|
||||||
"""
|
|
||||||
# pragma: no cover
|
|
||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import six
|
|
||||||
from rebulk.__version__ import __version__ as __rebulk_version__
|
|
||||||
|
|
||||||
from guessit import api
|
|
||||||
from guessit.__version__ import __version__
|
|
||||||
from guessit.jsonutils import GuessitEncoder
|
|
||||||
from guessit.options import argument_parser, parse_options, load_config, merge_options
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
|
||||||
from collections import OrderedDict
|
|
||||||
except ImportError: # pragma: no-cover
|
|
||||||
from ordereddict import OrderedDict # pylint:disable=import-error
|
|
||||||
|
|
||||||
|
|
||||||
def guess_filename(filename, options):
    """
    Guess a single filename and print the result in the requested format.

    The output format is chosen from ``options``: the value of a single
    property (``show_property``), compact JSON (``json``), YAML (``yaml``),
    or an indented JSON dump prefixed with ``GuessIt found:`` by default.

    :param filename: filename to parse
    :type filename: str
    :param options: merged guessit options
    :type options: dict
    :return: nothing; the guess is written to standard output
    :rtype: None
    """
    show_property = options.get('show_property')
    machine_output = options.get('yaml') or options.get('json') or show_property
    if not machine_output:
        print('For:', filename)

    guess = api.guessit(filename, options)

    if show_property:
        print(guess.get(show_property, ''))
        return

    if options.get('json'):
        print(json.dumps(guess, cls=GuessitEncoder, ensure_ascii=False))
        return

    if not options.get('yaml'):
        print('GuessIt found:', json.dumps(guess, cls=GuessitEncoder, indent=4, ensure_ascii=False))
        return

    import yaml
    from guessit import yamlutils

    ystr = yaml.dump({filename: OrderedDict(guess)}, Dumper=yamlutils.CustomDumper, default_flow_style=False,
                     allow_unicode=True)
    for position, yline in enumerate(ystr.splitlines()):
        if position == 0:
            # First line: prefix with "? " and drop its last character.
            print("? " + yline[:-1])
        elif position == 1:
            # Second line: replace its first character with ":".
            print(":" + yline[1:])
        else:
            print(yline)
|
|
||||||
|
|
||||||
|
|
||||||
def display_properties(options):
    """
    Print the properties guessit can produce.

    With ``options['values']`` set, the possible values of each property are
    printed too. The format is JSON (``json``), YAML (``yaml``) or, by
    default, an indented ``[+] name`` / ``[!] value`` listing sorted by
    property name.

    :param options: merged guessit options
    :type options: dict
    """
    properties = api.properties(options)
    with_values = options.get('values')

    if options.get('json'):
        payload = properties if with_values else list(properties.keys())
        print(json.dumps(payload, cls=GuessitEncoder, ensure_ascii=False))
        return

    if options.get('yaml'):
        import yaml
        from guessit import yamlutils
        payload = properties if with_values else list(properties.keys())
        print(yaml.dump(payload, Dumper=yamlutils.CustomDumper, default_flow_style=False, allow_unicode=True))
        return

    print('GuessIt properties:')

    for property_name in sorted(properties.keys()):
        property_values = properties.get(property_name)
        print(2 * ' ' + '[+] %s' % (property_name,))
        if not (property_values and with_values):
            continue
        for property_value in property_values:
            print(4 * ' ' + '[!] %s' % (property_value,))
|
|
||||||
|
|
||||||
|
|
||||||
def fix_argv_encoding():
    """
    Decode every ``sys.argv`` entry in place on Windows under Python 2.

    Does nothing on any other platform/interpreter combination.
    """
    needs_fix = six.PY2 and os.name == 'nt'
    if not needs_fix:  # pragma: no cover
        return

    # see http://bugs.python.org/issue2128
    import locale

    for position, raw_argument in enumerate(sys.argv):
        sys.argv[position] = raw_argument.decode(locale.getpreferredencoding())
|
|
||||||
|
|
||||||
|
|
||||||
def main(args=None):  # pylint:disable=too-many-branches
    """
    Main function for entry point.

    Parses the options, loads and merges the configuration, then performs
    whichever of these was requested: print the version banner, list the
    properties, and guess every filename given on the command line and/or
    listed (one per line) in ``input_file``. If none of them produced any
    output, the argument parser's help is printed.

    :param args: arguments to parse; when ``None`` ``parse_options`` is called
        without arguments
    :type args: list|None
    """
    fix_argv_encoding()

    if args is None:  # pragma: no cover
        options = parse_options()
    else:
        options = parse_options(args)

    config = load_config(options)
    options = merge_options(config, options)

    if options.get('verbose'):
        logging.basicConfig(stream=sys.stdout, format='%(message)s')
        logging.getLogger().setLevel(logging.DEBUG)

    help_required = True

    if options.get('version'):
        border = '+-------------------------------------------------------+'
        inner_width = len(border) - 2
        # The version rows reserve 36 columns for "<label> <version>" plus its
        # right padding (8 + 28 for GuessIt, 7 + 29 for Rebulk); the rest of
        # the row is the left indent, so every row is as wide as the border.
        indent = (inner_width - 36) * ' '
        print(border)
        print('+' + (indent + 'GuessIt ' + __version__).ljust(inner_width) + '+')
        print(border)
        print('+' + (indent + 'Rebulk ' + __rebulk_version__).ljust(inner_width) + '+')
        print(border)
        print('|' + 'Please report any bug or feature request at'.center(inner_width) + '|')
        print('|' + 'https://github.com/guessit-io/guessit/issues.'.center(inner_width) + '|')
        print(border)
        help_required = False

    if options.get('yaml'):
        try:
            import yaml  # pylint:disable=unused-variable,unused-import
        except ImportError:  # pragma: no cover
            # Degrade to the default output format instead of failing later.
            del options['yaml']
            print('PyYAML is not installed. \'--yaml\' option will be ignored ...', file=sys.stderr)

    if options.get('properties') or options.get('values'):
        display_properties(options)
        help_required = False

    filenames = []
    if options.get('filename'):
        filenames.extend(options.get('filename'))

    input_path = options.get('input_file')
    if input_path:
        if six.PY2:
            input_file = open(input_path, 'r')
        else:
            input_file = open(input_path, 'r', encoding='utf-8')
        with input_file:
            filenames.extend(line.strip() for line in input_file)

    # Drop empty entries (e.g. blank lines of the input file).
    filenames = [name for name in filenames if name]

    for filename in filenames:
        help_required = False
        guess_filename(filename, options)

    if help_required:  # pragma: no cover
        argument_parser.print_help()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__': # pragma: no cover
|
|
||||||
main()
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Version module
|
|
||||||
"""
|
|
||||||
# pragma: no cover
|
|
||||||
__version__ = '3.1.1.dev0'
|
|
||||||
@@ -1,263 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
API functions that can be used by external software
|
|
||||||
"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
from collections import OrderedDict
|
|
||||||
except ImportError: # pragma: no-cover
|
|
||||||
from ordereddict import OrderedDict # pylint:disable=import-error
|
|
||||||
|
|
||||||
import os
|
|
||||||
import traceback
|
|
||||||
|
|
||||||
import six
|
|
||||||
from rebulk.introspector import introspect
|
|
||||||
|
|
||||||
from .__version__ import __version__
|
|
||||||
from .options import parse_options, load_config, merge_options
|
|
||||||
from .rules import rebulk_builder
|
|
||||||
|
|
||||||
|
|
||||||
class GuessitException(Exception):
    """
    Raised when guessit cannot produce a guess because of an internal error.

    The exception message is a report containing the guessit version, the
    input string, the options and the traceback of the exception being
    handled when this one is created. The input and options are also kept
    on the instance as ``string`` and ``options``.
    """

    def __init__(self, string, options):
        separator = "--------------------------------------------------------------------\n"
        template = ("An internal error has occured in guessit.\n"
                    "===================== Guessit Exception Report =====================\n"
                    "version=%s\n"
                    "string=%s\n"
                    "options=%s\n"
                    + separator +
                    "%s"
                    + separator +
                    "Please report at "
                    "https://github.com/guessit-io/guessit/issues.\n"
                    "====================================================================")
        report = template % (__version__, str(string), str(options), traceback.format_exc())
        super(GuessitException, self).__init__(report)

        self.string = string
        self.options = options
|
|
||||||
|
|
||||||
|
|
||||||
def configure(options=None, rules_builder=rebulk_builder, force=False):
    """
    Load configuration files and initialize rebulk rules if required.

    Delegates to ``default_api.configure`` and returns its result, so callers
    can inspect the configuration that is in effect.

    :param options: options used to locate and load the configuration
    :type options: dict
    :param rules_builder: callable that builds the rebulk rules from the advanced configuration
    :type rules_builder:
    :param force: reload the configuration and rebuild the rules even if nothing changed
    :type force: bool
    :return: the loaded configuration
    :rtype: dict
    """
    return default_api.configure(options, rules_builder=rules_builder, force=force)
|
|
||||||
|
|
||||||
|
|
||||||
def guessit(string, options=None):
    """
    Guess the properties of ``string`` using the module-wide default API.

    :param string: the filename or release name
    :type string: str
    :param options: options forwarded unchanged to ``default_api.guessit``
    :type options: str|dict
    :return: whatever ``default_api.guessit`` returns for this input
    :rtype:
    """
    guess = default_api.guessit(string, options)
    return guess
|
|
||||||
|
|
||||||
|
|
||||||
def properties(options=None):
    """
    List every property that can be guessed, with its possible values,
    using the module-wide default API.

    :param options: options forwarded unchanged to ``default_api.properties``
    :type options: str|dict
    :return: whatever ``default_api.properties`` returns
    :rtype:
    """
    available = default_api.properties(options)
    return available
|
|
||||||
|
|
||||||
|
|
||||||
def suggested_expected(titles, options=None):
    """
    Suggest which of ``titles`` should be passed as `expected_title`,
    using the module-wide default API.

    :param titles: the filenames or release names to examine
    :type titles: list|set|dict
    :param options: options forwarded unchanged to ``default_api.suggested_expected``
    :type options: str|dict
    :return: the suggested titles
    :rtype: list of str
    """
    suggestions = default_api.suggested_expected(titles, options)
    return suggestions
|
|
||||||
|
|
||||||
|
|
||||||
class GuessItApi(object):
    """
    An api class that can be configured with custom Rebulk configuration.

    The instance caches the loaded configuration and the built rebulk object,
    and only reloads or rebuilds them when the relevant options change or
    when ``force`` is requested.
    """

    def __init__(self):
        """Default constructor."""
        # Rebulk object built by ``rules_builder``; None until first configure().
        self.rebulk = None
        # Last configuration returned by ``load_config``.
        self.config = None
        # Options that were in effect when ``config`` was loaded.
        self.load_config_options = None
        # Advanced configuration the current ``rebulk`` was built from.
        self.advanced_config = None

    @classmethod
    def _fix_encoding(cls, value):
        """
        Recursively convert strings in ``value`` to the native ``str`` type.

        Lists and dicts (keys and values) are traversed. On Python 2 unicode
        text is encoded as UTF-8; on Python 3 bytes are decoded as ASCII.
        Anything else is returned unchanged.
        """
        if isinstance(value, list):
            return [cls._fix_encoding(item) for item in value]
        if isinstance(value, dict):
            return {cls._fix_encoding(k): cls._fix_encoding(v) for k, v in value.items()}
        if six.PY2 and isinstance(value, six.text_type):
            return value.encode('utf-8')
        if six.PY3 and isinstance(value, six.binary_type):
            return value.decode('ascii')
        return value

    @classmethod
    def _has_same_properties(cls, dic1, dic2, values):
        """Return True when ``dic1`` and ``dic2`` agree on every key listed in ``values``."""
        return all(dic1.get(value) == dic2.get(value) for value in values)

    def configure(self, options=None, rules_builder=rebulk_builder, force=False, sanitize_options=True):
        """
        Load configuration files and initialize rebulk rules if required.

        :param options: options used to locate and load the configuration
        :type options: str|dict
        :param rules_builder: callable that builds the rebulk object from the advanced configuration
        :type rules_builder:
        :param force: reload the configuration and rebuild the rules unconditionally
        :type force: bool
        :param sanitize_options: parse and encoding-fix ``options`` first; pass False
            when the caller has already done so
        :type sanitize_options: bool
        :return: the configuration now in effect
        :rtype: dict
        """
        if sanitize_options:
            options = parse_options(options, True)
            options = self._fix_encoding(options)

        # Reload only when nothing is cached yet, when forced, or when one of
        # the options that select the configuration files has changed.
        if self.config is None or self.load_config_options is None or force or \
                not self._has_same_properties(self.load_config_options,
                                              options,
                                              ['config', 'no_user_config', 'no_default_config']):
            config = load_config(options)
            config = self._fix_encoding(config)
            self.load_config_options = options
        else:
            config = self.config

        advanced_config = merge_options(config.get('advanced_config'), options.get('advanced_config'))

        # Rebuild the rules only when they are missing or were built from a
        # different advanced configuration.
        should_build_rebulk = force or not self.rebulk or not self.advanced_config or \
            self.advanced_config != advanced_config

        if should_build_rebulk:
            self.advanced_config = advanced_config
            self.rebulk = rules_builder(advanced_config)

        self.config = config
        return self.config

    def guessit(self, string, options=None):  # pylint: disable=too-many-branches
        """
        Retrieves all matches from string as a dict

        :param string: the filename or release name
        :type string: str|Path
        :param options: guessit options
        :type options: str|dict
        :return: the result of ``matches.to_dict(...)`` for the input
        :rtype:
        :raises GuessitException: when any ``Exception`` occurs while guessing
        """
        try:
            from pathlib import Path
            if isinstance(string, Path):
                try:
                    # Handle path-like object
                    string = os.fspath(string)
                except AttributeError:
                    # ``os.fspath`` is unavailable on this interpreter.
                    string = str(string)
        except ImportError:
            # No pathlib: the input cannot be a Path, nothing to convert.
            pass

        try:
            options = parse_options(options, True)
            options = self._fix_encoding(options)
            config = self.configure(options, sanitize_options=False)
            options = merge_options(config, options)
            result_decode = False
            result_encode = False

            # Matching runs on the native ``str`` type; remember whether the
            # caller's type differed so match values can be converted back.
            if six.PY2:
                if isinstance(string, six.text_type):
                    string = string.encode("utf-8")
                    result_decode = True
                elif isinstance(string, six.binary_type):
                    string = six.binary_type(string)
            if six.PY3:
                if isinstance(string, six.binary_type):
                    string = string.decode('ascii')
                    result_encode = True
                elif isinstance(string, six.text_type):
                    string = six.text_type(string)

            matches = self.rebulk.matches(string, options)
            if result_decode:
                for match in matches:
                    if isinstance(match.value, six.binary_type):
                        match.value = match.value.decode("utf-8")
            if result_encode:
                for match in matches:
                    if isinstance(match.value, six.text_type):
                        match.value = match.value.encode("ascii")
            return matches.to_dict(options.get('advanced', False), options.get('single_value', False),
                                   options.get('enforce_list', False))
        except Exception:
            # Only genuine errors are wrapped; KeyboardInterrupt and SystemExit
            # derive from BaseException and must propagate untouched.
            raise GuessitException(string, options)

    def properties(self, options=None):
        """
        Grab properties and values that can be generated.

        :param options: guessit options
        :type options: str|dict
        :return: properties sorted by name, each mapped to its sorted list of values
            (possibly post-processed by ``rebulk.customize_properties`` when it exists)
        :rtype: OrderedDict
        """
        options = parse_options(options, True)
        options = self._fix_encoding(options)
        config = self.configure(options, sanitize_options=False)
        options = merge_options(config, options)
        unordered = introspect(self.rebulk, options).properties
        ordered = OrderedDict()
        # Sort by text representation so keys/values of mixed types stay comparable.
        for k in sorted(unordered.keys(), key=six.text_type):
            ordered[k] = list(sorted(unordered[k], key=six.text_type))
        if hasattr(self.rebulk, 'customize_properties'):
            ordered = self.rebulk.customize_properties(ordered)
        return ordered

    def suggested_expected(self, titles, options=None):
        """
        Return a list of suggested titles to be used as `expected_title` based on the list of titles

        :param titles: the filename or release name
        :type titles: list|set|dict
        :param options: guessit options
        :type options: str|dict
        :return: the titles whose guess is not exactly two entries including ``title``
        :rtype: list of str
        """
        suggested = []
        for title in titles:
            guess = self.guessit(title, options)
            # A guess holding exactly two entries, one of them 'title', is taken
            # as already understood; anything else is suggested.
            # NOTE(review): the second expected entry is presumably 'type' -- confirm.
            if len(guess) != 2 or 'title' not in guess:
                suggested.append(title)

        return suggested
|
|
||||||
|
|
||||||
|
|
||||||
default_api = GuessItApi()
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Backports
|
|
||||||
"""
|
|
||||||
# pragma: no-cover
|
|
||||||
# pylint: disabled
|
|
||||||
|
|
||||||
def cmp_to_key(mycmp):
    """functools.cmp_to_key backport

    Wrap the old-style comparison function ``mycmp(a, b)`` (negative, zero or
    positive result) into a class usable as a ``key=`` argument.

    :param mycmp: two-argument comparison function
    :return: a class whose instances wrap one object and compare through ``mycmp``
    """
    class KeyClass(object):
        """Key class"""

        # Equality is defined by ``mycmp``, so identity-based hashing would be
        # inconsistent with it; be explicitly unhashable like the stdlib version.
        __hash__ = None

        def __init__(self, obj, *args):  # pylint: disable=unused-argument
            self.obj = obj

        def __lt__(self, other):
            return mycmp(self.obj, other.obj) < 0

        def __gt__(self, other):
            return mycmp(self.obj, other.obj) > 0

        def __eq__(self, other):
            return mycmp(self.obj, other.obj) == 0

        def __le__(self, other):
            return mycmp(self.obj, other.obj) <= 0

        def __ge__(self, other):
            return mycmp(self.obj, other.obj) >= 0

        def __ne__(self, other):
            return mycmp(self.obj, other.obj) != 0

    return KeyClass
|
|
||||||
@@ -1,586 +0,0 @@
|
|||||||
{
|
|
||||||
"expected_title": [
|
|
||||||
"OSS 117",
|
|
||||||
"This is Us"
|
|
||||||
],
|
|
||||||
"allowed_countries": [
|
|
||||||
"au",
|
|
||||||
"gb",
|
|
||||||
"us"
|
|
||||||
],
|
|
||||||
"allowed_languages": [
|
|
||||||
"ca",
|
|
||||||
"cs",
|
|
||||||
"de",
|
|
||||||
"en",
|
|
||||||
"es",
|
|
||||||
"fr",
|
|
||||||
"he",
|
|
||||||
"hi",
|
|
||||||
"hu",
|
|
||||||
"it",
|
|
||||||
"ja",
|
|
||||||
"ko",
|
|
||||||
"mul",
|
|
||||||
"nl",
|
|
||||||
"no",
|
|
||||||
"pl",
|
|
||||||
"pt",
|
|
||||||
"ro",
|
|
||||||
"ru",
|
|
||||||
"sv",
|
|
||||||
"te",
|
|
||||||
"uk",
|
|
||||||
"und"
|
|
||||||
],
|
|
||||||
"advanced_config": {
|
|
||||||
"common_words": [
|
|
||||||
"ca",
|
|
||||||
"cat",
|
|
||||||
"de",
|
|
||||||
"he",
|
|
||||||
"it",
|
|
||||||
"no",
|
|
||||||
"por",
|
|
||||||
"rum",
|
|
||||||
"se",
|
|
||||||
"st",
|
|
||||||
"sub"
|
|
||||||
],
|
|
||||||
"groups": {
|
|
||||||
"starting": "([{",
|
|
||||||
"ending": ")]}"
|
|
||||||
},
|
|
||||||
"audio_codec": {
|
|
||||||
"audio_channels": {
|
|
||||||
"1.0": [
|
|
||||||
"1ch",
|
|
||||||
"mono"
|
|
||||||
],
|
|
||||||
"2.0": [
|
|
||||||
"2ch",
|
|
||||||
"stereo",
|
|
||||||
"re:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)"
|
|
||||||
],
|
|
||||||
"5.1": [
|
|
||||||
"5ch",
|
|
||||||
"6ch",
|
|
||||||
"re:(5[\\W_][01](?:ch)?)(?=[^\\d]|$)",
|
|
||||||
"re:(6[\\W_]0(?:ch)?)(?=[^\\d]|$)"
|
|
||||||
],
|
|
||||||
"7.1": [
|
|
||||||
"7ch",
|
|
||||||
"8ch",
|
|
||||||
"re:(7[\\W_][01](?:ch)?)(?=[^\\d]|$)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"container": {
|
|
||||||
"subtitles": [
|
|
||||||
"srt",
|
|
||||||
"idx",
|
|
||||||
"sub",
|
|
||||||
"ssa",
|
|
||||||
"ass"
|
|
||||||
],
|
|
||||||
"info": [
|
|
||||||
"nfo"
|
|
||||||
],
|
|
||||||
"videos": [
|
|
||||||
"3g2",
|
|
||||||
"3gp",
|
|
||||||
"3gp2",
|
|
||||||
"asf",
|
|
||||||
"avi",
|
|
||||||
"divx",
|
|
||||||
"flv",
|
|
||||||
"iso",
|
|
||||||
"m4v",
|
|
||||||
"mk2",
|
|
||||||
"mk3d",
|
|
||||||
"mka",
|
|
||||||
"mkv",
|
|
||||||
"mov",
|
|
||||||
"mp4",
|
|
||||||
"mp4a",
|
|
||||||
"mpeg",
|
|
||||||
"mpg",
|
|
||||||
"ogg",
|
|
||||||
"ogm",
|
|
||||||
"ogv",
|
|
||||||
"qt",
|
|
||||||
"ra",
|
|
||||||
"ram",
|
|
||||||
"rm",
|
|
||||||
"ts",
|
|
||||||
"vob",
|
|
||||||
"wav",
|
|
||||||
"webm",
|
|
||||||
"wma",
|
|
||||||
"wmv"
|
|
||||||
],
|
|
||||||
"torrent": [
|
|
||||||
"torrent"
|
|
||||||
],
|
|
||||||
"nzb": [
|
|
||||||
"nzb"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"country": {
|
|
||||||
"synonyms": {
|
|
||||||
"ES": [
|
|
||||||
"españa"
|
|
||||||
],
|
|
||||||
"GB": [
|
|
||||||
"UK"
|
|
||||||
],
|
|
||||||
"BR": [
|
|
||||||
"brazilian",
|
|
||||||
"bra"
|
|
||||||
],
|
|
||||||
"CA": [
|
|
||||||
"québec",
|
|
||||||
"quebec",
|
|
||||||
"qc"
|
|
||||||
],
|
|
||||||
"MX": [
|
|
||||||
"Latinoamérica",
|
|
||||||
"latin america"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"episodes": {
|
|
||||||
"season_max_range": 100,
|
|
||||||
"episode_max_range": 100,
|
|
||||||
"max_range_gap": 1,
|
|
||||||
"season_markers": [
|
|
||||||
"s"
|
|
||||||
],
|
|
||||||
"season_ep_markers": [
|
|
||||||
"x"
|
|
||||||
],
|
|
||||||
"disc_markers": [
|
|
||||||
"d"
|
|
||||||
],
|
|
||||||
"episode_markers": [
|
|
||||||
"xe",
|
|
||||||
"ex",
|
|
||||||
"ep",
|
|
||||||
"e",
|
|
||||||
"x"
|
|
||||||
],
|
|
||||||
"range_separators": [
|
|
||||||
"-",
|
|
||||||
"~",
|
|
||||||
"to",
|
|
||||||
"a"
|
|
||||||
],
|
|
||||||
"discrete_separators": [
|
|
||||||
"+",
|
|
||||||
"&",
|
|
||||||
"and",
|
|
||||||
"et"
|
|
||||||
],
|
|
||||||
"season_words": [
|
|
||||||
"season",
|
|
||||||
"saison",
|
|
||||||
"seizoen",
|
|
||||||
"seasons",
|
|
||||||
"saisons",
|
|
||||||
"tem",
|
|
||||||
"temp",
|
|
||||||
"temporada",
|
|
||||||
"temporadas",
|
|
||||||
"stagione"
|
|
||||||
],
|
|
||||||
"episode_words": [
|
|
||||||
"episode",
|
|
||||||
"episodes",
|
|
||||||
"eps",
|
|
||||||
"ep",
|
|
||||||
"episodio",
|
|
||||||
"episodios",
|
|
||||||
"capitulo",
|
|
||||||
"capitulos"
|
|
||||||
],
|
|
||||||
"of_words": [
|
|
||||||
"of",
|
|
||||||
"sur"
|
|
||||||
],
|
|
||||||
"all_words": [
|
|
||||||
"All"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"language": {
|
|
||||||
"synonyms": {
|
|
||||||
"ell": [
|
|
||||||
"gr",
|
|
||||||
"greek"
|
|
||||||
],
|
|
||||||
"spa": [
|
|
||||||
"esp",
|
|
||||||
"español",
|
|
||||||
"espanol"
|
|
||||||
],
|
|
||||||
"fra": [
|
|
||||||
"français",
|
|
||||||
"vf",
|
|
||||||
"vff",
|
|
||||||
"vfi",
|
|
||||||
"vfq"
|
|
||||||
],
|
|
||||||
"swe": [
|
|
||||||
"se"
|
|
||||||
],
|
|
||||||
"por_BR": [
|
|
||||||
"po",
|
|
||||||
"pb",
|
|
||||||
"pob",
|
|
||||||
"ptbr",
|
|
||||||
"br",
|
|
||||||
"brazilian"
|
|
||||||
],
|
|
||||||
"deu_CH": [
|
|
||||||
"swissgerman",
|
|
||||||
"swiss german"
|
|
||||||
],
|
|
||||||
"nld_BE": [
|
|
||||||
"flemish"
|
|
||||||
],
|
|
||||||
"cat": [
|
|
||||||
"català",
|
|
||||||
"castellano",
|
|
||||||
"espanol castellano",
|
|
||||||
"español castellano"
|
|
||||||
],
|
|
||||||
"ces": [
|
|
||||||
"cz"
|
|
||||||
],
|
|
||||||
"ukr": [
|
|
||||||
"ua"
|
|
||||||
],
|
|
||||||
"zho": [
|
|
||||||
"cn"
|
|
||||||
],
|
|
||||||
"jpn": [
|
|
||||||
"jp"
|
|
||||||
],
|
|
||||||
"hrv": [
|
|
||||||
"scr"
|
|
||||||
],
|
|
||||||
"mul": [
|
|
||||||
"multi",
|
|
||||||
"dl"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"subtitle_affixes": [
|
|
||||||
"sub",
|
|
||||||
"subs",
|
|
||||||
"esub",
|
|
||||||
"esubs",
|
|
||||||
"subbed",
|
|
||||||
"custom subbed",
|
|
||||||
"custom subs",
|
|
||||||
"custom sub",
|
|
||||||
"customsubbed",
|
|
||||||
"customsubs",
|
|
||||||
"customsub",
|
|
||||||
"soft subtitles",
|
|
||||||
"soft subs"
|
|
||||||
],
|
|
||||||
"subtitle_prefixes": [
|
|
||||||
"st",
|
|
||||||
"vost",
|
|
||||||
"subforced",
|
|
||||||
"fansub",
|
|
||||||
"hardsub",
|
|
||||||
"legenda",
|
|
||||||
"legendas",
|
|
||||||
"legendado",
|
|
||||||
"subtitulado",
|
|
||||||
"soft",
|
|
||||||
"subtitles"
|
|
||||||
],
|
|
||||||
"subtitle_suffixes": [
|
|
||||||
"subforced",
|
|
||||||
"fansub",
|
|
||||||
"hardsub"
|
|
||||||
],
|
|
||||||
"language_affixes": [
|
|
||||||
"dublado",
|
|
||||||
"dubbed",
|
|
||||||
"dub"
|
|
||||||
],
|
|
||||||
"language_prefixes": [
|
|
||||||
"true"
|
|
||||||
],
|
|
||||||
"language_suffixes": [
|
|
||||||
"audio"
|
|
||||||
],
|
|
||||||
"weak_affixes": [
|
|
||||||
"v",
|
|
||||||
"audio",
|
|
||||||
"true"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"part": {
|
|
||||||
"prefixes": [
|
|
||||||
"pt",
|
|
||||||
"part"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"release_group": {
|
|
||||||
"forbidden_names": [
|
|
||||||
"bonus",
|
|
||||||
"by",
|
|
||||||
"for",
|
|
||||||
"par",
|
|
||||||
"pour",
|
|
||||||
"rip"
|
|
||||||
],
|
|
||||||
"ignored_seps": "[]{}()"
|
|
||||||
},
|
|
||||||
"screen_size": {
|
|
||||||
"frame_rates": [
|
|
||||||
"23.976",
|
|
||||||
"24",
|
|
||||||
"25",
|
|
||||||
"29.970",
|
|
||||||
"30",
|
|
||||||
"48",
|
|
||||||
"50",
|
|
||||||
"60",
|
|
||||||
"120"
|
|
||||||
],
|
|
||||||
"min_ar": 1.333,
|
|
||||||
"max_ar": 1.898,
|
|
||||||
"interlaced": [
|
|
||||||
"360",
|
|
||||||
"480",
|
|
||||||
"576",
|
|
||||||
"900",
|
|
||||||
"1080"
|
|
||||||
],
|
|
||||||
"progressive": [
|
|
||||||
"360",
|
|
||||||
"480",
|
|
||||||
"540",
|
|
||||||
"576",
|
|
||||||
"900",
|
|
||||||
"1080",
|
|
||||||
"368",
|
|
||||||
"720",
|
|
||||||
"1440",
|
|
||||||
"2160",
|
|
||||||
"4320"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"website": {
|
|
||||||
"safe_tlds": [
|
|
||||||
"com",
|
|
||||||
"net",
|
|
||||||
"org"
|
|
||||||
],
|
|
||||||
"safe_subdomains": [
|
|
||||||
"www"
|
|
||||||
],
|
|
||||||
"safe_prefixes": [
|
|
||||||
"co",
|
|
||||||
"com",
|
|
||||||
"net",
|
|
||||||
"org"
|
|
||||||
],
|
|
||||||
"prefixes": [
|
|
||||||
"from"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"streaming_service": {
|
|
||||||
"A&E": [
|
|
||||||
"AE",
|
|
||||||
"A&E"
|
|
||||||
],
|
|
||||||
"ABC": "AMBC",
|
|
||||||
"ABC Australia": "AUBC",
|
|
||||||
"Al Jazeera English": "AJAZ",
|
|
||||||
"AMC": "AMC",
|
|
||||||
"Amazon Prime": [
|
|
||||||
"AMZN",
|
|
||||||
"Amazon",
|
|
||||||
"re:Amazon-?Prime"
|
|
||||||
],
|
|
||||||
"Adult Swim": [
|
|
||||||
"AS",
|
|
||||||
"re:Adult-?Swim"
|
|
||||||
],
|
|
||||||
"America's Test Kitchen": "ATK",
|
|
||||||
"Animal Planet": "ANPL",
|
|
||||||
"AnimeLab": "ANLB",
|
|
||||||
"AOL": "AOL",
|
|
||||||
"ARD": "ARD",
|
|
||||||
"BBC iPlayer": [
|
|
||||||
"iP",
|
|
||||||
"re:BBC-?iPlayer"
|
|
||||||
],
|
|
||||||
"BravoTV": "BRAV",
|
|
||||||
"Canal+": "CNLP",
|
|
||||||
"Cartoon Network": "CN",
|
|
||||||
"CBC": "CBC",
|
|
||||||
"CBS": "CBS",
|
|
||||||
"CNBC": "CNBC",
|
|
||||||
"Comedy Central": [
|
|
||||||
"CC",
|
|
||||||
"re:Comedy-?Central"
|
|
||||||
],
|
|
||||||
"Channel 4": "4OD",
|
|
||||||
"CHRGD": "CHGD",
|
|
||||||
"Cinemax": "CMAX",
|
|
||||||
"Country Music Television": "CMT",
|
|
||||||
"Comedians in Cars Getting Coffee": "CCGC",
|
|
||||||
"Crunchy Roll": [
|
|
||||||
"CR",
|
|
||||||
"re:Crunchy-?Roll"
|
|
||||||
],
|
|
||||||
"Crackle": "CRKL",
|
|
||||||
"CSpan": "CSPN",
|
|
||||||
"CTV": "CTV",
|
|
||||||
"CuriosityStream": "CUR",
|
|
||||||
"CWSeed": "CWS",
|
|
||||||
"Daisuki": "DSKI",
|
|
||||||
"DC Universe": "DCU",
|
|
||||||
"Deadhouse Films": "DHF",
|
|
||||||
"DramaFever": [
|
|
||||||
"DF",
|
|
||||||
"DramaFever"
|
|
||||||
],
|
|
||||||
"Digiturk Diledigin Yerde": "DDY",
|
|
||||||
"Discovery": [
|
|
||||||
"DISC",
|
|
||||||
"Discovery"
|
|
||||||
],
|
|
||||||
"Disney": [
|
|
||||||
"DSNY",
|
|
||||||
"Disney"
|
|
||||||
],
|
|
||||||
"DIY Network": "DIY",
|
|
||||||
"Doc Club": "DOCC",
|
|
||||||
"DPlay": "DPLY",
|
|
||||||
"E!": "ETV",
|
|
||||||
"ePix": "EPIX",
|
|
||||||
"El Trece": "ETTV",
|
|
||||||
"ESPN": "ESPN",
|
|
||||||
"Esquire": "ESQ",
|
|
||||||
"Family": "FAM",
|
|
||||||
"Family Jr": "FJR",
|
|
||||||
"Food Network": "FOOD",
|
|
||||||
"Fox": "FOX",
|
|
||||||
"Freeform": "FREE",
|
|
||||||
"FYI Network": "FYI",
|
|
||||||
"Global": "GLBL",
|
|
||||||
"GloboSat Play": "GLOB",
|
|
||||||
"Hallmark": "HLMK",
|
|
||||||
"HBO Go": [
|
|
||||||
"HBO",
|
|
||||||
"re:HBO-?Go"
|
|
||||||
],
|
|
||||||
"HGTV": "HGTV",
|
|
||||||
"History": [
|
|
||||||
"HIST",
|
|
||||||
"History"
|
|
||||||
],
|
|
||||||
"Hulu": "HULU",
|
|
||||||
"Investigation Discovery": "ID",
|
|
||||||
"IFC": "IFC",
|
|
||||||
"iTunes": "iTunes",
|
|
||||||
"ITV": "ITV",
|
|
||||||
"Knowledge Network": "KNOW",
|
|
||||||
"Lifetime": "LIFE",
|
|
||||||
"Motor Trend OnDemand": "MTOD",
|
|
||||||
"MBC": [
|
|
||||||
"MBC",
|
|
||||||
"MBCVOD"
|
|
||||||
],
|
|
||||||
"MSNBC": "MNBC",
|
|
||||||
"MTV": "MTV",
|
|
||||||
"National Geographic": [
|
|
||||||
"NATG",
|
|
||||||
"re:National-?Geographic"
|
|
||||||
],
|
|
||||||
"NBA TV": [
|
|
||||||
"NBA",
|
|
||||||
"re:NBA-?TV"
|
|
||||||
],
|
|
||||||
"NBC": "NBC",
|
|
||||||
"Netflix": [
|
|
||||||
"NF",
|
|
||||||
"Netflix"
|
|
||||||
],
|
|
||||||
"NFL": "NFL",
|
|
||||||
"NFL Now": "NFLN",
|
|
||||||
"NHL GameCenter": "GC",
|
|
||||||
"Nickelodeon": [
|
|
||||||
"NICK",
|
|
||||||
"Nickelodeon"
|
|
||||||
],
|
|
||||||
"Norsk Rikskringkasting": "NRK",
|
|
||||||
"OnDemandKorea": [
|
|
||||||
"ODK",
|
|
||||||
"OnDemandKorea"
|
|
||||||
],
|
|
||||||
"PBS": "PBS",
|
|
||||||
"PBS Kids": "PBSK",
|
|
||||||
"Playstation Network": "PSN",
|
|
||||||
"Pluzz": "PLUZ",
|
|
||||||
"RTE One": "RTE",
|
|
||||||
"SBS (AU)": "SBS",
|
|
||||||
"SeeSo": [
|
|
||||||
"SESO",
|
|
||||||
"SeeSo"
|
|
||||||
],
|
|
||||||
"Shomi": "SHMI",
|
|
||||||
"Spike": "SPIK",
|
|
||||||
"Spike TV": [
|
|
||||||
"SPKE",
|
|
||||||
"re:Spike-?TV"
|
|
||||||
],
|
|
||||||
"Sportsnet": "SNET",
|
|
||||||
"Sprout": "SPRT",
|
|
||||||
"Stan": "STAN",
|
|
||||||
"Starz": "STZ",
|
|
||||||
"Sveriges Television": "SVT",
|
|
||||||
"SwearNet": "SWER",
|
|
||||||
"Syfy": "SYFY",
|
|
||||||
"TBS": "TBS",
|
|
||||||
"TFou": "TFOU",
|
|
||||||
"The CW": [
|
|
||||||
"CW",
|
|
||||||
"re:The-?CW"
|
|
||||||
],
|
|
||||||
"TLC": "TLC",
|
|
||||||
"TubiTV": "TUBI",
|
|
||||||
"TV3 Ireland": "TV3",
|
|
||||||
"TV4 Sweeden": "TV4",
|
|
||||||
"TVING": "TVING",
|
|
||||||
"TV Land": [
|
|
||||||
"TVL",
|
|
||||||
"re:TV-?Land"
|
|
||||||
],
|
|
||||||
"UFC": "UFC",
|
|
||||||
"UKTV": "UKTV",
|
|
||||||
"Univision": "UNIV",
|
|
||||||
"USA Network": "USAN",
|
|
||||||
"Velocity": "VLCT",
|
|
||||||
"VH1": "VH1",
|
|
||||||
"Viceland": "VICE",
|
|
||||||
"Viki": "VIKI",
|
|
||||||
"Vimeo": "VMEO",
|
|
||||||
"VRV": "VRV",
|
|
||||||
"W Network": "WNET",
|
|
||||||
"WatchMe": "WME",
|
|
||||||
"WWE Network": "WWEN",
|
|
||||||
"Xbox Video": "XBOX",
|
|
||||||
"Yahoo": "YHOO",
|
|
||||||
"YouTube Red": "RED",
|
|
||||||
"ZDF": "ZDF"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
JSON Utils
|
|
||||||
"""
|
|
||||||
import json
|
|
||||||
|
|
||||||
from six import text_type
|
|
||||||
from rebulk.match import Match
|
|
||||||
|
|
||||||
class GuessitEncoder(json.JSONEncoder):
    """
    JSON encoder able to serialize the objects found in a guessit response.
    """

    def default(self, o):  # pylint:disable=method-hidden
        """
        Convert an object the stock encoder cannot handle.

        :param o: object to serialize
        :return: the match's ``advanced`` value for a ``Match``, otherwise the
            text of ``o.name`` when present, otherwise the text of ``o``
        """
        if isinstance(o, Match):
            encoded = o.advanced
        elif hasattr(o, 'name'):  # Babelfish languages/countries long name
            encoded = text_type(o.name)
        else:  # pragma: no cover
            encoded = text_type(o)
        return encoded
|
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Monkeypatch initialisation functions
|
|
||||||
"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
from collections import OrderedDict
|
|
||||||
except ImportError: # pragma: no-cover
|
|
||||||
from ordereddict import OrderedDict # pylint:disable=import-error
|
|
||||||
|
|
||||||
from rebulk.match import Match
|
|
||||||
|
|
||||||
|
|
||||||
def monkeypatch_rebulk():
    """Attach an ``advanced`` property to the rebulk ``Match`` class."""

    def match_advanced(self):
        """
        Build advanced dict from match

        :param self: the match being described
        :return: ordered mapping with ``value``, ``raw`` (only when truthy),
            ``start`` and ``end``
        """
        fields = [('value', self.value)]
        if self.raw:
            fields.append(('raw', self.raw))
        fields.append(('start', self.start))
        fields.append(('end', self.end))
        return OrderedDict(fields)

    Match.advanced = property(match_advanced)
|
|
||||||
@@ -1,295 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Options
|
|
||||||
"""
|
|
||||||
import copy
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import pkgutil
|
|
||||||
import shlex
|
|
||||||
|
|
||||||
from argparse import ArgumentParser
|
|
||||||
|
|
||||||
import six
|
|
||||||
|
|
||||||
|
|
||||||
def build_argument_parser():
    """
    Create the command-line argument parser.

    Every option defaults to ``None``; the positional ``filename`` accepts
    any number of values.

    :return: the argument parser
    :rtype: ArgumentParser
    """
    def _value(group, dest, help_text, *flags):
        # Option taking a single value.
        group.add_argument(*flags, dest=dest, default=None, help=help_text)

    def _switch(group, dest, help_text, *flags):
        # Boolean flag; stays None (not False) when absent.
        group.add_argument(*flags, dest=dest, action='store_true', default=None, help=help_text)

    def _repeatable(group, dest, help_text, *flags):
        # Option that may be given several times; values are collected in a list.
        group.add_argument(*flags, dest=dest, action='append', default=None, help=help_text)

    parser = ArgumentParser()
    parser.add_argument(dest='filename', nargs='*', help='Filename or release name to guess')

    naming = parser.add_argument_group("Naming")
    _value(naming, 'type',
           'The suggested file type: movie, episode. If undefined, type will be guessed.',
           '-t', '--type')
    _switch(naming, 'name_only',
            'Parse files as name only, considering "/" and "\\" like other separators.',
            '-n', '--name-only')
    _switch(naming, 'date_year_first',
            'If short date is found, consider the first digits as the year.',
            '-Y', '--date-year-first')
    _switch(naming, 'date_day_first',
            'If short date is found, consider the second digits as the day.',
            '-D', '--date-day-first')
    _repeatable(naming, 'allowed_languages',
                'Allowed language (can be used multiple times)',
                '-L', '--allowed-languages')
    _repeatable(naming, 'allowed_countries',
                'Allowed country (can be used multiple times)',
                '-C', '--allowed-countries')
    _switch(naming, 'episode_prefer_number',
            'Guess "serie.213.avi" as the episode 213. Without this option, '
            'it will be guessed as season 2, episode 13',
            '-E', '--episode-prefer-number')
    _repeatable(naming, 'expected_title',
                'Expected title to parse (can be used multiple times)',
                '-T', '--expected-title')
    _repeatable(naming, 'expected_group',
                'Expected release group (can be used multiple times)',
                '-G', '--expected-group')
    _repeatable(naming, 'includes',
                'List of properties to be detected',
                '--includes')
    _repeatable(naming, 'excludes',
                'List of properties to be ignored',
                '--excludes')

    source = parser.add_argument_group("Input")
    _value(source, 'input_file',
           'Read filenames from an input text file. File should use UTF-8 charset.',
           '-f', '--input-file')

    output = parser.add_argument_group("Output")
    _switch(output, 'verbose',
            'Display debug output',
            '-v', '--verbose')
    _value(output, 'show_property',
           'Display the value of a single property (title, series, video_codec, year, ...)',
           '-P', '--show-property')
    _switch(output, 'advanced',
            'Display advanced information for filename guesses, as json output',
            '-a', '--advanced')
    _switch(output, 'single_value',
            'Keep only first value found for each property',
            '-s', '--single-value')
    _switch(output, 'enforce_list',
            'Wrap each found value in a list even when property has a single value',
            '-l', '--enforce-list')
    _switch(output, 'json',
            'Display information for filename guesses as json output',
            '-j', '--json')
    _switch(output, 'yaml',
            'Display information for filename guesses as yaml output',
            '-y', '--yaml')

    configuration = parser.add_argument_group("Configuration")
    _repeatable(configuration, 'config',
                'Filepath to configuration file. Configuration file contains the same '
                'options as those from command line options, but option names have "-" characters '
                'replaced with "_". This configuration will be merged with default and user '
                'configuration files.',
                '-c', '--config')
    _switch(configuration, 'no_user_config',
            'Disable user configuration. If not defined, guessit tries to read configuration files '
            'at ~/.guessit/options.(json|yml|yaml) and ~/.config/guessit/options.(json|yml|yaml)',
            '--no-user-config')
    _switch(configuration, 'no_default_config',
            'Disable default configuration. This should be done only if you are providing a full '
            'configuration through user configuration or --config option. If no "advanced_config" '
            'is provided by another configuration file, it will still be loaded from default '
            'configuration.',
            '--no-default-config')

    information = parser.add_argument_group("Information")
    _switch(information, 'properties',
            'Display properties that can be guessed.',
            '-p', '--properties')
    _switch(information, 'values',
            'Display property values that can be guessed.',
            '-V', '--values')
    _switch(information, 'version',
            'Display the guessit version.',
            '--version')

    return parser
|
|
||||||
|
|
||||||
|
|
||||||
def parse_options(options=None, api=False):
    """
    Normalize the given options into a dict.

    :param options: a command-line string, a list of arguments, a dict
        (returned as is) or None
    :type options: str|list|dict
    :param api: when options is None, return an empty dict instead of parsing
        the process command line
    :type api: boolean
    :return: the options
    :rtype: dict
    """
    if isinstance(options, six.string_types):
        return vars(argument_parser.parse_args(shlex.split(options)))
    if options is None:
        return {} if api else vars(argument_parser.parse_args())
    if isinstance(options, dict):
        return options
    return vars(argument_parser.parse_args(options))
|
|
||||||
|
|
||||||
|
|
||||||
argument_parser = build_argument_parser()
|
|
||||||
|
|
||||||
|
|
||||||
class ConfigurationException(Exception):
    """
    Raised when a configuration file cannot be handled.
    """
|
|
||||||
|
|
||||||
|
|
||||||
def load_config(options):
    """
    Build the configuration from the default, user and custom config files.

    :param options: options dict; the keys ``no_default_config``,
        ``no_user_config`` and ``config`` are read
    :type options: dict
    :return: the merged configuration, always containing ``advanced_config``
    :rtype: dict
    """
    def _packaged_defaults():
        raw = pkgutil.get_data('guessit', 'config/options.json').decode('utf-8')
        return json.loads(raw)

    layers = []
    if not options.get('no_default_config'):
        layers.append(_packaged_defaults())

    paths = []
    if not options.get('no_user_config'):
        try:
            import yaml  # pylint:disable=unused-variable,unused-import
        except ImportError:
            yaml_supported = False
        else:
            yaml_supported = True

        candidates = get_options_file_locations(os.path.expanduser("~"), os.getcwd(), yaml_supported)
        paths = [candidate for candidate in candidates if os.path.exists(candidate)]

    extra_paths = options.get('config')
    if extra_paths:
        paths = paths + extra_paths

    for path in paths:
        loaded = load_config_file(path)
        if loaded:
            layers.append(loaded)

    config = merge_options(*layers) if layers else {}

    if 'advanced_config' not in config:
        # Guessit doesn't work without advanced_config, so we use default if no configuration files provides it.
        config['advanced_config'] = _packaged_defaults()['advanced_config']

    return config
|
|
||||||
|
|
||||||
|
|
||||||
def merge_options(*options):
    """
    Combine several options dicts into a new one, later dicts taking over.

    The first dict is deep-copied. A later dict may carry a ``pristine`` key:
    ``True`` discards everything merged so far, while an iterable of option
    names discards only those options.

    :param options: options dicts, falsy entries are skipped
    :type options: dict
    :return: the merged options
    :rtype: dict
    """
    merged = {}
    if not options:
        return merged

    base = options[0]
    if base:
        merged.update(copy.deepcopy(base))

    for layer in options[1:]:
        if not layer:
            continue

        pristine = layer.get('pristine')
        if pristine is True:
            merged = {}
        elif pristine:
            for stale in pristine:
                merged.pop(stale, None)

        for option, value in layer.items():
            merge_option_value(option, value, merged)

    return merged
|
|
||||||
|
|
||||||
|
|
||||||
def merge_option_value(option, value, merged):
    """
    Merge a single option value into the ``merged`` dict, in place.

    ``None`` values and the ``pristine`` option are ignored. When the option
    already holds a list, new values are appended if not already present.
    When it already holds a dict, both dicts are merged with
    ``merge_options``. Otherwise the value replaces the current one (lists
    are copied).

    :param option: option name
    :param value: option value to merge
    :param merged: dict receiving the value
    :return: None
    """
    if value is None or option == 'pristine':
        return

    current = merged.get(option)
    if isinstance(current, list):
        # A scalar (typically a single string) is one value to append: iterating
        # it would split a string into characters or fail on a non-iterable.
        values = value if isinstance(value, (list, tuple, set, frozenset)) else [value]
        for val in values:
            if val not in current:
                current.append(val)
    elif isinstance(current, dict):
        merged[option] = merge_options(current, value)
    elif isinstance(value, list):
        # Copy so later appends do not alter the caller's list.
        merged[option] = list(value)
    else:
        merged[option] = value
|
|
||||||
|
|
||||||
|
|
||||||
def load_config_file(filepath):
    """
    Load a configuration as an options dict.

    The format is chosen from the filepath extension (``.json``, ``.yaml`` or
    ``.yml``). Any other input is tried as an inline JSON document.

    :param filepath: path to the configuration file, or a JSON string
    :type filepath: str
    :return: the loaded options
    :rtype: dict
    :raises ConfigurationException: when the extension is not supported, or
        when a YAML file is given but PyYAML is not installed
    """
    if filepath.endswith('.json'):
        with open(filepath) as config_file_data:
            return json.load(config_file_data)

    if filepath.endswith(('.yaml', '.yml')):
        try:
            import yaml
        except ImportError:  # pragma: no cover
            raise ConfigurationException('Configuration file extension is not supported. '
                                         'PyYAML should be installed to support "%s" file' % (
                                             filepath,))
        with open(filepath) as config_file_data:
            # safe_load only builds plain data types, so a configuration file
            # cannot instantiate arbitrary Python objects. It also works on
            # PyYAML releases where yaml.load requires an explicit Loader.
            return yaml.safe_load(config_file_data)

    try:
        # Try to load input as JSON
        return json.loads(filepath)
    except (ValueError, TypeError):
        # Not an inline JSON document: report the unsupported input below.
        pass

    raise ConfigurationException('Configuration file extension is not supported for "%s" file.' % (filepath,))
|
|
||||||
|
|
||||||
|
|
||||||
def get_options_file_locations(homedir, cwd, yaml_supported=False):
    """
    List every path where an options file is looked for.

    :param homedir: user home directory
    :type homedir: basestring
    :param cwd: current working directory
    :type cwd: basestring
    :param yaml_supported: also list the ``yaml`` and ``yml`` variants
    :type yaml_supported: bool
    :return: candidate paths, grouped by directory and, within a directory,
        ordered by extension (json, yaml, yml)
    :rtype: list
    """
    extensions = ['json', 'yaml', 'yml'] if yaml_supported else ['json']
    targets = (
        (os.path.join(homedir, '.guessit'), 'options'),
        (os.path.join(homedir, '.config', 'guessit'), 'options'),
        (cwd, 'guessit.options'),
    )
    return [
        os.path.join(directory, stem + '.' + extension)
        for directory, stem in targets
        for extension in extensions
    ]
|
|
||||||
@@ -1,35 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Utils for re module
|
|
||||||
"""
|
|
||||||
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
|
|
||||||
def build_or_pattern(patterns, name=None, escape=False):
    """
    Join several patterns into a single alternation group.

    :param patterns: the alternatives
    :type patterns: iterable of str
    :param name: when truthy, name of the resulting group; otherwise the
        group is non-capturing
    :type name: str
    :param escape: escape each alternative and wrap it in its own
        non-capturing group
    :type escape: bool
    :return: the alternation pattern
    :rtype: str
    """
    alternatives = []
    for candidate in patterns:
        alternatives.append('(?:%s)' % re.escape(candidate) if escape else candidate)

    if not alternatives:
        # No opening group is emitted for an empty input, only the closing one.
        return ')'

    opening = '(?P<' + name + '>' if name else '(?:'
    return opening + '|'.join(alternatives) + ')'
|
|
||||||
@@ -1,99 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Rebulk object default builder
|
|
||||||
"""
|
|
||||||
from rebulk import Rebulk
|
|
||||||
|
|
||||||
from .markers.path import path
|
|
||||||
from .markers.groups import groups
|
|
||||||
|
|
||||||
from .properties.episodes import episodes
|
|
||||||
from .properties.container import container
|
|
||||||
from .properties.source import source
|
|
||||||
from .properties.video_codec import video_codec
|
|
||||||
from .properties.audio_codec import audio_codec
|
|
||||||
from .properties.screen_size import screen_size
|
|
||||||
from .properties.website import website
|
|
||||||
from .properties.date import date
|
|
||||||
from .properties.title import title
|
|
||||||
from .properties.episode_title import episode_title
|
|
||||||
from .properties.language import language
|
|
||||||
from .properties.country import country
|
|
||||||
from .properties.release_group import release_group
|
|
||||||
from .properties.streaming_service import streaming_service
|
|
||||||
from .properties.other import other
|
|
||||||
from .properties.size import size
|
|
||||||
from .properties.bit_rate import bit_rate
|
|
||||||
from .properties.edition import edition
|
|
||||||
from .properties.cds import cds
|
|
||||||
from .properties.bonus import bonus
|
|
||||||
from .properties.film import film
|
|
||||||
from .properties.part import part
|
|
||||||
from .properties.crc import crc
|
|
||||||
from .properties.mimetype import mimetype
|
|
||||||
from .properties.type import type_
|
|
||||||
|
|
||||||
from .processors import processors
|
|
||||||
|
|
||||||
|
|
||||||
def rebulk_builder(config):
    """
    Assemble the main Rebulk object used by the api.

    Each marker/property builder is called with its own section of ``config``
    (an empty dict when the section is absent) and registered on the main
    object in a fixed order.

    :param config: mapping of section name to rule configuration
    :type config: dict
    :return: Main Rebulk object
    :rtype: Rebulk
    """
    def _config(name):
        return config.get(name, {})

    main = Rebulk()

    common_words = frozenset(_config('common_words'))

    # (builder, config section, extra positional arguments), in registration order.
    stages = (
        (path, 'path', ()),
        (groups, 'groups', ()),
        (episodes, 'episodes', ()),
        (container, 'container', ()),
        (source, 'source', ()),
        (video_codec, 'video_codec', ()),
        (audio_codec, 'audio_codec', ()),
        (screen_size, 'screen_size', ()),
        (website, 'website', ()),
        (date, 'date', ()),
        (title, 'title', ()),
        (episode_title, 'episode_title', ()),
        (language, 'language', (common_words,)),
        (country, 'country', (common_words,)),
        (release_group, 'release_group', ()),
        (streaming_service, 'streaming_service', ()),
        (other, 'other', ()),
        (size, 'size', ()),
        (bit_rate, 'bit_rate', ()),
        (edition, 'edition', ()),
        (cds, 'cds', ()),
        (bonus, 'bonus', ()),
        (film, 'film', ()),
        (part, 'part', ()),
        (crc, 'crc', ()),
        (processors, 'processors', ()),
        (mimetype, 'mimetype', ()),
        (type_, 'type', ()),
    )
    for builder, section, extra in stages:
        main.rebulk(builder(_config(section), *extra))

    def customize_properties(properties):
        """
        Replace the generic ``count`` property by ``season_count`` and ``episode_count``.
        """
        count = properties.pop('count')
        properties['season_count'] = count
        properties['episode_count'] = count
        return properties

    main.customize_properties = customize_properties

    return main
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Common module
|
|
||||||
"""
|
|
||||||
import re
|
|
||||||
|
|
||||||
# Characters treated as separators between tags/words.
seps = r' [](){}+*|=-_~#/\\.,;:'  # list of tags/words separators
# Same set with the bracket run '[](){}' removed.
seps_no_groups = seps.replace('[](){}', '')
# Same set with the forward slash and backslash characters removed.
seps_no_fs = seps.replace('/', '').replace('\\', '')

title_seps = r'-+/\|'  # separators for title

# (abbreviation, replacement) pairs: the first item is the shorthand character and the
# second a regex character class built from seps_no_fs.
dash = (r'-', r'['+re.escape(seps_no_fs)+']')  # abbreviation used by many rebulk objects.
alt_dash = (r'@', r'['+re.escape(seps_no_fs)+']')  # abbreviation used by many rebulk objects.
|
|
||||||
@@ -1,75 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Comparators
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
from functools import cmp_to_key
|
|
||||||
except ImportError:
|
|
||||||
from ...backports import cmp_to_key
|
|
||||||
|
|
||||||
|
|
||||||
def marker_comparator_predicate(match):
    """
    Tell whether a match should be counted when weighting markers.

    Private matches, ``proper_count``/``title`` matches, ``container`` matches
    tagged ``extension`` and ``other`` matches valued ``Rip`` are ignored.

    :param match: match to test
    :return: True when the match counts
    :rtype: bool
    """
    if match.private:
        return False
    if match.name in ('proper_count', 'title'):
        return False
    if match.name == 'container' and 'extension' in match.tags:
        return False
    return not (match.name == 'other' and match.value == 'Rip')
|
|
||||||
|
|
||||||
|
|
||||||
def marker_weight(matches, marker, predicate):
    """
    Compute the comparator weight of a marker.

    The weight is the number of distinct match names found inside the marker span.

    :param matches: object exposing ``range(start, end, predicate=...)``
    :param marker: marker whose ``span`` delimits the search
    :param predicate: filter forwarded to ``matches.range``
    :return: number of distinct match names
    :rtype: int
    """
    distinct_names = {found.name for found in matches.range(*marker.span, predicate=predicate)}
    return len(distinct_names)
|
|
||||||
|
|
||||||
|
|
||||||
def marker_comparator(matches, markers, predicate):
    """
    Build a ``cmp``-style function ordering markers from the most valuable to the least.

    A marker holding more distinct match names comes first; on a tie, the
    marker placed later in ``markers`` comes first.

    :param matches: matches used to weight each marker
    :param markers: sequence of markers, used to break ties by position
    :param predicate: filter applied to matches when weighting
    :return: comparator taking two markers and returning an int
    :rtype: callable
    """

    def comparator(marker1, marker2):
        """
        The actual comparator function.
        """
        weight_delta = marker_weight(matches, marker2, predicate) - marker_weight(matches, marker1, predicate)
        # give preference to rightmost path when both markers weigh the same
        return weight_delta or markers.index(marker2) - markers.index(marker1)

    return comparator
|
|
||||||
|
|
||||||
|
|
||||||
def marker_sorted(markers, matches, predicate=marker_comparator_predicate):
    """
    Return the markers sorted from the most valuable to the least.

    :param markers: markers to sort
    :param matches: matches used to weight each marker
    :param predicate: filter applied to matches when weighting
    :return: new sorted list of markers
    :rtype: list
    """
    compare = marker_comparator(matches, markers, predicate=predicate)
    return sorted(markers, key=cmp_to_key(compare))
|
|
||||||
@@ -1,125 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Date
|
|
||||||
"""
|
|
||||||
from dateutil import parser
|
|
||||||
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
# Character class accepted between date components.
_dsep = r'[-/ \.]'
# Same class, additionally accepting 'x'.
_dsep_bis = r'[-/ \.x]'

# Date patterns, tried in this order by search_date. In every pattern group 1 is the
# whole date text and the following groups are its components.
date_regexps = [
    # 8 digits surrounded by separators
    re.compile(r'%s((\d{8}))%s' % (_dsep, _dsep), re.IGNORECASE),
    # 6 digits surrounded by separators
    re.compile(r'%s((\d{6}))%s' % (_dsep, _dsep), re.IGNORECASE),
    # 2 digits, 1-2 digits, 1-2 digits
    re.compile(r'(?:^|[^\d])((\d{2})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE),
    # 1-2 digits, 1-2 digits, 2 digits
    re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE),
    # 4 digits, 1-2 digits, 1-2 digits
    re.compile(r'(?:^|[^\d])((\d{4})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep_bis, _dsep), re.IGNORECASE),
    # 1-2 digits, 1-2 digits, 4 digits
    re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{4}))(?:$|[^\d])' % (_dsep, _dsep_bis), re.IGNORECASE),
    # 1-2 digits with optional ordinal suffix, 3-10 letters, 4 digits
    re.compile(r'(?:^|[^\d])((\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4}))(?:$|[^\d])' % (_dsep, _dsep),
               re.IGNORECASE)]
|
|
||||||
|
|
||||||
|
|
||||||
def valid_year(year, min_year=1920, max_year=2030):
    """
    Check if number is a valid year.

    The accepted range used to be hard-coded; it is now configurable while
    keeping the historical defaults.

    :param year: year to check
    :type year: int
    :param min_year: first accepted year (inclusive)
    :type min_year: int
    :param max_year: first rejected year (exclusive upper bound)
    :type max_year: int
    :return: True when ``min_year <= year < max_year``
    :rtype: bool
    """
    return min_year <= year < max_year
|
|
||||||
|
|
||||||
|
|
||||||
def _is_int(string):
    """
    Tell whether ``int(string)`` succeeds.

    Only ``ValueError`` is handled; any other exception raised by ``int`` propagates.

    :param string: value to test
    :type string: str
    :return: True when the value converts to an integer
    :rtype: bool
    """
    try:
        int(string)
    except ValueError:
        return False
    else:
        return True
|
|
||||||
|
|
||||||
|
|
||||||
def _guess_day_first_parameter(groups):  # pylint:disable=inconsistent-return-statements
    """
    Guess the ``day_first`` option from the date components when it is not defined.

    It helps to solve issues with python dateutils 2.5.3 parser changes.

    :param groups: date components captured by the date regexp
    :type groups: sequence of str
    :return: False when the date starts with a year, True when it ends with a year,
        None when nothing can be guessed
    :rtype: bool or None
    """
    head, tail = groups[0], groups[-1]
    head_is_int = _is_int(head)
    tail_is_int = _is_int(tail)

    # Long (4 digit) year at the start: year comes first, so the day does not.
    if head_is_int and valid_year(int(head[:4])):
        return False
    # Long year at the end: the day comes first.
    if tail_is_int and valid_year(int(tail[-4:])):
        return True
    # Short year at the start: a value above 31 cannot be a day.
    if head_is_int and int(head[:2]) > 31:
        return False
    # Short year at the end.
    if tail_is_int and int(tail[-2:]) > 31:
        return True
    return None
|
|
||||||
|
|
||||||
|
|
||||||
def search_date(string, year_first=None, day_first=None):  # pylint:disable=inconsistent-return-statements
    """Looks for date patterns, and if found return the date and group span.

    Assumes there are sentinels at the beginning and end of the string that
    always allow matching a non-digit delimiting the date.

    Year can be defined on two digit only. It will return the nearest possible
    date from today.

    >>> search_date(' This happened on 2002-04-22. ')
    (18, 28, datetime.date(2002, 4, 22))

    >>> search_date(' And this on 17-06-1998. ')
    (13, 23, datetime.date(1998, 6, 17))

    >>> search_date(' no date in here ')

    :param string: text to search
    :param year_first: force the year-first interpretation (None tries both)
    :param day_first: force the day-first interpretation (None guesses, then tries both)
    :return: ``(start, end, date)`` for the first plausible date, or None
    """
    for date_re in date_regexps:
        search_match = date_re.search(string)
        if not search_match:
            continue

        start, end = search_match.start(1), search_match.end(1)
        groups = search_match.groups()[1:]
        match = '-'.join(groups)

        # Resolve day_first for THIS candidate only. The caller's value is never
        # overwritten, so a guess made for a candidate that fails to parse
        # cannot leak into the candidates found by the following regexps.
        candidate_day_first = day_first
        if candidate_day_first is None:
            if year_first:
                candidate_day_first = False
            else:
                candidate_day_first = _guess_day_first_parameter(groups)

        # If day_first/year_first is undefined, parse is made using both possible values.
        yearfirst_opts = [False, True] if year_first is None else [year_first]
        dayfirst_opts = [True, False] if candidate_day_first is None else [candidate_day_first]

        for dayfirst in dayfirst_opts:
            for yearfirst in yearfirst_opts:
                try:
                    date = parser.parse(match, dayfirst=dayfirst, yearfirst=yearfirst)
                except (ValueError, TypeError):  # pragma: no cover
                    # see https://bugs.launchpad.net/dateutil/+bug/1247643
                    date = None

                # check date plausibility
                if date and valid_year(date.year):  # pylint:disable=no-member
                    return start, end, date.date()  # pylint:disable=no-member
|
|
||||||
@@ -1,53 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Expected property factory
|
|
||||||
"""
|
|
||||||
import re
|
|
||||||
|
|
||||||
from rebulk import Rebulk
|
|
||||||
from rebulk.utils import find_all
|
|
||||||
|
|
||||||
from . import dash, seps
|
|
||||||
|
|
||||||
|
|
||||||
def build_expected_function(context_key):
    """
    Create an "expected property" functional pattern.

    :param context_key: key of the context entry listing the expected values
    :type context_key: str
    :return: function ``expected(input_string, context)`` returning the found spans
    :rtype: callable
    """

    def expected(input_string, context):
        """
        Expected property functional pattern.

        Entries prefixed with ``re:`` are searched as regular expressions,
        the others as plain text once separators are replaced by spaces.

        :param input_string: string to search
        :type input_string: str
        :param context: context holding the expected values under ``context_key``
        :type context: dict
        :return: spans for regexp entries, ``start``/``end``/``value`` dicts for plain entries
        :rtype: list
        """
        found = []
        for search in context.get(context_key):
            if not search.startswith('re:'):
                value = search
                # The normalized input string is kept for the following entries too.
                for sep in seps:
                    input_string = input_string.replace(sep, ' ')
                    search = search.replace(sep, ' ')
                for start in find_all(input_string, search, ignore_case=True):
                    found.append({'start': start, 'end': start + len(search), 'value': value})
                continue

            pattern = search[3:].replace(' ', '-')
            finder = Rebulk().regex(pattern, abbreviations=[dash], flags=re.IGNORECASE)
            for match in finder.matches(input_string, context):
                found.append(match.span)
        return found

    return expected
|
|
||||||
@@ -1,136 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Formatters
|
|
||||||
"""
|
|
||||||
from rebulk.formatters import formatters
|
|
||||||
from rebulk.remodule import re
|
|
||||||
from . import seps
|
|
||||||
|
|
||||||
# Separator characters that cleanup() must NOT replace by a space.
_excluded_clean_chars = ',:;-/\\'
# Separators replaced by a space in cleanup(): every character of seps that is
# not listed in _excluded_clean_chars, in seps order.
clean_chars = ""
for sep in seps:
    if sep not in _excluded_clean_chars:
        clean_chars += sep
|
|
||||||
|
|
||||||
|
|
||||||
def _potential_before(i, input_string):
    """
    Check if the character at position i can be a potential single char separator considering what's before it.

    The character must be a separator preceded by exactly one non-separator
    character, itself preceded by a separator or by the start of the string.
    The start of the string is handled explicitly: the previous code read
    ``input_string[i - 2]`` with ``i == 1``, i.e. index ``-1``, and so looked
    at the LAST character of the string instead.

    :param i: position of the candidate separator
    :type i: int
    :param input_string: string being inspected
    :type input_string: str
    :return: True when position i is a potential single char separator
    :rtype: bool
    """
    if i < 1 or input_string[i] not in seps or input_string[i - 1] in seps:
        return False
    # Mirror _potential_after, which treats the end of the string as a boundary.
    return i == 1 or input_string[i - 2] in seps
|
|
||||||
|
|
||||||
|
|
||||||
def _potential_after(i, input_string):
    """
    Check if the character at position i can be a potential single char separator considering what's after it.

    True when fewer than two characters follow, or when the character two
    positions further is the same as the one at i and the character in between
    is not a separator.

    :param i: position of the candidate separator
    :type i: int
    :param input_string: string being inspected
    :type input_string: str
    :return: True when position i is a potential single char separator
    :rtype: bool
    """
    if i + 2 >= len(input_string):
        return True
    return input_string[i + 2] == input_string[i] and input_string[i + 1] not in seps
|
|
||||||
|
|
||||||
|
|
||||||
def cleanup(input_string):
    """
    Replace separators by spaces and strip them from input_string (but keep ',:;-/\\' characters)

    Separators between single characters are kept (Marvel's Agents of S.H.I.E.L.D.)
    https://github.com/guessit-io/guessit/issues/278

    :param input_string: string to clean
    :type input_string: str
    :return: cleaned string, with runs of spaces collapsed
    :rtype: str
    """
    cleaned = input_string
    for char in clean_chars:
        cleaned = cleaned.replace(char, ' ')

    # Positions still holding a separator after the replacement above.
    sep_positions = [pos for pos, letter in enumerate(cleaned) if letter in seps]

    # Separators surrounded by single characters in the ORIGINAL string...
    candidates = [pos for pos in sep_positions
                  if _potential_before(pos, input_string) and _potential_after(pos, input_string)]
    # ...and having another such separator two positions away are restored.
    restorable = [pos for pos in candidates if pos - 2 in candidates or pos + 2 in candidates]

    restored_chars = set()
    if restorable:
        chars = list(cleaned)
        for pos in restorable:
            restored_chars.add(input_string[pos])
            chars[pos] = input_string[pos]
        cleaned = ''.join(chars)

    # Restored separators must survive the final strip.
    strippable = ''.join([c for c in seps if c not in restored_chars])
    cleaned = strip(cleaned, strippable)

    return re.sub(' +', ' ', cleaned)
|
|
||||||
|
|
||||||
|
|
||||||
def strip(input_string, chars=seps):
    """
    Remove leading and trailing separators from input_string.

    :param input_string: string to strip
    :type input_string: str
    :param chars: characters to remove, defaults to all separators
    :type chars: str
    :return: stripped string
    :rtype: str
    """
    stripped = input_string.strip(chars)
    return stripped
|
|
||||||
|
|
||||||
|
|
||||||
def raw_cleanup(raw):
    """
    Normalize a raw value so it can be compared with other raw values.

    The value is lowercased, then passed through the ``cleanup`` and ``strip`` formatters.

    :param raw: raw value
    :type raw: str
    :return: normalized value
    :rtype: str
    """
    normalize = formatters(cleanup, strip)
    return normalize(raw.lower())
|
|
||||||
|
|
||||||
|
|
||||||
def reorder_title(title, articles=('the',), separators=(',', ', ')):
    """
    Move a trailing article back to the front of the title.

    ``"Matrix, The"`` becomes ``"The Matrix"``. The comparison is case
    insensitive and the original casing of the article is kept.

    :param title: title to reorder
    :type title: str
    :param articles: lowercase articles to look for
    :type articles: iterable of str
    :param separators: separators that may precede the trailing article
    :type separators: iterable of str
    :return: reordered title, or the title unchanged when no article is found
    :rtype: str
    """
    lowered = title.lower()
    for article in articles:
        for separator in separators:
            tail = separator + article
            tail_len = len(tail)
            if lowered[-tail_len:] != tail:
                continue
            moved_article = title[len(separator) - tail_len:]
            remainder = title[:-tail_len]
            return moved_article + ' ' + remainder
    return title
|
|
||||||
@@ -1,165 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
parse numeral from various formats
|
|
||||||
"""
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
# Regex fragment for a number written with 1 to 4 digits.
digital_numeral = r'\d{1,4}'

# Regex fragment for an uppercase roman numeral; the lookahead requires at least one roman letter.
roman_numeral = r'(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})'

# Number words: the index of a word in its list is the value it stands for (0 to 20).
english_word_numeral_list = [
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
    'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
]

french_word_numeral_list = [
    'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
]

# Alternative French spellings (no accent, feminine 'une', no hyphen).
french_alt_word_numeral_list = [
    'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
]
|
|
||||||
|
|
||||||
|
|
||||||
def __build_word_numeral(*args):
    """
    Build the word numeral regexp from word lists.

    The result is a non-capturing group starting with a ``(?=\\w+)`` lookahead
    followed by every word, in order, separated by ``|``.

    :param args: word lists to merge
    :type args: iterables of str
    :return: regexp source
    :rtype: str
    :raises TypeError: when no word at all is given
    """
    words = [word for word_list in args for word in word_list]
    if not words:
        # Same failure as appending ')' to the initial None of the loop-based version.
        raise TypeError("unsupported operand type(s) for +=: 'NoneType' and 'str'")
    return r'(?:(?=\w+)' + '|'.join(words) + ')'
|
|
||||||
|
|
||||||
|
|
||||||
# Regex fragment matching any of the English/French number words.
word_numeral = __build_word_numeral(english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list)

# Regex fragment matching a numeral in digit, roman or word form.
numeral = '(?:' + digital_numeral + '|' + roman_numeral + '|' + word_numeral + ')'

# Roman symbols with their value, from the largest to the smallest;
# __parse_roman consumes the input in this order.
__romanNumeralMap = (
    ('M', 1000),
    ('CM', 900),
    ('D', 500),
    ('CD', 400),
    ('C', 100),
    ('XC', 90),
    ('L', 50),
    ('XL', 40),
    ('X', 10),
    ('IX', 9),
    ('V', 5),
    ('IV', 4),
    ('I', 1)
)

# Anchored version of roman_numeral, used to validate a whole value.
__romanNumeralPattern = re.compile('^' + roman_numeral + '$')
|
|
||||||
|
|
||||||
|
|
||||||
def __parse_roman(value):
    """
    Convert a Roman numeral to an integer.

    :param value: Value to parse
    :type value: string
    :return: integer value of the numeral
    :rtype: int
    :raises ValueError: when value is not a valid Roman numeral
    """
    if not __romanNumeralPattern.search(value):
        raise ValueError('Invalid Roman numeral: %s' % value)

    total = 0
    position = 0
    # Symbols are ordered from the largest to the smallest: consume each one greedily.
    for symbol, amount in __romanNumeralMap:
        width = len(symbol)
        while value[position:position + width] == symbol:
            total += amount
            position += width
    return total
|
|
||||||
|
|
||||||
|
|
||||||
def __parse_word(value):
    """
    Convert a number word to an integer.

    The lowercased value is looked up in the English, French and alternative
    French word lists, in that order; its position in the list is its value.

    :param value: Value to parse
    :type value: string
    :return: integer value of the word
    :rtype: int
    :raises ValueError: when the word is in none of the lists
    """
    candidates = (english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list)
    for word_list in candidates:
        lowered = value.lower()
        if lowered in word_list:
            return word_list.index(lowered)
    raise ValueError  # pragma: no cover
|
|
||||||
|
|
||||||
|
|
||||||
# Captures the first run of digits of a value, ignoring the non-digit characters around it.
_clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')
|
|
||||||
|
|
||||||
|
|
||||||
def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
    """
    Parse a numeric value into integer.

    The enabled notations are tried in this order: digits, roman numeral, word.

    :param value: Value to parse. Can be an integer, roman numeral or word.
    :type value: string
    :param int_enabled: try the digit notation
    :type int_enabled: bool
    :param roman_enabled: try the roman notation
    :type roman_enabled: bool
    :param word_enabled: try the word notation
    :type word_enabled: bool
    :param clean: when true, look for the numeral inside the value (first run of
        digits, or each whitespace separated word) before trying the whole value
    :type clean: bool
    :return: Numeric value
    :rtype: int
    :raises ValueError: when value can't be parsed with any enabled notation
    """
    # pylint: disable=too-many-branches
    def _parse_words(parser, prepare):
        """Try parser on each word of value when cleaning, then on the whole value."""
        if clean:
            for word in value.split():
                try:
                    return parser(prepare(word))
                except ValueError:  # pragma: no cover
                    continue
        return parser(value)

    if int_enabled:
        try:
            digits = _clean_re.match(value) if clean else None
            if digits:
                return int(digits.group(1))
            return int(value)
        except ValueError:
            pass

    if roman_enabled:
        try:
            return _parse_words(__parse_roman, lambda word: word.upper())
        except ValueError:
            pass

    if word_enabled:
        try:
            return _parse_words(__parse_word, lambda word: word)
        except ValueError:  # pragma: no cover
            pass

    raise ValueError('Invalid numeral: ' + value)  # pragma: no cover
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Pattern utility functions
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
def is_disabled(context, name):
    """Whether a specific pattern is disabled.

    The context object might define an inclusion list (includes) or an exclusion list (excludes).
    A pattern is considered disabled if it's found in the exclusion list, or if
    the inclusion list is defined and not empty and the pattern is not found in it.

    :param context: context mapping, may be empty or None
    :param name: name of the pattern
    :return: True when the pattern is disabled
    :rtype: bool
    """
    if not context:
        return False

    excludes = context.get('excludes')
    if excludes and name in excludes:
        return True

    includes = context.get('includes')
    # bool() keeps the return type consistent: a missing or empty inclusion
    # list used to be returned as-is (None or an empty container).
    return bool(includes) and name not in includes
|
|
||||||
@@ -1,106 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Quantities: Size
|
|
||||||
"""
|
|
||||||
import re
|
|
||||||
from abc import abstractmethod
|
|
||||||
|
|
||||||
import six
|
|
||||||
|
|
||||||
from ..common import seps
|
|
||||||
|
|
||||||
|
|
||||||
class Quantity(object):
    """
    A magnitude together with its units.

    Subclasses provide ``parse_units`` to normalize the unit notation.
    """

    # magnitude: integer or decimal number; units: optional run of non-digit characters.
    parser_re = re.compile(r'(?P<magnitude>\d+(?:[.]\d+)?)(?P<units>[^\d]+)?')

    def __init__(self, magnitude, units):
        self.magnitude = magnitude
        self.units = units

    @classmethod
    @abstractmethod
    def parse_units(cls, value):
        """
        Parse a string to a proper unit notation.
        """
        raise NotImplementedError

    @classmethod
    def fromstring(cls, string):
        """
        Build a quantity from its string notation.

        The magnitude is an int when possible, a float otherwise.

        :param string: text starting with a number, optionally followed by units
        :return: quantity of the class it is called on
        """
        parsed = cls.parser_re.match(string).groupdict()
        raw_magnitude = parsed['magnitude']
        try:
            magnitude = int(raw_magnitude)
        except ValueError:
            magnitude = float(raw_magnitude)
        units = cls.parse_units(parsed['units'])

        return cls(magnitude, units)

    def __hash__(self):
        # Hash of the string form, so a quantity hashes like its string notation.
        return hash(str(self))

    def __eq__(self, other):
        # A quantity compares equal to its own string notation.
        if isinstance(other, six.string_types):
            return str(self) == other
        if isinstance(other, self.__class__):
            return self.magnitude == other.magnitude and self.units == other.units
        return NotImplemented

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        return '<{0} [{1}]>'.format(self.__class__.__name__, self)

    def __str__(self):
        return '{0}{1}'.format(self.magnitude, self.units)
|
|
||||||
|
|
||||||
|
|
||||||
class Size(Quantity):
    """
    Size quantity.

    e.g.: 1.1GB, 300MB
    """

    @classmethod
    def parse_units(cls, value):
        """Strip separators around the units and uppercase them."""
        trimmed = value.strip(seps)
        return trimmed.upper()
|
|
||||||
|
|
||||||
|
|
||||||
class BitRate(Quantity):
    """
    Bit rate quantity.

    e.g.: 320Kbps, 1.5Mbps
    """

    @classmethod
    def parse_units(cls, value):
        """Strip separators, capitalize, and spell 'bits'/'bit' as 'bps'."""
        units = value.strip(seps).capitalize()
        # 'bits' must be replaced before 'bit'.
        units = units.replace('bits', 'bps')
        units = units.replace('bit', 'bps')
        return units
|
|
||||||
|
|
||||||
|
|
||||||
class FrameRate(Quantity):
    """
    Frame rate quantity.

    e.g.: 24fps, 60fps
    """

    @classmethod
    def parse_units(cls, value):
        """Always return 'fps', whatever the parsed units are."""
        units = 'fps'
        return units
|
|
||||||
@@ -1,74 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Validators
|
|
||||||
"""
|
|
||||||
from functools import partial
|
|
||||||
|
|
||||||
from rebulk.validators import chars_before, chars_after, chars_surround
|
|
||||||
from . import seps
|
|
||||||
|
|
||||||
# rebulk's chars_before / chars_after / chars_surround validators with seps
# bound as their first argument.
seps_before = partial(chars_before, seps)
seps_after = partial(chars_after, seps)
seps_surround = partial(chars_surround, seps)
|
|
||||||
|
|
||||||
|
|
||||||
def int_coercable(string):
    """
    Tell whether ``int(string)`` succeeds.

    Only ``ValueError`` is handled; any other exception raised by ``int`` propagates.

    :param string: value to test
    :type string: str
    :return: True when the value can be coerced to int
    :rtype: bool
    """
    try:
        int(string)
    except ValueError:
        return False
    else:
        return True
|
|
||||||
|
|
||||||
|
|
||||||
def and_(*validators):
    """
    Combine validators: the result accepts a value only when all of them do.

    :param validators: validator functions taking a single argument
    :type validators: callables
    :return: composed validator
    :rtype: callable
    """
    def composed(string):
        """
        True when every validator accepts the value; stops at the first refusal.

        :param string: value to validate
        :return: validation result
        :rtype: bool
        """
        return all(validator(string) for validator in validators)
    return composed
|
|
||||||
|
|
||||||
|
|
||||||
def or_(*validators):
    """
    Combine validators: the result accepts a value when at least one of them does.

    :param validators: validator functions taking a single argument
    :type validators: callables
    :return: composed validator
    :rtype: callable
    """
    def composed(string):
        """
        True when any validator accepts the value; stops at the first acceptance.

        :param string: value to validate
        :return: validation result
        :rtype: bool
        """
        return any(validator(string) for validator in validators)
    return composed
|
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Words utils
|
|
||||||
"""
|
|
||||||
from collections import namedtuple
|
|
||||||
|
|
||||||
from . import seps
|
|
||||||
|
|
||||||
# A word yielded by iter_words: span is the (start, end) position in the string, value is its text.
_Word = namedtuple('_Word', ['span', 'value'])
|
|
||||||
|
|
||||||
|
|
||||||
def iter_words(string):
    """
    Iterate on all words in a string.

    A word is a maximal run of characters that are not ASCII separators.

    :param string: string to split
    :type string: str
    :return: words with their ``(start, end)`` span and their text
    :rtype: iterable[_Word]
    """
    word_start = 0
    in_word = False
    consumed = 0
    for position, char in enumerate(string):
        consumed = position + 1
        # Make sure we don't exclude unicode characters.
        if ord(char) < 128 and char in seps:
            if in_word:
                yield _Word(span=(word_start, position), value=string[word_start:position])
            in_word = False
            word_start = position + 1
        else:
            in_word = True
    if in_word:
        yield _Word(span=(word_start, consumed), value=string[word_start:consumed])
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Markers
|
|
||||||
"""
|
|
||||||
@@ -1,52 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Groups markers (...), [...] and {...}
|
|
||||||
"""
|
|
||||||
from rebulk import Rebulk
|
|
||||||
|
|
||||||
|
|
||||||
def groups(config):
    """
    Builder for rebulk object.

    :param config: rule configuration; ``starting`` and ``ending`` hold the opening and
        closing characters, the characters at the same position forming a pair
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk.defaults(name="group", marker=True)

    starting = config['starting']
    ending = config['ending']

    def mark_groups(input_string):
        """
        Functional pattern to mark groups (...), [...] and {...}.

        :param input_string: string to scan
        :return: ``(start, end)`` spans, brackets included, in closing order
        """
        # One stack of opening positions per configured bracket type. The stacks
        # used to be hard-coded to three, which raised IndexError as soon as the
        # configuration declared a fourth pair.
        openings = tuple([] for _ in range(max(len(starting), len(ending))))

        ret = []
        for index, char in enumerate(input_string):
            start_type = starting.find(char)
            if start_type > -1:
                openings[start_type].append(index)

            end_type = ending.find(char)
            # A closing character without a pending opening one is ignored.
            if end_type > -1 and openings[end_type]:
                ret.append((openings[end_type].pop(), index + 1))
        return ret

    rebulk.functional(mark_groups)
    return rebulk
|
|
||||||
@@ -1,47 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Path markers
|
|
||||||
"""
|
|
||||||
from rebulk import Rebulk
|
|
||||||
|
|
||||||
from rebulk.utils import find_all
|
|
||||||
|
|
||||||
|
|
||||||
def path(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration (not used)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk.defaults(name="path", marker=True)

    def mark_path(input_string, context):
        """
        Functional pattern to mark path elements.

        With the ``name_only`` context option the whole string is a single
        element; otherwise the string is split on '/' and '\\'.

        :param input_string: string to scan
        :param context: context options
        :return: ``(start, end)`` spans of the path elements
        """
        if context.get('name_only', False):
            return [(0, len(input_string))]

        boundaries = list(find_all(input_string, '/'))
        boundaries += list(find_all(input_string, '\\'))
        # Virtual separators just before the start and at the end of the string.
        boundaries += [-1, len(input_string)]
        boundaries.sort()

        return [(left + 1, right) for left, right in zip(boundaries, boundaries[1:])]

    rebulk.functional(mark_path)
    return rebulk
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
"""
|
|
||||||
Match processors
|
|
||||||
"""
|
|
||||||
from guessit.rules.common import seps
|
|
||||||
|
|
||||||
|
|
||||||
def strip(match, chars=seps):
    """
    Strip given characters from both ends of a match, in place.

    :param match: object exposing ``input_string``, ``start`` and ``end``; its
        ``start``/``end`` are moved inwards
    :param chars: characters to strip
    :return: ``False`` when the stripped match is falsy, ``None`` otherwise
    """
    # Each scan is bounded by the opposite edge: a match made only of ``chars``
    # collapses to an empty span instead of indexing past the end of the input
    # (IndexError) or leaving start greater than end.
    while match.start < match.end and match.input_string[match.start] in chars:
        match.start += 1
    while match.end > match.start and match.input_string[match.end - 1] in chars:
        match.end -= 1
    if not match:
        return False
    return None
|
|
||||||
@@ -1,259 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Processors
|
|
||||||
"""
|
|
||||||
from collections import defaultdict
|
|
||||||
import copy
|
|
||||||
|
|
||||||
import six
|
|
||||||
|
|
||||||
from rebulk import Rebulk, Rule, CustomRule, POST_PROCESS, PRE_PROCESS, AppendMatch, RemoveMatch
|
|
||||||
|
|
||||||
from .common import seps_no_groups
|
|
||||||
from .common.formatters import cleanup
|
|
||||||
from .common.comparators import marker_sorted
|
|
||||||
from .common.date import valid_year
|
|
||||||
from .common.words import iter_words
|
|
||||||
|
|
||||||
|
|
||||||
class EnlargeGroupMatches(CustomRule):
    """
    Enlarge matches that start and/or end a group so that their span includes the brackets.
    """
    priority = PRE_PROCESS

    def when(self, matches, context):
        """
        Collect matches that begin right after a group start or finish right before a group end.

        :return: ``(starting, ending)`` lists, or ``False`` when both are empty
        """
        opening_matches = []
        closing_matches = []

        for group in matches.markers.named('group'):
            opening_matches.extend(matches.starting(group.start + 1))
            closing_matches.extend(matches.ending(group.end - 1))

        if not (opening_matches or closing_matches):
            return False
        return opening_matches, closing_matches

    def then(self, matches, when_response, context):
        """
        Widen each collected match by one character on the relevant side.

        Matches are removed before being mutated and appended again afterwards.
        """
        opening_matches, closing_matches = when_response

        for opening in opening_matches:
            matches.remove(opening)
            opening.start -= 1
            # presumably keeps the raw span on the text inside the bracket -- TODO confirm
            opening.raw_start += 1
            matches.append(opening)

        for closing in closing_matches:
            matches.remove(closing)
            closing.end += 1
            closing.raw_end -= 1
            matches.append(closing)
|
|
||||||
|
|
||||||
|
|
||||||
class EquivalentHoles(Rule):
    """
    Creates equivalent matches for holes that have the same value as an existing match (case insensitive).
    """
    priority = POST_PROCESS
    consequence = AppendMatch

    def when(self, matches, context):
        """
        Name every hole whose text equals, ignoring case, the string value of an existing match.

        :return: list of holes renamed and tagged ``equivalent``
        """
        equivalents = []

        for filepath in marker_sorted(matches.markers.named('path'), matches):
            holes = matches.holes(start=filepath.start, end=filepath.end, formatter=cleanup)
            for name in matches.names:
                # Iterate over a copy: holes are removed from the list while looping.
                for hole in list(holes):
                    for existing in matches.named(name):
                        if not isinstance(existing.value, six.string_types):
                            continue
                        if hole.value.lower() != existing.value.lower():
                            continue
                        if 'equivalent-ignore' in existing.tags:
                            continue

                        # Align both matches on the preferred spelling.
                        preferred = _preferred_string(hole.value, existing.value)
                        if hole.value != preferred:
                            hole.value = preferred
                        if existing.value != preferred:
                            existing.value = preferred

                        hole.name = name
                        hole.tags = ['equivalent']
                        equivalents.append(hole)
                        if hole in holes:
                            holes.remove(hole)

        return equivalents
|
|
||||||
|
|
||||||
|
|
||||||
class RemoveAmbiguous(Rule):
    """
    If multiple matches are found with the same name and different values, keep the one in the most
    valuable filepart.
    Also keep other matches with the same name and values as the kept ones.
    """

    priority = POST_PROCESS
    consequence = RemoveMatch

    def __init__(self, sort_function=marker_sorted, predicate=None):
        """
        :param sort_function: callable ``(markers, matches)`` ordering the path markers
        :param predicate: optional filter passed to ``matches.range``
        """
        super(RemoveAmbiguous, self).__init__()
        self.sort_function = sort_function
        self.predicate = predicate

    def when(self, matches, context):
        """
        Walk fileparts in sorted order and collect matches whose name was already seen in an
        earlier filepart with different values.

        :return: list of matches to remove
        """
        seen_names = set()
        kept_values = defaultdict(list)
        ambiguous = []

        for filepart in self.sort_function(matches.markers.named('path'), matches):
            names_here = set()
            for match in matches.range(filepart.start, filepart.end, predicate=self.predicate):
                names_here.add(match.name)
                known = kept_values[match.name]
                if match.value in known:
                    continue
                if match.name in seen_names:
                    ambiguous.append(match)
                else:
                    known.append(match.value)

            # Names only count as "previous" once their whole filepart is processed.
            seen_names.update(names_here)

        return ambiguous
|
|
||||||
|
|
||||||
|
|
||||||
class RemoveLessSpecificSeasonEpisode(RemoveAmbiguous):
    """
    If multiple season/episodes matches are found with different values,
    keep the one tagged as 'SxxExx' or in the rightmost filepart.
    """
    def __init__(self, name):
        """
        :param name: match name this rule instance is restricted to
        """
        def has_name(match):
            return match.name == name

        def is_sxxexx(match):
            return match.name == name and 'SxxExx' in match.tags

        def rightmost_first(markers, matches):
            return marker_sorted(list(reversed(markers)), matches, is_sxxexx)

        super(RemoveLessSpecificSeasonEpisode, self).__init__(sort_function=rightmost_first,
                                                              predicate=has_name)
|
|
||||||
|
|
||||||
|
|
||||||
def _preferred_string(value1, value2): # pylint:disable=too-many-return-statements
|
|
||||||
"""
|
|
||||||
Retrieves preferred title from both values.
|
|
||||||
:param value1:
|
|
||||||
:type value1: str
|
|
||||||
:param value2:
|
|
||||||
:type value2: str
|
|
||||||
:return: The preferred title
|
|
||||||
:rtype: str
|
|
||||||
"""
|
|
||||||
if value1 == value2:
|
|
||||||
return value1
|
|
||||||
if value1.istitle() and not value2.istitle():
|
|
||||||
return value1
|
|
||||||
if not value1.isupper() and value2.isupper():
|
|
||||||
return value1
|
|
||||||
if not value1.isupper() and value1[0].isupper() and not value2[0].isupper():
|
|
||||||
return value1
|
|
||||||
if _count_title_words(value1) > _count_title_words(value2):
|
|
||||||
return value1
|
|
||||||
return value2
|
|
||||||
|
|
||||||
|
|
||||||
def _count_title_words(value):
    """
    Count how many words of value are title-cased.

    :param value: text split into words with ``iter_words``
    :type value:
    :return: number of words for which ``istitle()`` is true
    :rtype: int
    """
    return sum(1 for word in iter_words(value) if word.value.istitle())
|
|
||||||
|
|
||||||
|
|
||||||
class SeasonYear(Rule):
    """
    If a season is a valid year and no year was found, create a match with year.
    """
    priority = POST_PROCESS
    consequence = AppendMatch

    def when(self, matches, context):
        """
        :return: copies of season matches renamed to ``year``; empty when a year already exists
        """
        if matches.named('year'):
            return []

        def as_year(season):
            clone = copy.copy(season)
            clone.name = 'year'
            return clone

        return [as_year(season) for season in matches.named('season') if valid_year(season.value)]
|
|
||||||
|
|
||||||
|
|
||||||
class YearSeason(Rule):
    """
    If a year is found, no season found, and episode is found, create a match with season.
    """
    priority = POST_PROCESS
    consequence = AppendMatch

    def when(self, matches, context):
        """
        :return: copies of year matches renamed to ``season``; empty unless there is an
            episode and no season
        """
        if matches.named('season') or not matches.named('episode'):
            return []

        def as_season(year):
            clone = copy.copy(year)
            clone.name = 'season'
            return clone

        return [as_season(year) for year in matches.named('year')]
|
|
||||||
|
|
||||||
|
|
||||||
class Processors(CustomRule):
    """
    Empty rule for ordering post_processing properly.
    """
    priority = POST_PROCESS

    def when(self, matches, context):
        """Never triggers: always returns ``None``."""
        return None

    def then(self, matches, when_response, context):  # pragma: no cover
        """Nothing to do."""
        return None
|
|
||||||
|
|
||||||
|
|
||||||
class StripSeparators(CustomRule):
    """
    Strip separators from matches. Keep separators if they are from acronyms, like in ".S.H.I.E.L.D."
    """
    priority = POST_PROCESS

    def when(self, matches, context):
        """Hand the whole match collection over to ``then``."""
        return matches

    def then(self, matches, when_response, context):  # pragma: no cover
        """
        Shrink the raw span of every match that starts or ends with a separator.

        A leading (trailing) separator is kept when the character two positions further in is
        also a separator, which is the acronym pattern.
        """
        for match in matches:
            # One pass per item of ``match.span`` on each side.
            for _ in range(len(match.span)):
                raw = match.raw
                if raw[0] in seps_no_groups and (len(raw) < 3 or raw[2] not in seps_no_groups):
                    match.raw_start += 1

            for _ in range(len(match.span)):
                raw = match.raw
                if raw[-1] in seps_no_groups and (len(raw) < 3 or raw[-3] not in seps_no_groups):
                    match.raw_end -= 1
|
|
||||||
|
|
||||||
|
|
||||||
def processors(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object holding the processing rules defined in this module.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # Registration order is kept exactly as declared.
    rule_set = (
        EnlargeGroupMatches,
        EquivalentHoles,
        RemoveLessSpecificSeasonEpisode('season'),
        RemoveLessSpecificSeasonEpisode('episode'),
        RemoveAmbiguous,
        SeasonYear,
        YearSeason,
        Processors,
        StripSeparators,
    )
    return Rebulk().rules(*rule_set)
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Properties
|
|
||||||
"""
|
|
||||||
@@ -1,235 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
audio_codec, audio_profile and audio_channels property
|
|
||||||
"""
|
|
||||||
from rebulk import Rebulk, Rule, RemoveMatch
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
from ..common import dash
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import seps_before, seps_after
|
|
||||||
|
|
||||||
# Match names that AudioValidatorRule treats as audio properties.
audio_properties = ['audio_codec', 'audio_profile', 'audio_channels']
|
|
||||||
|
|
||||||
|
|
||||||
def audio_codec(config):
    """
    Build the Rebulk object for audio_codec, audio_profile and audio_channels.

    :param config: rule configuration; its ``audio_channels`` entry maps a value to a list
        of patterns, where a ``re:`` prefix marks a regular expression
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)

    secondary_names = ['audio_profile', 'audio_channels']

    def audio_codec_priority(match1, match2):
        """
        Gives priority to audio_codec over audio_profile and audio_channels.

        :param match1:
        :type match1:
        :param match2:
        :type match2:
        :return: the non-codec match of the pair, or ``'__default__'``
        :rtype:
        """
        if match1.name == 'audio_codec' and match2.name in secondary_names:
            return match2
        if match1.name in secondary_names and match2.name == 'audio_codec':
            return match1
        return '__default__'

    def dts_hd_priority(match, other):
        """Return the other match when it is an audio_codec, else ``'__default__'``."""
        if other.name == 'audio_codec':
            return other
        return '__default__'

    # --- audio_codec -------------------------------------------------------
    rebulk.defaults(name='audio_codec',
                    conflict_solver=audio_codec_priority,
                    disabled=lambda context: is_disabled(context, 'audio_codec'))

    rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
    rebulk.string("MP2", value="MP2")
    rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='Dolby Digital')
    rebulk.regex('Dolby-?Atmos', 'Atmos', value='Dolby Atmos')
    rebulk.string("AAC", value="AAC")
    rebulk.string('EAC3', 'DDP', 'DD+', value='Dolby Digital Plus')
    rebulk.string("Flac", value="FLAC")
    rebulk.string("DTS", value="DTS")
    rebulk.regex('DTS-?HD', 'DTS(?=-?MA)', value='DTS-HD', conflict_solver=dts_hd_priority)
    rebulk.regex('True-?HD', value='Dolby TrueHD')
    rebulk.string('Opus', value='Opus')
    rebulk.string('Vorbis', value='Vorbis')
    rebulk.string('PCM', value='PCM')
    rebulk.string('LPCM', value='LPCM')

    # --- audio_profile: each pattern is tagged with the codec it belongs to ---
    rebulk.defaults(clear=True,
                    name='audio_profile',
                    disabled=lambda context: is_disabled(context, 'audio_profile'))
    rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD'])
    rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD'])
    rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS'])
    rebulk.string('HE', value='High Efficiency', tags=['audio_profile.rule', 'AAC'])
    rebulk.string('LC', value='Low Complexity', tags=['audio_profile.rule', 'AAC'])
    rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital'])
    rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital'])

    # --- audio_channels ----------------------------------------------------
    rebulk.defaults(clear=True,
                    name="audio_channels",
                    disabled=lambda context: is_disabled(context, 'audio_channels'))
    rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')

    for value, items in config.get('audio_channels').items():
        for item in items:
            if item.startswith('re:'):
                rebulk.regex(item[3:], value=value, children=True)
            else:
                rebulk.string(item, value=value)

    rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
                 AudioChannelsValidatorRule)

    return rebulk
|
|
||||||
|
|
||||||
|
|
||||||
class AudioValidatorRule(Rule):
    """
    Remove audio properties that are neither surrounded by separators nor next to each other.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: audio matches lacking a separator on one side with no audio match adjacent
            on that side
        """
        def is_audio(match):
            return match.name in audio_properties

        rejected = []
        for audio in matches.range(predicate=is_audio):
            if not seps_before(audio) and not matches.range(audio.start - 1, audio.start, is_audio):
                rejected.append(audio)
            elif not seps_after(audio) and not matches.range(audio.end, audio.end + 1, is_audio):
                rejected.append(audio)

        return rejected
|
|
||||||
|
|
||||||
|
|
||||||
class AudioProfileRule(Rule):
    """
    Abstract rule to validate audio profiles
    """
    priority = 64
    dependency = AudioValidatorRule
    consequence = RemoveMatch

    def __init__(self, codec):
        """
        :param codec: audio_codec value (also used as profile tag) this rule validates
        """
        super(AudioProfileRule, self).__init__()
        self.codec = codec

    def enabled(self, context):
        """Active unless ``audio_profile`` is disabled in the context."""
        return not is_disabled(context, 'audio_profile')

    def when(self, matches, context):
        """
        Remove profiles without a matching codec at the same span, before or after them;
        for profiles that do have one, remove whatever conflicts with the profile.

        :return: list of matches to remove
        """
        def is_own_profile(match):
            return 'audio_profile.rule' in match.tags and self.codec in match.tags

        def is_own_codec(match):
            return match.name == 'audio_codec' and match.value == self.codec

        to_remove = []
        for profile in matches.named('audio_profile', is_own_profile):
            # Lookup order: same span, then previous match, then next match.
            codec = (matches.at_span(profile.span, is_own_codec, 0)
                     or matches.previous(profile, is_own_codec)
                     or matches.next(profile, is_own_codec))
            if codec:
                to_remove.extend(matches.conflicting(profile))
            else:
                to_remove.append(profile)
        return to_remove
|
|
||||||
|
|
||||||
|
|
||||||
class DtsHDRule(AudioProfileRule):
    """
    Audio profile validation bound to the 'DTS-HD' codec.
    """

    def __init__(self):
        super(DtsHDRule, self).__init__(codec='DTS-HD')
|
|
||||||
|
|
||||||
|
|
||||||
class DtsRule(AudioProfileRule):
    """
    Audio profile validation bound to the 'DTS' codec.
    """

    def __init__(self):
        super(DtsRule, self).__init__(codec='DTS')
|
|
||||||
|
|
||||||
|
|
||||||
class AacRule(AudioProfileRule):
    """
    Audio profile validation bound to the 'AAC' codec.
    """

    def __init__(self):
        super(AacRule, self).__init__(codec='AAC')
|
|
||||||
|
|
||||||
|
|
||||||
class DolbyDigitalRule(AudioProfileRule):
    """
    Audio profile validation bound to the 'Dolby Digital' codec.
    """

    def __init__(self):
        super(DolbyDigitalRule, self).__init__(codec='Dolby Digital')
|
|
||||||
|
|
||||||
|
|
||||||
class HqConflictRule(Rule):
    """
    Solve conflict between HQ from other property and from audio_profile.
    """

    dependency = [DtsHDRule, DtsRule, AacRule, DolbyDigitalRule]
    consequence = RemoveMatch

    def enabled(self, context):
        """Active unless ``audio_profile`` is disabled in the context."""
        return not is_disabled(context, 'audio_profile')

    def when(self, matches, context):
        """
        :return: ``other`` matches sharing their span with a 'High Quality' audio_profile
        """
        def is_high_quality(match):
            return match.value == 'High Quality'

        hq_spans = [profile.span for profile in matches.named('audio_profile', is_high_quality)]

        def overlaps_hq(match):
            return match.span in hq_spans

        return matches.named('other', overlaps_hq)
|
|
||||||
|
|
||||||
|
|
||||||
class AudioChannelsValidatorRule(Rule):
    """
    Remove weak audio_channels matches that have no audio codec right before them.
    """
    priority = 128
    consequence = RemoveMatch

    def enabled(self, context):
        """Active unless ``audio_channels`` is disabled in the context."""
        return not is_disabled(context, 'audio_channels')

    def when(self, matches, context):
        """
        :return: matches tagged ``weak-audio_channels`` with no audio_codec in the character
            just before them
        """
        def is_codec(match):
            return match.name == 'audio_codec'

        return [channel for channel in matches.tagged('weak-audio_channels')
                if not matches.range(channel.start - 1, channel.start, is_codec)]
|
|
||||||
@@ -1,74 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
video_bit_rate and audio_bit_rate properties
|
|
||||||
"""
|
|
||||||
import re
|
|
||||||
|
|
||||||
from rebulk import Rebulk
|
|
||||||
from rebulk.rules import Rule, RemoveMatch, RenameMatch
|
|
||||||
|
|
||||||
from ..common import dash, seps
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.quantity import BitRate
|
|
||||||
from ..common.validators import seps_surround
|
|
||||||
|
|
||||||
|
|
||||||
def bit_rate(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object detecting bit rates.

    Every detection is first named ``audio_bit_rate``; ``BitRateTypeRule`` is registered to
    reclassify them afterwards.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def both_disabled(context):
        return is_disabled(context, 'audio_bit_rate') and is_disabled(context, 'video_bit_rate')

    def bit_rate_solver(match, other):
        # Return this match when the other one is a non-weak audio_channels match,
        # the other match otherwise.
        if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags:
            return match
        return other

    rebulk = Rebulk(disabled=both_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='audio_bit_rate', validator=seps_surround)
    rebulk.regex(r'\d+-?[kmg]b(ps|its?)', r'\d+\.\d+-?[kmg]b(ps|its?)',
                 conflict_solver=bit_rate_solver,
                 formatter=BitRate.fromstring,
                 tags=['release-group-prefix'])

    rebulk.rules(BitRateTypeRule)

    return rebulk
|
|
||||||
|
|
||||||
|
|
||||||
class BitRateTypeRule(Rule):
    """
    Convert audio bit rate guess into video bit rate.
    """
    consequence = [RenameMatch('video_bit_rate'), RemoveMatch]

    def when(self, matches, context):
        """
        Decide which ``audio_bit_rate`` matches are renamed to ``video_bit_rate`` or removed.

        :return: ``(to_rename, to_remove)`` lists, or ``False`` when both are empty
        """
        # NOTE(review): nesting below was reconstructed from a flattened listing; the
        # rename/remove branch is assumed to sit inside the "previous" check -- confirm.
        renamed = []
        removed = []

        def is_video_info(m):
            return m.name in ('source', 'screen_size', 'video_codec')

        def is_audio_codec(m):
            return m.name == 'audio_codec'

        def has_text(m):
            return m.value.strip(seps)

        if is_disabled(context, 'audio_bit_rate'):
            removed.extend(matches.named('audio_bit_rate'))
        else:
            video_bit_rate_disabled = is_disabled(context, 'video_bit_rate')
            for match in matches.named('audio_bit_rate'):
                previous = matches.previous(match, index=0, predicate=is_video_info)
                if not previous or matches.holes(previous.end, match.start, predicate=has_text):
                    continue

                after = matches.next(match, index=0, predicate=is_audio_codec)
                if after and not matches.holes(match.end, after.start, predicate=has_text):
                    bitrate = match.value
                    # Kbps, or Mbps below 10, right before an audio codec: left untouched.
                    if bitrate.units == 'Kbps' or (bitrate.units == 'Mbps' and bitrate.magnitude < 10):
                        continue

                if video_bit_rate_disabled:
                    removed.append(match)
                else:
                    renamed.append(match)

        if renamed or removed:
            return renamed, removed
        return False
|
|
||||||
@@ -1,56 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
bonus property
|
|
||||||
"""
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
from rebulk import Rebulk, AppendMatch, Rule
|
|
||||||
|
|
||||||
from .title import TitleFromPosition
|
|
||||||
from ..common.formatters import cleanup
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import seps_surround
|
|
||||||
|
|
||||||
|
|
||||||
def bonus(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object for the bonus property.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def bonus_solver(match, conflicting):
        # Return this match against a video_codec or a non-weak episode,
        # default resolution otherwise.
        if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'bonus'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)

    rebulk.regex(r'x(\d+)',
                 name='bonus',
                 private_parent=True,
                 children=True,
                 formatter=int,
                 validator={'__parent__': seps_surround},
                 validate_all=True,
                 conflict_solver=bonus_solver)

    rebulk.rules(BonusTitleRule)

    return rebulk
|
|
||||||
|
|
||||||
|
|
||||||
class BonusTitleRule(Rule):
    """
    Find bonus title after bonus.
    """
    dependency = TitleFromPosition
    consequence = AppendMatch

    properties = {'bonus_title': [None]}

    def when(self, matches, context):
        """
        :return: the first hole after the first non-private bonus match, renamed to
            ``bonus_title``; ``None`` when there is no bonus or no non-empty hole
        """
        bonus_number = matches.named('bonus', lambda match: not match.private, index=0)
        if not bonus_number:
            return None

        filepath = matches.markers.at_match(bonus_number, lambda marker: marker.name == 'path', 0)
        hole = matches.holes(bonus_number.end, filepath.end + 1, formatter=cleanup, index=0)
        if not (hole and hole.value):
            return None

        hole.name = 'bonus_title'
        return hole
|
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
cd and cd_count properties
|
|
||||||
"""
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
from rebulk import Rebulk
|
|
||||||
|
|
||||||
from ..common import dash
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
|
|
||||||
|
|
||||||
def cds(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object for the cd and cd_count properties.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def in_cd_range(match):
        # Accept values strictly between 0 and 100.
        return 0 < match.value < 100

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'cd'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])

    # "cd1", "cd-1", "cd1of2", ...
    rebulk.regex(r'cd-?(?P<cd>\d+)(?:-?of-?(?P<cd_count>\d+))?',
                 validator={'cd': in_cd_range, 'cd_count': in_cd_range},
                 formatter={'cd': int, 'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties={'cd': [None], 'cd_count': [None]})
    # "2cds", "2-cd", ...
    rebulk.regex(r'(?P<cd_count>\d+)-?cds?',
                 validator={'cd': in_cd_range, 'cd_count': in_cd_range},
                 formatter={'cd_count': int},
                 children=True,
                 private_parent=True,
                 properties={'cd': [None], 'cd_count': [None]})

    return rebulk
|
|
||||||
@@ -1,61 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
container property
|
|
||||||
"""
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
from rebulk import Rebulk
|
|
||||||
|
|
||||||
from ..common import seps
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import seps_surround
|
|
||||||
from ...reutils import build_or_pattern
|
|
||||||
|
|
||||||
|
|
||||||
def container(config):
    """
    Build the Rebulk object for the container property.

    File extensions at the end of the input are registered first (tagged ``extension``), then
    the same names as free-standing words surrounded by separators.

    :param config: rule configuration; reads the ``subtitles``, ``info``, ``videos``,
        ``torrent`` and ``nzb`` entries
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def extension_solver(match, other):
        if other.name in ('source', 'video_codec'):
            return other
        if other.name == 'container' and 'extension' not in other.tags:
            return other
        return '__default__'

    def word_solver(match, other):
        if other.name in ('source', 'video_codec'):
            return match
        if other.name == 'container' and 'extension' in other.tags:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'container'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name='container',
                    formatter=lambda value: value.strip(seps),
                    tags=['extension'],
                    conflict_solver=extension_solver)

    subtitles = config['subtitles']
    info = config['info']
    videos = config['videos']
    torrent = config['torrent']
    nzb = config['nzb']

    # Extension patterns, registered in this exact order.
    extension_groups = (
        (subtitles, 'subtitle'),
        (info, 'info'),
        (videos, 'video'),
        (torrent, 'torrent'),
        (nzb, 'nzb'),
    )
    for exts, tag in extension_groups:
        rebulk.regex(r'\.'+build_or_pattern(exts)+'$', exts=exts, tags=['extension', tag])

    rebulk.defaults(clear=True,
                    name='container',
                    validator=seps_surround,
                    formatter=lambda s: s.lower(),
                    conflict_solver=word_solver)

    # 'sub' and 'ass' are left out of the free-standing subtitle words.
    rebulk.string(*[sub for sub in subtitles if sub not in ('sub', 'ass')], tags=['subtitle'])
    rebulk.string(*videos, tags=['video'])
    rebulk.string(*torrent, tags=['torrent'])
    rebulk.string(*nzb, tags=['nzb'])

    return rebulk
|
|
||||||
@@ -1,114 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
country property
|
|
||||||
"""
|
|
||||||
# pylint: disable=no-member
|
|
||||||
import babelfish
|
|
||||||
|
|
||||||
from rebulk import Rebulk
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.words import iter_words
|
|
||||||
|
|
||||||
|
|
||||||
def country(config, common_words):
    """
    Build the Rebulk object for the country property.

    Also registers a ``guessit`` country converter in babelfish, built from
    ``config['synonyms']``.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def find_countries(string, context=None):
        """
        Find countries in given string.
        """
        allowed_countries = None
        if context:
            allowed_countries = context.get('allowed_countries')
        return CountryFinder(allowed_countries, common_words).find(string)

    def country_solver(match, other):
        # Prefer language and any other property over country if not US or GB.
        if other.name != 'language':
            return match
        if match.value not in (babelfish.Country('US'), babelfish.Country('GB')):
            return match
        return other

    def no_allowed_countries(context):
        return not context.get('allowed_countries')

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'country'))
    rebulk = rebulk.defaults(name='country')

    rebulk.functional(find_countries,
                      conflict_solver=country_solver,
                      properties={'country': [None]},
                      disabled=no_allowed_countries)

    babelfish.country_converters['guessit'] = GuessitCountryConverter(config['synonyms'])

    return rebulk
|
|
||||||
|
|
||||||
|
|
||||||
class GuessitCountryConverter(babelfish.CountryReverseConverter):
    """
    Country converter resolving configured synonyms before the regular babelfish lookups.
    """

    def __init__(self, synonyms):
        """
        :param synonyms: mapping of alpha2 code to a list of synonyms; stored lower-cased
        """
        self.guessit_exceptions = {
            syn.lower(): alpha2
            for alpha2, synlist in synonyms.items()
            for syn in synlist
        }

    @property
    def codes(self):
        """Union of babelfish country-name codes, ``COUNTRIES`` values and the synonyms."""
        name_codes = babelfish.country_converters['name'].codes
        country_names = frozenset(babelfish.COUNTRIES.values())
        synonym_keys = frozenset(self.guessit_exceptions.keys())
        return name_codes | country_names | synonym_keys

    def convert(self, alpha2):
        """Return ``'UK'`` for ``'GB'``, else the string form of the babelfish country."""
        return 'UK' if alpha2 == 'GB' else str(babelfish.Country(alpha2))

    def reverse(self, name):  # pylint:disable=arguments-differ
        """
        Resolve a name to an alpha2 code.

        :raise babelfish.CountryReverseError: when no lookup succeeds
        """
        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        key = name.lower()
        if key in self.guessit_exceptions:
            return self.guessit_exceptions[key]

        try:
            return babelfish.Country(name.upper()).alpha2
        except ValueError:
            pass

        try:
            return babelfish.Country.fromname(name).alpha2
        except babelfish.CountryReverseError:
            pass

        raise babelfish.CountryReverseError(name)
|
|
||||||
|
|
||||||
|
|
||||||
class CountryFinder(object):
    """Search a string for country words, restricted to an allow-list."""

    def __init__(self, allowed_countries, common_words):
        """
        :param allowed_countries: country names and/or alpha2 codes to accept; may be None
        :param common_words: container of lower-case words that are never treated as countries
        """
        accepted = allowed_countries or []
        self.allowed_countries = set(entry.lower() for entry in accepted)
        self.common_words = common_words

    def find(self, string):
        """
        Yield a ``(start, end, {'value': country})`` tuple for every allowed country word.

        :param string: text to scan
        """
        # NOTE(review): the string is stripped before being split into words, so
        # the yielded spans presumably refer to the stripped string - confirm
        # that callers never pass leading whitespace.
        normalized = string.strip().lower()
        for word_match in iter_words(normalized):
            candidate = word_match.value
            if candidate.lower() in self.common_words:
                continue

            try:
                country = babelfish.Country.fromguessit(candidate)
                accepted = self.allowed_countries
                if country.name.lower() in accepted or country.alpha2.lower() in accepted:
                    yield self._to_rebulk_match(word_match, country)
            except babelfish.Error:
                # Not a recognised country: move on to the next word.
                continue

    @classmethod
    def _to_rebulk_match(cls, word, value):
        """Build the ``(start, end, kwargs)`` tuple from a word's span and the matched value."""
        start, end = word.span[0], word.span[1]
        return start, end, {'value': value}
|
|
||||||
@@ -1,90 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
crc and uuid properties
|
|
||||||
"""
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
from rebulk import Rebulk
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import seps_surround
|
|
||||||
|
|
||||||
|
|
||||||
def crc(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object holding the ``crc32`` and ``uuid`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    season_episode_names = ['episode', 'season']

    def crc32_disabled(context):
        """Disable the whole builder when 'crc32' is disabled in the context."""
        return is_disabled(context, 'crc32')

    def crc32_conflict_solver(match, other):  # pylint:disable=unused-argument
        """Answer ``other`` on an episode/season conflict, the default behaviour otherwise."""
        if other.name in season_episode_names:
            return other
        return '__default__'

    def uuid_conflict_solver(match, other):
        """Answer ``match`` on an episode/season conflict, the default behaviour otherwise."""
        if other.name in season_episode_names:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=crc32_disabled).regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)

    # Eight hexadecimal characters.
    rebulk.regex('(?:[a-fA-F]|[0-9]){8}', name='crc32',
                 conflict_solver=crc32_conflict_solver)

    rebulk.functional(guess_idnumber, name='uuid',
                      conflict_solver=uuid_conflict_solver)
    return rebulk
|
|
||||||
|
|
||||||
|
|
||||||
# Character classes used while scanning a candidate id.
_DIGIT = 0
_LETTER = 1
_OTHER = 2

# Candidate ids: runs of at least 20 ASCII letters, digits or dashes.
_idnum = re.compile(r'(?P<uuid>[a-zA-Z0-9-]{20,})')  # 1.0, (0, 0))


def guess_idnumber(string):
    """
    Find the spans of substrings that look like an id/hash value.

    A candidate (20+ letters, digits or dashes) is kept only when it
    alternates often between character classes and its letters are not
    mostly repeats of the preceding letter.

    :param string: text to scan
    :type string: str
    :return: list of ``(start, end)`` spans, in order of appearance
    :rtype: list
    """
    # pylint:disable=invalid-name
    digits = '0123456789'
    letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    spans = []

    for found in _idnum.finditer(string):
        candidate = found.group('uuid')
        kind_switches = 0
        letter_switches = 0
        letter_total = 0
        previous_letter = None

        # The scan starts as if a letter preceded the candidate.
        previous_kind = _LETTER
        for char in candidate:
            if char in digits:
                kind = _DIGIT
            elif char in letters:
                kind = _LETTER
                if char != previous_letter:
                    letter_switches += 1
                previous_letter = char
                letter_total += 1
            else:
                kind = _OTHER

            if kind != previous_kind:
                kind_switches += 1
            previous_kind = kind

        # Only keep the candidate as probable if it alternates often between
        # char types (more likely for hash values than for common words).
        kind_ratio = float(kind_switches) / len(candidate)
        if letter_total > 0:
            letter_ratio = float(letter_switches) / letter_total
        else:
            letter_ratio = 1

        if kind_ratio > 0.4 and letter_ratio > 0.4:
            spans.append(found.span())

    return spans
|
|
||||||
@@ -1,84 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
date and year properties
|
|
||||||
"""
|
|
||||||
from rebulk import Rebulk, RemoveMatch, Rule
|
|
||||||
|
|
||||||
from ..common.date import search_date, valid_year
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import seps_surround
|
|
||||||
|
|
||||||
|
|
||||||
def date(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object holding the ``year`` and ``date`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def year_conflict_solver(match, other):
        """Answer ``other`` when it is a shorter episode/season match, the default otherwise."""
        if other.name in ('episode', 'season') and len(other.raw) < len(match.raw):
            return other
        return '__default__'

    def year_validator(match):
        """A year must be surrounded by separators and hold a valid year value."""
        return seps_surround(match) and valid_year(match.value)

    def date_conflict_solver(match, other):  # pylint:disable=unused-argument
        """Answer ``other`` on an episode/season/crc32 conflict, the default otherwise."""
        if other.name in ('episode', 'season', 'crc32'):
            return other
        return '__default__'

    def date_functional(string, context):
        """
        Search for a date in the string, honouring the context's ordering hints.

        :param string: text to scan
        :param context: context providing 'date_year_first' and 'date_day_first'
        :return: ``(start, end, {'value': date})`` or None when no date is found
        """
        found = search_date(string, context.get('date_year_first'), context.get('date_day_first'))
        if not found:
            return None
        return found[0], found[1], {'value': found[2]}

    rebulk = Rebulk().defaults(validator=seps_surround)

    rebulk.regex(r"\d{4}", name="year", formatter=int,
                 disabled=lambda context: is_disabled(context, 'year'),
                 conflict_solver=year_conflict_solver,
                 validator=year_validator)

    rebulk.functional(date_functional, name="date", properties={'date': [None]},
                      disabled=lambda context: is_disabled(context, 'date'),
                      conflict_solver=date_conflict_solver)

    rebulk.rules(KeepMarkedYearInFilepart)

    return rebulk
|
|
||||||
|
|
||||||
|
|
||||||
class KeepMarkedYearInFilepart(Rule):
    """
    Keep first years marked with [](){} in filepart, or if no year is marked, ensure it won't override titles.
    """
    priority = 64
    consequence = RemoveMatch

    def enabled(self, context):
        """The rule only runs while the 'year' property is enabled."""
        return not is_disabled(context, 'year')

    def when(self, matches, context):
        """
        Collect the year matches to hand over to the ``RemoveMatch`` consequence.

        :param matches: current matches
        :param context: parsing context (unused)
        :return: list of year matches, possibly empty
        """
        to_remove = []
        if len(matches.named('year')) <= 1:
            return to_remove

        for filepart in matches.markers.named('path'):
            years = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year')
            if len(years) <= 1:
                continue

            # Split the years on whether they sit inside a 'group' marker.
            grouped = []
            ungrouped = []
            for year in years:
                in_group = matches.markers.at_match(year, lambda marker: marker.name == 'group')
                (grouped if in_group else ungrouped).append(year)

            if not grouped:
                # Keep first year for title: it is dropped as a year match,
                # together with everything after the second one.
                to_remove.append(ungrouped[0])
                to_remove.extend(ungrouped[2:])
            elif ungrouped:
                # Keep the first year in marker.
                to_remove.extend(ungrouped)
                to_remove.extend(grouped[1:])

        return to_remove
|
|
||||||
@@ -1,52 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
edition property
|
|
||||||
"""
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
from rebulk import Rebulk
|
|
||||||
from ..common import dash
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import seps_surround
|
|
||||||
|
|
||||||
|
|
||||||
def edition(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object holding the ``edition`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def neighbor_prefix_tags():
        """Return a fresh tag list so that no two patterns share one list object."""
        return ['has-neighbor', 'release-group-prefix']

    def special_conflict_solver(match, other):  # pylint:disable=unused-argument
        """Answer ``other`` when it is the 'Special' episode_details match, the default otherwise."""
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'edition'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    # Patterns are registered in the same order as before.
    rebulk.regex('collector', "collector'?s?-edition", 'edition-collector', value='Collector')
    rebulk.regex('special-edition', 'edition-special', value='Special',
                 conflict_solver=special_conflict_solver)
    rebulk.string('se', value='Special', tags='has-neighbor')
    rebulk.string('ddc', value="Director's Definitive Cut")
    rebulk.regex('criterion-edition', 'edition-criterion', 'CC', value='Criterion')
    rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe')
    rebulk.regex('limited', 'limited-edition', value='Limited', tags=neighbor_prefix_tags())
    rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical')
    rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
                 value="Director's Cut")
    rebulk.regex('extended', 'extended-?cut', 'extended-?version',
                 value='Extended', tags=neighbor_prefix_tags())
    rebulk.regex('alternat(e|ive)(?:-?Cut)?', value='Alternative Cut', tags=neighbor_prefix_tags())
    for label in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
        rebulk.string(label, value=label, tags=neighbor_prefix_tags())
    rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])
    rebulk.regex('imax', 'imax-edition', value='IMAX')
    rebulk.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan')
    rebulk.regex('ultimate-edition', value='Ultimate')
    rebulk.regex("ultimate-collector'?s?-edition", value=['Ultimate', 'Collector'])
    rebulk.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection', value=['Ultimate', 'Fan'])

    return rebulk
|
|
||||||
@@ -1,300 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Episode title
|
|
||||||
"""
|
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, RenameMatch, POST_PROCESS
|
|
||||||
|
|
||||||
from ..common import seps, title_seps
|
|
||||||
from ..common.formatters import cleanup
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import or_
|
|
||||||
from ..properties.title import TitleFromPosition, TitleBaseRule
|
|
||||||
from ..properties.type import TypeProcessor
|
|
||||||
|
|
||||||
|
|
||||||
def episode_title(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object holding the episode title rules.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # Match names passed to the rules that take ``previous_names``.
    previous_names = ('episode', 'episode_count',
                      'season', 'season_count', 'date', 'title', 'year')

    def episode_title_disabled(context):
        """Disable the whole builder when 'episode_title' is disabled in the context."""
        return is_disabled(context, 'episode_title')

    rules = [
        RemoveConflictsWithEpisodeTitle(previous_names),
        EpisodeTitleFromPosition(previous_names),
        AlternativeTitleReplace(previous_names),
        TitleToEpisodeTitle,
        Filepart3EpisodeTitle,
        Filepart2EpisodeTitle,
        RenameEpisodeTitleWhenMovieType,
    ]
    return Rebulk(disabled=episode_title_disabled).rules(*rules)
|
|
||||||
|
|
||||||
|
|
||||||
class RemoveConflictsWithEpisodeTitle(Rule):
    """
    Remove conflicting matches that might lead to wrong episode_title parsing.
    """

    priority = 64
    consequence = RemoveMatch

    def __init__(self, previous_names):
        """
        :param previous_names: match names accepted immediately before a candidate
        """
        super(RemoveConflictsWithEpisodeTitle, self).__init__()
        self.previous_names = previous_names
        # Match names accepted immediately after a candidate.
        self.next_names = ('streaming_service', 'screen_size', 'source',
                           'video_codec', 'audio_codec', 'other', 'container')
        # These names are only removed when there is a hole after them.
        self.affected_if_holes_after = ('part', )
        # Names of the matches this rule may remove.
        self.affected_names = ('part', 'year')

    def when(self, matches, context):
        """
        Collect 'part'/'year' matches (and their parents) to hand over to ``RemoveMatch``.

        A candidate is collected when the closest non-private match before it is
        named in ``previous_names``, the closest non-private match after it is
        named in ``next_names``, and there is a valued hole in the same group
        between the candidate and one of those neighbours.

        :param matches: current matches
        :param context: parsing context (unused)
        :return: list of matches to remove, possibly empty
        """
        def is_public(item):
            return not item.private

        def is_group(marker):
            return marker.name == 'group'

        removable = []
        for filepart in matches.markers.named('path'):
            candidates = matches.range(filepart.start, filepart.end,
                                       predicate=lambda m: m.name in self.affected_names)
            for match in candidates:
                before = matches.range(filepart.start, match.start, predicate=is_public, index=-1)
                if not before or before.name not in self.previous_names:
                    continue

                after = matches.range(match.end, filepart.end, predicate=is_public, index=0)
                if not after or after.name not in self.next_names:
                    continue

                group = matches.markers.at_match(match, predicate=is_group, index=0)

                # The group is bound as a default so each iteration keeps its own value.
                def fills_same_group(hole, reference_group=group):
                    """Return true if the hole has a value and belongs to the reference group."""
                    return hole.value.strip(seps) and (
                        reference_group == matches.markers.at_match(hole, predicate=is_group, index=0)
                    )

                holes_before = matches.holes(before.end, match.start, predicate=fills_same_group)
                holes_after = matches.holes(match.end, after.start, predicate=fills_same_group)

                if not (holes_before or holes_after):
                    continue

                if match.name in self.affected_if_holes_after and not holes_after:
                    continue

                removable.append(match)
                if match.parent:
                    removable.append(match.parent)

        return removable
|
|
||||||
|
|
||||||
|
|
||||||
class TitleToEpisodeTitle(Rule):
    """
    If multiple different title are found, convert the one following episode number to episode_title.
    """
    dependency = TitleFromPosition

    def when(self, matches, context):
        """
        Select the title matches that come after an episode match.

        :param matches: current matches
        :param context: parsing context (unused)
        :return: list of title matches to rename; empty when fewer than two
                 distinct title values exist
        """
        titles = matches.named('title')

        # Only act when at least two different title values were found.
        distinct_values = {title.value for title in titles}
        if len(distinct_values) < 2:
            return []

        return [title for title in titles
                if matches.previous(title, lambda match: match.name == 'episode')]

    def then(self, matches, when_response, context):
        """Rename each selected title to 'episode_title', re-inserting it into ``matches``."""
        for selected in when_response:
            matches.remove(selected)
            selected.name = 'episode_title'
            matches.append(selected)
|
|
||||||
|
|
||||||
|
|
||||||
class EpisodeTitleFromPosition(TitleBaseRule):
    """
    Add episode title match in existing matches
    Must run after TitleFromPosition rule.
    """
    dependency = TitleToEpisodeTitle

    def __init__(self, previous_names):
        """
        :param previous_names: match names that may precede an episode title hole
        """
        super(EpisodeTitleFromPosition, self).__init__('episode_title', ['title'])
        self.previous_names = previous_names

    def hole_filter(self, hole, matches):
        """
        Accept a hole when a previous match is named in ``previous_names`` or a crc32 match exists.

        :return: the previous match when there is one, otherwise the crc32 matches
        """
        def has_previous_name(previous):
            return previous.named(*self.previous_names)

        preceding = matches.previous(hole, has_previous_name, 0)
        crc32_matches = matches.named('crc32')
        return preceding or crc32_matches

    def filepart_filter(self, filepart, matches):
        """Accept only the filepart where a title was found."""
        titles = matches.range(filepart.start, filepart.end, lambda match: match.name == 'title')
        return bool(titles)

    def should_remove(self, match, matches, filepart, hole, context):
        """Never remove 'episode_details' matches; defer to the base rule for everything else."""
        if match.name != 'episode_details':
            return super(EpisodeTitleFromPosition, self).should_remove(match, matches, filepart, hole, context)
        return False

    def when(self, matches, context):
        """Skip the rule when an episode_title already exists, otherwise run the base rule."""
        if matches.named('episode_title'):
            return None
        return super(EpisodeTitleFromPosition, self).when(matches, context)
|
|
||||||
|
|
||||||
|
|
||||||
class AlternativeTitleReplace(Rule):
    """
    If alternateTitle was found and title is next to episode, season or date, replace it with episode_title.
    """
    dependency = EpisodeTitleFromPosition
    consequence = RenameMatch

    def __init__(self, previous_names):
        """
        :param previous_names: match names that may precede the main title
        """
        super(AlternativeTitleReplace, self).__init__()
        self.previous_names = previous_names

    def when(self, matches, context):
        """
        Return the first alternative_title match when it should become the episode title.

        :param matches: current matches
        :param context: parsing context (unused)
        :return: the alternative_title match, or None when the rule does not apply
        """
        if matches.named('episode_title'):
            return None

        alternative_title = matches.range(predicate=lambda match: match.name == 'alternative_title', index=0)
        if not alternative_title:
            return None

        main_title = matches.chain_before(alternative_title.start, seps=seps,
                                          predicate=lambda match: 'title' in match.tags, index=0)
        if not main_title:
            return None

        preceding = matches.previous(main_title,
                                     lambda previous: previous.named(*self.previous_names),
                                     0)
        crc32_matches = matches.named('crc32')

        if preceding or crc32_matches:
            return alternative_title
        return None

    def then(self, matches, when_response, context):
        """Rename the selected match to 'episode_title' and tag it 'alternative-replaced'."""
        replaced = when_response
        matches.remove(replaced)
        replaced.name = 'episode_title'
        replaced.tags.append('alternative-replaced')
        matches.append(replaced)
|
|
||||||
|
|
||||||
|
|
||||||
class RenameEpisodeTitleWhenMovieType(Rule):
    """
    Rename episode_title by alternative_title when type is movie.
    """
    priority = POST_PROCESS

    dependency = TypeProcessor
    consequence = RenameMatch

    def when(self, matches, context):
        """
        Return every episode_title match when no 'type' match has the value 'episode'.

        The rule only applies if at least one episode_title is not tagged
        'alternative-replaced'.

        :param matches: current matches
        :param context: parsing context (unused)
        :return: the episode_title matches, or None when the rule does not apply
        """
        if not matches.named('episode_title', lambda m: 'alternative-replaced' not in m.tags):
            return None
        if matches.named('type', lambda m: m.value == 'episode'):
            return None
        return matches.named('episode_title')

    def then(self, matches, when_response, context):
        """Rename each selected match to 'alternative_title', re-inserting it into ``matches``."""
        for renamed in when_response:
            matches.remove(renamed)
            renamed.name = 'alternative_title'
            matches.append(renamed)
|
|
||||||
|
|
||||||
|
|
||||||
class Filepart3EpisodeTitle(Rule):
    """
    If we have at least 3 filepart structured like this:

    Serie name/S01/E01-episode_title.mkv
    AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC

    Serie name/S01/episode_title-E01.mkv
    AAAAAAAAAA/BBB/CCCCCCCCCCCCCCCCCCCC

    If CCCC contains episode and BBB contains seasonNumber
    Then title is to be found in AAAA.
    """
    consequence = AppendMatch('title')

    def when(self, matches, context):
        """
        Return the first valued hole of the third-from-last filepart as the title.

        :param matches: current matches
        :param context: parsing context (unused)
        :return: the hole to append as 'title', or None when the rule does not apply
        """
        if matches.tagged('filepart-title'):
            return None

        fileparts = matches.markers.named('path')
        if len(fileparts) < 3:
            return None

        filename, directory, subdirectory = fileparts[-1], fileparts[-2], fileparts[-3]

        # The filename must hold an episode and its directory a season.
        if not matches.range(filename.start, filename.end, lambda match: match.name == 'episode', 0):
            return None
        if not matches.range(directory.start, directory.end, lambda match: match.name == 'season', 0):
            return None

        hole = matches.holes(subdirectory.start, subdirectory.end,
                             ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
                             formatter=cleanup, seps=title_seps, predicate=lambda match: match.value,
                             index=0)
        if hole:
            return hole
        return None
|
|
||||||
|
|
||||||
|
|
||||||
class Filepart2EpisodeTitle(Rule):
    """
    If we have at least 2 filepart structured like this:

    Serie name S01/E01-episode_title.mkv
    AAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBB

    If BBBB contains episode and AAA contains a hole followed by seasonNumber
    then title is to be found in AAAA.

    or

    Serie name/S01E01-episode_title.mkv
    AAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBB

    If BBBB contains season and episode and AAA contains a hole
    then title is to be found in AAAA.
    """
    consequence = AppendMatch('title')

    def when(self, matches, context):
        """
        Return the first valued hole of the directory filepart as the title.

        The returned hole is tagged 'filepart-title'.

        :param matches: current matches
        :param context: parsing context (unused)
        :return: the hole to append as 'title', or None when the rule does not apply
        """
        if matches.tagged('filepart-title'):
            return None

        fileparts = matches.markers.named('path')
        if len(fileparts) < 2:
            return None

        filename, directory = fileparts[-1], fileparts[-2]

        def is_season(match):
            return match.name == 'season'

        if not matches.range(filename.start, filename.end, lambda match: match.name == 'episode', 0):
            return None

        # The season may sit in the directory or, failing that, in the filename.
        season = (matches.range(directory.start, directory.end, is_season, 0) or
                  matches.range(filename.start, filename.end, is_season, 0))
        if not season:
            return None

        hole = matches.holes(directory.start, directory.end,
                             ignore=or_(lambda match: 'weak-episode' in match.tags, TitleBaseRule.is_ignored),
                             formatter=cleanup, seps=title_seps,
                             predicate=lambda match: match.value, index=0)
        if not hole:
            return None

        hole.tags.append('filepart-title')
        return hole
|
|
||||||
@@ -1,912 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
episode, season, disc, episode_count, season_count and episode_details properties
|
|
||||||
"""
|
|
||||||
import copy
|
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
from rebulk import Rebulk, RemoveMatch, Rule, AppendMatch, RenameMatch
|
|
||||||
from rebulk.match import Match
|
|
||||||
from rebulk.remodule import re
|
|
||||||
from rebulk.utils import is_iterable
|
|
||||||
|
|
||||||
from guessit.rules import match_processors
|
|
||||||
from guessit.rules.common.numeral import parse_numeral, numeral
|
|
||||||
from .title import TitleFromPosition
|
|
||||||
from ..common import dash, alt_dash, seps, seps_no_fs
|
|
||||||
from ..common.formatters import strip
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import seps_surround, int_coercable, and_
|
|
||||||
from ...reutils import build_or_pattern
|
|
||||||
|
|
||||||
|
|
||||||
def episodes(config):
|
|
||||||
"""
|
|
||||||
Builder for rebulk object.
|
|
||||||
|
|
||||||
:param config: rule configuration
|
|
||||||
:type config: dict
|
|
||||||
:return: Created Rebulk object
|
|
||||||
:rtype: Rebulk
|
|
||||||
"""
|
|
||||||
|
|
||||||
# pylint: disable=too-many-branches,too-many-statements,too-many-locals
|
|
||||||
def is_season_episode_disabled(context):
|
|
||||||
"""Whether season and episode rules should be enabled."""
|
|
||||||
return is_disabled(context, 'episode') or is_disabled(context, 'season')
|
|
||||||
|
|
||||||
def episodes_season_chain_breaker(matches):
|
|
||||||
"""
|
|
||||||
Break chains if there's more than 100 offset between two neighbor values.
|
|
||||||
:param matches:
|
|
||||||
:type matches:
|
|
||||||
:return:
|
|
||||||
:rtype:
|
|
||||||
"""
|
|
||||||
eps = matches.named('episode')
|
|
||||||
if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > episode_max_range:
|
|
||||||
return True
|
|
||||||
|
|
||||||
seasons = matches.named('season')
|
|
||||||
if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > season_max_range:
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
def season_episode_conflict_solver(match, other):
|
|
||||||
"""
|
|
||||||
Conflict solver for episode/season patterns
|
|
||||||
|
|
||||||
:param match:
|
|
||||||
:param other:
|
|
||||||
:return:
|
|
||||||
"""
|
|
||||||
if match.name != other.name:
|
|
||||||
if match.name == 'episode' and other.name == 'year':
|
|
||||||
return match
|
|
||||||
if match.name in ('season', 'episode'):
|
|
||||||
if other.name in ('video_codec', 'audio_codec', 'container', 'date'):
|
|
||||||
return match
|
|
||||||
if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
|
|
||||||
and not match.initiator.children.named(match.name + 'Marker')) or (
|
|
||||||
other.name == 'screen_size' and not int_coercable(other.raw)):
|
|
||||||
return match
|
|
||||||
if other.name in ('season', 'episode') and match.initiator != other.initiator:
|
|
||||||
if (match.initiator.name in ('weak_episode', 'weak_duplicate')
|
|
||||||
and other.initiator.name in ('weak_episode', 'weak_duplicate')):
|
|
||||||
return '__default__'
|
|
||||||
for current in (match, other):
|
|
||||||
if 'weak-episode' in current.tags or 'x' in current.initiator.raw.lower():
|
|
||||||
return current
|
|
||||||
return '__default__'
|
|
||||||
|
|
||||||
def ordering_validator(match):
|
|
||||||
"""
|
|
||||||
Validator for season list. They should be in natural order to be validated.
|
|
||||||
|
|
||||||
episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
|
|
||||||
or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)
|
|
||||||
"""
|
|
||||||
values = match.children.to_dict()
|
|
||||||
if 'season' in values and is_iterable(values['season']):
|
|
||||||
# Season numbers must be in natural order to be validated.
|
|
||||||
if not list(sorted(values['season'])) == values['season']:
|
|
||||||
return False
|
|
||||||
if 'episode' in values and is_iterable(values['episode']):
|
|
||||||
# Season numbers must be in natural order to be validated.
|
|
||||||
if not list(sorted(values['episode'])) == values['episode']:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def is_consecutive(property_name):
|
|
||||||
"""
|
|
||||||
Check if the property season or episode has valid consecutive values.
|
|
||||||
:param property_name:
|
|
||||||
:type property_name:
|
|
||||||
:return:
|
|
||||||
:rtype:
|
|
||||||
"""
|
|
||||||
previous_match = None
|
|
||||||
valid = True
|
|
||||||
for current_match in match.children.named(property_name):
|
|
||||||
if previous_match:
|
|
||||||
match.children.previous(current_match,
|
|
||||||
lambda m: m.name == property_name + 'Separator')
|
|
||||||
separator = match.children.previous(current_match,
|
|
||||||
lambda m: m.name == property_name + 'Separator', 0)
|
|
||||||
if separator:
|
|
||||||
if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
|
|
||||||
if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
|
|
||||||
valid = False
|
|
||||||
if separator.raw in strong_discrete_separators:
|
|
||||||
valid = True
|
|
||||||
break
|
|
||||||
previous_match = current_match
|
|
||||||
return valid
|
|
||||||
|
|
||||||
return is_consecutive('episode') and is_consecutive('season')
|
|
||||||
|
|
||||||
def validate_roman(match):
    """
    Validate a numeral match.

    The match is accepted outright when ``int_coercable(match.raw)`` holds; otherwise the
    verdict is whatever ``seps_surround(match)`` returns.

    :param match: match to validate; only its ``raw`` text and position helpers are used
    :type match:
    :return: ``True`` for int-coercable raw text, else the result of ``seps_surround``
    :rtype:
    """
    return True if int_coercable(match.raw) else seps_surround(match)
|
|
||||||
|
|
||||||
season_words = config['season_words']
|
|
||||||
episode_words = config['episode_words']
|
|
||||||
of_words = config['of_words']
|
|
||||||
all_words = config['all_words']
|
|
||||||
season_markers = config['season_markers']
|
|
||||||
season_ep_markers = config['season_ep_markers']
|
|
||||||
disc_markers = config['disc_markers']
|
|
||||||
episode_markers = config['episode_markers']
|
|
||||||
range_separators = config['range_separators']
|
|
||||||
weak_discrete_separators = list(sep for sep in seps_no_fs if sep not in range_separators)
|
|
||||||
strong_discrete_separators = config['discrete_separators']
|
|
||||||
discrete_separators = strong_discrete_separators + weak_discrete_separators
|
|
||||||
episode_max_range = config['episode_max_range']
|
|
||||||
season_max_range = config['season_max_range']
|
|
||||||
max_range_gap = config['max_range_gap']
|
|
||||||
|
|
||||||
rebulk = Rebulk() \
|
|
||||||
.regex_defaults(flags=re.IGNORECASE) \
|
|
||||||
.string_defaults(ignore_case=True) \
|
|
||||||
.chain_defaults(chain_breaker=episodes_season_chain_breaker) \
|
|
||||||
.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
|
|
||||||
formatter={'season': int, 'episode': int, 'version': int, 'count': int},
|
|
||||||
children=True,
|
|
||||||
private_parent=True,
|
|
||||||
conflict_solver=season_episode_conflict_solver,
|
|
||||||
abbreviations=[alt_dash])
|
|
||||||
|
|
||||||
# S01E02, 01x02, S01S02S03
|
|
||||||
rebulk.chain(
|
|
||||||
tags=['SxxExx'],
|
|
||||||
validate_all=True,
|
|
||||||
validator={'__parent__': and_(seps_surround, ordering_validator)},
|
|
||||||
disabled=is_season_episode_disabled) \
|
|
||||||
.defaults(tags=['SxxExx']) \
|
|
||||||
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
|
|
||||||
build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)')\
|
|
||||||
.repeater('+') \
|
|
||||||
.regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
|
|
||||||
name='episodeSeparator',
|
|
||||||
escape=True) +
|
|
||||||
r'(?P<episode>\d+)').repeater('*')
|
|
||||||
|
|
||||||
rebulk.chain(tags=['SxxExx'],
|
|
||||||
validate_all=True,
|
|
||||||
validator={'__parent__': and_(seps_surround, ordering_validator)},
|
|
||||||
disabled=is_season_episode_disabled) \
|
|
||||||
.defaults(tags=['SxxExx']) \
|
|
||||||
.regex(r'(?P<season>\d+)@?' +
|
|
||||||
build_or_pattern(season_ep_markers, name='episodeMarker') +
|
|
||||||
r'@?(?P<episode>\d+)').repeater('+') \
|
|
||||||
|
|
||||||
rebulk.chain(tags=['SxxExx'],
|
|
||||||
validate_all=True,
|
|
||||||
validator={'__parent__': and_(seps_surround, ordering_validator)},
|
|
||||||
disabled=is_season_episode_disabled) \
|
|
||||||
.defaults(tags=['SxxExx']) \
|
|
||||||
.regex(r'(?P<season>\d+)@?' +
|
|
||||||
build_or_pattern(season_ep_markers, name='episodeMarker') +
|
|
||||||
r'@?(?P<episode>\d+)') \
|
|
||||||
.regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
|
|
||||||
name='episodeSeparator',
|
|
||||||
escape=True) +
|
|
||||||
r'(?P<episode>\d+)').repeater('*')
|
|
||||||
|
|
||||||
rebulk.chain(tags=['SxxExx'],
|
|
||||||
validate_all=True,
|
|
||||||
validator={'__parent__': and_(seps_surround, ordering_validator)},
|
|
||||||
disabled=is_season_episode_disabled) \
|
|
||||||
.defaults(tags=['SxxExx']) \
|
|
||||||
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)') \
|
|
||||||
.regex('(?P<other>Extras)', name='other', value='Extras', tags=['no-release-group-prefix']).repeater('?') \
|
|
||||||
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
|
|
||||||
name='seasonSeparator',
|
|
||||||
escape=True) +
|
|
||||||
r'(?P<season>\d+)').repeater('*')
|
|
||||||
|
|
||||||
# episode_details property
|
|
||||||
for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
|
|
||||||
rebulk.string(episode_detail,
|
|
||||||
private_parent=False,
|
|
||||||
children=False,
|
|
||||||
value=episode_detail,
|
|
||||||
name='episode_details',
|
|
||||||
disabled=lambda context: is_disabled(context, 'episode_details'))
|
|
||||||
|
|
||||||
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
|
|
||||||
validate_all=True,
|
|
||||||
validator={'__parent__': and_(seps_surround, ordering_validator)},
|
|
||||||
children=True,
|
|
||||||
private_parent=True,
|
|
||||||
conflict_solver=season_episode_conflict_solver)
|
|
||||||
|
|
||||||
rebulk.chain(validate_all=True,
|
|
||||||
conflict_solver=season_episode_conflict_solver,
|
|
||||||
formatter={'season': parse_numeral, 'count': parse_numeral},
|
|
||||||
validator={'__parent__': and_(seps_surround, ordering_validator),
|
|
||||||
'season': validate_roman,
|
|
||||||
'count': validate_roman},
|
|
||||||
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
|
|
||||||
.defaults(formatter={'season': parse_numeral, 'count': parse_numeral},
|
|
||||||
validator={'season': validate_roman, 'count': validate_roman},
|
|
||||||
conflict_solver=season_episode_conflict_solver) \
|
|
||||||
.regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
|
|
||||||
.regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
|
|
||||||
.regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
|
|
||||||
name='seasonSeparator', escape=True) +
|
|
||||||
r'@?(?P<season>\d+)').repeater('*')
|
|
||||||
|
|
||||||
rebulk.defaults(abbreviations=[dash])
|
|
||||||
|
|
||||||
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
|
|
||||||
r'(?:v(?P<version>\d+))?' +
|
|
||||||
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
|
|
||||||
disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))
|
|
||||||
|
|
||||||
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
|
|
||||||
r'(?:v(?P<version>\d+))?' +
|
|
||||||
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
|
|
||||||
validator={'episode': validate_roman},
|
|
||||||
formatter={'episode': parse_numeral},
|
|
||||||
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))
|
|
||||||
|
|
||||||
rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
|
|
||||||
tags=['SxxExx'],
|
|
||||||
formatter={'other': lambda match: 'Complete'},
|
|
||||||
disabled=lambda context: is_disabled(context, 'season'))
|
|
||||||
|
|
||||||
# 12, 13
|
|
||||||
rebulk.chain(tags=['weak-episode'],
|
|
||||||
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
|
|
||||||
.defaults(validator=None, tags=['weak-episode']) \
|
|
||||||
.regex(r'(?P<episode>\d{2})') \
|
|
||||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
|
||||||
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})', abbreviations=None).repeater('*')
|
|
||||||
|
|
||||||
# 012, 013
|
|
||||||
rebulk.chain(tags=['weak-episode'],
|
|
||||||
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
|
|
||||||
.defaults(validator=None, tags=['weak-episode']) \
|
|
||||||
.regex(r'0(?P<episode>\d{1,2})') \
|
|
||||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
|
||||||
.regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
|
|
||||||
|
|
||||||
# 112, 113
|
|
||||||
rebulk.chain(tags=['weak-episode'],
|
|
||||||
name='weak_episode',
|
|
||||||
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
|
|
||||||
.defaults(validator=None, tags=['weak-episode'], name='weak_episode') \
|
|
||||||
.regex(r'(?P<episode>\d{3,4})') \
|
|
||||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
|
||||||
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})', abbreviations=None).repeater('*')
|
|
||||||
|
|
||||||
# 1, 2, 3
|
|
||||||
rebulk.chain(tags=['weak-episode'],
|
|
||||||
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
|
|
||||||
.defaults(validator=None, tags=['weak-episode']) \
|
|
||||||
.regex(r'(?P<episode>\d)') \
|
|
||||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
|
||||||
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
|
|
||||||
|
|
||||||
# e112, e113, 1e18, 3e19
|
|
||||||
rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
|
|
||||||
.defaults(validator=None) \
|
|
||||||
.regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
|
|
||||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
|
||||||
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
|
|
||||||
|
|
||||||
# ep 112, ep113, ep112, ep113
|
|
||||||
rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
|
|
||||||
.defaults(validator=None) \
|
|
||||||
.regex(r'ep-?(?P<episode>\d{1,4})') \
|
|
||||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
|
||||||
.regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
|
|
||||||
|
|
||||||
# cap 112, cap 112_114
|
|
||||||
rebulk.chain(tags=['see-pattern'],
|
|
||||||
disabled=is_season_episode_disabled) \
|
|
||||||
.defaults(validator=None, tags=['see-pattern']) \
|
|
||||||
.regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
|
|
||||||
.regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')
|
|
||||||
|
|
||||||
# 102, 0102
|
|
||||||
rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
|
|
||||||
name='weak_duplicate',
|
|
||||||
conflict_solver=season_episode_conflict_solver,
|
|
||||||
disabled=lambda context: (context.get('episode_prefer_number', False) or
|
|
||||||
context.get('type') == 'movie') or is_season_episode_disabled(context)) \
|
|
||||||
.defaults(tags=['weak-episode', 'weak-duplicate'],
|
|
||||||
name='weak_duplicate',
|
|
||||||
validator=None,
|
|
||||||
conflict_solver=season_episode_conflict_solver) \
|
|
||||||
.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
|
|
||||||
.regex(r'v(?P<version>\d+)').repeater('?') \
|
|
||||||
.regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})', abbreviations=None).repeater('*')
|
|
||||||
|
|
||||||
rebulk.regex(r'v(?P<version>\d+)',
|
|
||||||
formatter=int,
|
|
||||||
disabled=lambda context: is_disabled(context, 'version'))
|
|
||||||
|
|
||||||
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])
|
|
||||||
|
|
||||||
# TODO: List of words
|
|
||||||
# detached of X count (season/episode)
|
|
||||||
rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
|
|
||||||
r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
|
|
||||||
formatter=int,
|
|
||||||
pre_match_processor=match_processors.strip,
|
|
||||||
disabled=lambda context: is_disabled(context, 'episode'))
|
|
||||||
|
|
||||||
rebulk.regex(r'Minisodes?',
|
|
||||||
children=False,
|
|
||||||
private_parent=False,
|
|
||||||
name='episode_format',
|
|
||||||
value="Minisode",
|
|
||||||
disabled=lambda context: is_disabled(context, 'episode_format'))
|
|
||||||
|
|
||||||
rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
|
|
||||||
SeePatternRange(range_separators + ['_']),
|
|
||||||
EpisodeNumberSeparatorRange(range_separators),
|
|
||||||
SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
|
|
||||||
EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak(episode_words),
|
|
||||||
RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)
|
|
||||||
|
|
||||||
return rebulk
|
|
||||||
|
|
||||||
|
|
||||||
class WeakConflictSolver(Rule):
    """
    Arbitrate between ``weak_episode`` and ``weak_duplicate`` matches, per path filepart.

    When the input looks like an anime (see :meth:`is_anime`) and the filepart has
    ``weak_episode`` matches:
      - the ``weak_duplicate`` matches are removed
      - every ``episode`` match not coming from ``weak_duplicate`` is replaced by a copy
        carrying the extra ``anime`` tag
    Otherwise, when the filepart has ``weak_duplicate`` matches:
      - ``weak_episode`` matches are removed unless the filepart holds weak episodes chained
        with an ``episodeSeparator`` or any ``SxxExx``-tagged match
      - chained weak episodes are replaced by untagged copies, and once any copy has been
        queued the ``weak_duplicate`` matches are removed
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]

    def enabled(self, context):
        """Enable the rule unless the context ``type`` is ``'movie'``."""
        return context.get('type') != 'movie'

    @classmethod
    def is_anime(cls, matches):
        """Return True if it seems to be an anime.

        Signals checked, in order:
          - any ``version`` or ``crc32`` match
          - a ``screen_size`` match located inside a ``group`` marker
          - a ``group`` marker starting where a ``path`` marker starts, whose first hole
            has the same raw text as the group itself
        """
        if matches.named('version') or matches.named('crc32'):
            return True

        for group in matches.markers.named('group'):
            if matches.range(group.start, group.end, predicate=lambda m: m.name == 'screen_size'):
                return True
            if not matches.markers.starting(group.start, predicate=lambda m: m.name == 'path'):
                continue
            hole = matches.holes(group.start, group.end, index=0)
            if hole and hole.raw == group.raw:
                return True

        return False

    def when(self, matches, context):
        """
        Build the removal and append lists.

        :return: ``(to_remove, to_append)`` when either list is non-empty, else ``False``
        """
        removals = []
        additions = []
        anime_detected = self.is_anime(matches)

        for filepart in matches.markers.named('path'):
            start, end = filepart.start, filepart.end
            weak_matches = matches.range(
                start, end, predicate=lambda m: m.initiator.name == 'weak_episode')
            weak_dup_matches = matches.range(
                start, end, predicate=lambda m: m.initiator.name == 'weak_duplicate')

            if anime_detected:
                if not weak_matches:
                    continue
                removals.extend(weak_dup_matches)
                for match in matches.range(start, end, predicate=(
                        lambda m: m.name == 'episode' and m.initiator.name != 'weak_duplicate')):
                    # Swap the match for a copy that also carries the 'anime' tag.
                    tagged = copy.copy(match)
                    tagged.tags = tagged.tags + ['anime']
                    additions.append(tagged)
                    removals.append(match)
                continue

            if not weak_dup_matches:
                continue

            episodes_in_range = matches.range(start, end, predicate=(
                lambda m:
                m.name == 'episode' and m.initiator.name == 'weak_episode'
                and m.initiator.children.named('episodeSeparator')
            ))
            if episodes_in_range or matches.range(start, end,
                                                  predicate=lambda m: 'SxxExx' in m.tags):
                for match in episodes_in_range:
                    # Swap the match for a copy stripped of all tags.
                    untagged = copy.copy(match)
                    untagged.tags = []
                    additions.append(untagged)
                    removals.append(match)
            else:
                removals.extend(weak_matches)

            # ``additions`` accumulates across fileparts, exactly like the original list.
            if additions:
                removals.extend(weak_dup_matches)

        return (removals, additions) if removals or additions else False
|
|
||||||
|
|
||||||
|
|
||||||
class CountValidator(Rule):
    """
    Classify each ``count`` match from the nearest preceding ``episode``/``season`` match.

    The three lists returned by :meth:`when` line up with the three consequences: matches to
    remove, matches to rename to ``episode_count``, matches to rename to ``season_count``.
    """
    priority = 64
    consequence = [RemoveMatch, RenameMatch('episode_count'), RenameMatch('season_count')]

    properties = {'episode_count': [None], 'season_count': [None]}

    def when(self, matches, context):
        """
        Sort ``count`` matches into remove / episode_count / season_count buckets.

        :return: ``(to_remove, episode_count, season_count)`` when any bucket is non-empty,
                 else ``False``
        """
        to_remove, episode_count, season_count = [], [], []
        buckets = {'episode': episode_count, 'season': season_count}

        for count in matches.named('count'):
            previous = matches.previous(count, lambda match: match.name in ['episode', 'season'], 0)
            if not previous:
                # Nothing to attach the count to: drop it.
                to_remove.append(count)
                continue
            bucket = buckets.get(previous.name)
            if bucket is not None:
                bucket.append(count)

        if to_remove or episode_count or season_count:
            return to_remove, episode_count, season_count
        return False
|
|
||||||
|
|
||||||
|
|
||||||
class SeePatternRange(Rule):
    """
    Expand episode ranges written with the ``see-pattern`` (SEE) form, e.g. ``Cap.102_104``.
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]

    def __init__(self, range_separators):
        """
        :param range_separators: separator values that denote a range
        """
        super(SeePatternRange, self).__init__()
        self.range_separators = range_separators

    def when(self, matches, context):
        """
        For each ``see-pattern`` ``episodeSeparator`` followed by a tagged season, remove the
        separator; when it is a range separator sitting between two tagged episodes, replace
        the closing episode by one copy per value from ``previous + 1`` up to and including
        the closing value.

        :return: ``(to_remove, to_append)`` when either list is non-empty, else ``False``
        """
        to_remove = []
        to_append = []

        def tagged_named(name):
            # Predicate: match of the given name carrying the 'see-pattern' tag.
            return lambda m: m.name == name and 'see-pattern' in m.tags

        for separator in matches.tagged('see-pattern', lambda m: m.name == 'episodeSeparator'):
            range_start = matches.previous(separator, tagged_named('episode'), 0)
            following_season = matches.next(separator, tagged_named('season'), 0)
            if not following_season:
                continue

            range_end = matches.next(following_season, tagged_named('episode'), 0)
            if range_start and range_end and separator.value in self.range_separators:
                to_remove.append(range_end)

                for episode_number in range(range_start.value + 1, range_end.value + 1):
                    clone = copy.copy(range_end)
                    clone.value = episode_number
                    to_append.append(clone)

            to_remove.append(separator)

        return (to_remove, to_append) if to_remove or to_append else False
|
|
||||||
|
|
||||||
|
|
||||||
class AbstractSeparatorRange(Rule):
    """
    Base rule that removes ``<property>Separator`` matches and fills in the values of a range.

    Subclasses pick the property (``episode`` or ``season``) through ``property_name``.
    """
    priority = 128
    consequence = [RemoveMatch, AppendMatch]

    def __init__(self, range_separators, property_name):
        """
        :param range_separators: separator values that denote a range
        :param property_name: name of the property whose ranges are expanded
        """
        super(AbstractSeparatorRange, self).__init__()
        self.range_separators = range_separators
        self.property_name = property_name

    def when(self, matches, context):
        """
        Expand ranges in two passes.

        Pass 1 handles explicit separator matches; pass 2 looks at the raw text between the
        initiators of consecutive property matches.

        :return: ``(to_remove, to_append)`` when either list is non-empty, else ``False``
        """
        to_remove = []
        to_append = []
        separator_name = self.property_name + 'Separator'

        def is_property(m):
            return m.name == self.property_name

        def expand(initiator, lower, upper):
            # One copy of ``upper`` per value strictly between both bounds, registered both
            # as a child of ``initiator`` and as a match to append.
            for value in range(lower.value + 1, upper.value):
                filler = copy.copy(upper)
                filler.value = value
                initiator.children.append(filler)
                to_append.append(filler)

        # Pass 1: explicit separator matches.
        for separator in matches.named(separator_name):
            lower = matches.previous(separator, is_property, 0)
            upper = matches.next(separator, is_property, 0)
            initiator = separator.initiator

            if lower and upper and separator.value in self.range_separators:
                to_remove.append(upper)
                expand(initiator, lower, upper)
                to_append.append(upper)
            to_remove.append(separator)

        # Pass 2: separators only present as raw text between two initiators.
        lower = None
        for upper in matches.named(self.property_name):
            if lower:
                between = matches.input_string[lower.initiator.end:upper.initiator.start]
                if between not in self.range_separators:
                    between = strip(between)
                if between in self.range_separators:
                    expand(lower.initiator, lower, upper)
                    to_append.append(Match(lower.end, upper.start - 1,
                                           name=separator_name,
                                           private=True,
                                           input_string=matches.input_string))
                # Remove and append match to support proper ordering
                to_remove.append(upper)
                to_append.append(upper)

            lower = upper

        return (to_remove, to_append) if to_remove or to_append else False
|
|
||||||
|
|
||||||
|
|
||||||
class RenameToAbsoluteEpisode(Rule):
    """
    Rename episode matches to ``absolute_episode``.

    Absolute episodes are only used if two groups of episodes are detected:
        S02E04-06 25-27
        25-27 S02E04-06
        2x04-06 25-27
        28. Anime Name S02E05
    The matches in the group with higher episode values are renamed to absolute_episode.
    """

    consequence = RenameMatch('absolute_episode')

    def when(self, matches, context):
        """
        Pick the episode matches to rename.

        :return: a list of matches to rename (possibly empty), or ``None`` when two
                 multi-episode groups exist but none of them qualifies
        """
        multi_episode_initiators = set()
        for match in matches.named('episode'):
            if len(match.initiator.children.named('episode')) > 1:
                multi_episode_initiators.add(match.initiator)

        if len(multi_episode_initiators) != 2:
            # No pair of groups: fall back to weak episodes that start a filepart which
            # holds another episode match after its first character.
            leading_weak = []
            for filepart in matches.markers.named('path'):
                if matches.range(filepart.start + 1, filepart.end, predicate=lambda m: m.name == 'episode'):
                    leading_weak.extend(
                        matches.starting(filepart.start, predicate=lambda m: m.initiator.name == 'weak_episode'))
            return leading_weak

        first, second = sorted(multi_episode_initiators, key=lambda item: item.end)
        if matches.holes(first.end, second.start, predicate=lambda m: m.raw.strip(seps)):
            # Something other than separators sits between the two groups.
            return None

        first_range = matches.named('episode', predicate=lambda m: m.initiator == first)
        second_range = matches.named('episode', predicate=lambda m: m.initiator == second)
        if len(first_range) != len(second_range):
            return None

        if second_range[0].value > first_range[0].value:
            return second_range
        if first_range[0].value > second_range[0].value:
            return first_range
        return None
|
|
||||||
|
|
||||||
|
|
||||||
class EpisodeNumberSeparatorRange(AbstractSeparatorRange):
    """
    Separator-range rule bound to the ``episode`` property: removes ``episodeSeparator``
    matches and creates the matches of an episode range.
    """

    def __init__(self, range_separators):
        """
        :param range_separators: separator values that denote a range
        """
        super(EpisodeNumberSeparatorRange, self).__init__(range_separators, property_name="episode")
|
|
||||||
|
|
||||||
|
|
||||||
class SeasonSeparatorRange(AbstractSeparatorRange):
    """
    Separator-range rule bound to the ``season`` property: removes ``seasonSeparator``
    matches and creates the matches of a season range.
    """

    def __init__(self, range_separators):
        """
        :param range_separators: separator values that denote a range
        """
        super(SeasonSeparatorRange, self).__init__(range_separators, property_name="season")
|
|
||||||
|
|
||||||
|
|
||||||
class RemoveWeakIfMovie(Rule):
    """
    Drop ``weak-episode`` tagged matches when a ``year`` match suggests a movie.
    """
    priority = 64
    consequence = RemoveMatch

    def enabled(self, context):
        """Enable the rule unless the context ``type`` is ``'episode'``."""
        return context.get('type') != 'episode'

    def when(self, matches, context):
        """
        Collect the matches to remove.

        For each filepart holding a ``year`` match, the matches conflicting with that year
        are removed. As soon as one filepart has a year, every ``weak-episode`` match is
        removed too, except those tagged ``anime`` or whose initiator is protected.

        :return: list of matches to remove (possibly empty)
        """
        to_remove = []
        protected = set()
        year_found = False

        for filepart in matches.markers.named('path'):
            year = matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'year', index=0)
            if not year:
                continue
            year_found = True

            # Protect the private match right after the year (only separators in between),
            # unless a year match sits at that same position.
            follower = matches.range(year.end, filepart.end, predicate=lambda m: m.private, index=0)
            if (follower
                    and not matches.holes(year.end, follower.start, predicate=lambda m: m.raw.strip(seps))
                    and not matches.at_match(follower, predicate=lambda m: m.name == 'year')):
                protected.add(follower.initiator)

            # Protect matches owning more than one episode child.
            protected.update(matches.range(filepart.start, filepart.end,
                                           predicate=lambda m: len(m.children.named('episode')) > 1))

            to_remove.extend(matches.conflicting(year))

        if year_found:
            to_remove.extend(matches.tagged('weak-episode', predicate=(
                lambda m: m.initiator not in protected and 'anime' not in m.tags)))

        return to_remove
|
|
||||||
|
|
||||||
|
|
||||||
class RemoveWeak(Rule):
    """
    Remove ``weak-episode`` matches that directly follow an audio/video/source style match.

    When the raw text of that preceding match is one of the configured episode words, the
    first weak match is re-created as a regular ``episode`` match instead.
    """
    priority = 16
    consequence = RemoveMatch, AppendMatch

    def __init__(self, episode_words):
        """
        :param episode_words: words compared against the lower-cased raw text of the
                              preceding match
        """
        super(RemoveWeak, self).__init__()
        self.episode_words = episode_words

    def when(self, matches, context):
        """
        Build the removal and append lists, one filepart at a time.

        :return: ``(to_remove, to_append)`` when either list is non-empty, else ``False``
        """
        to_remove = []
        to_append = []
        technical_names = (
            'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile',
            'audio_channels', 'audio_profile')

        for filepart in matches.markers.named('path'):
            weaks = matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags)
            if not weaks:
                continue

            weak = weaks[0]
            previous = matches.previous(weak, predicate=lambda m: m.name in technical_names, index=0)
            if not previous:
                continue
            if matches.holes(previous.end, weak.start, predicate=lambda m: m.raw.strip(seps)):
                # Something other than separators lies between both matches.
                continue

            if previous.raw.lower() in self.episode_words:
                try:
                    episode = copy.copy(weak)
                    episode.name = 'episode'
                    episode.value = int(weak.value)
                    episode.start = previous.start
                    episode.private = False
                    episode.tags = []

                    to_append.append(episode)
                except ValueError:
                    # Value is not an integer: no episode is created (best effort).
                    pass

            to_remove.extend(weaks)

        return (to_remove, to_append) if to_remove or to_append else False
|
|
||||||
|
|
||||||
|
|
||||||
class RemoveWeakIfSxxExx(Rule):
    """
    Remove ``weak-episode`` tagged matches from fileparts that hold a public ``SxxExx`` match.

    A weak match survives only when it starts the filepart and its initiator is named
    ``weak_episode``.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: list of weak matches to remove (possibly empty)
        """
        to_remove = []
        for filepart in matches.markers.named('path'):
            has_sxxexx = matches.range(filepart.start, filepart.end,
                                       predicate=lambda m: not m.private and 'SxxExx' in m.tags)
            if not has_sxxexx:
                continue
            weak_matches = matches.range(filepart.start, filepart.end,
                                         predicate=lambda m: 'weak-episode' in m.tags)
            to_remove.extend(
                match for match in weak_matches
                if match.start != filepart.start or match.initiator.name != 'weak_episode')
        return to_remove
|
|
||||||
|
|
||||||
|
|
||||||
class RemoveInvalidSeason(Rule):
    """
    Remove season matches that follow a strong (``SxxExx``) season owning episodes, when they
    are weak or have no episode of their own.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: list of matches to remove (possibly empty)
        """
        to_remove = []
        for filepart in matches.markers.named('path'):
            strong_season = matches.range(filepart.start, filepart.end, index=0,
                                          predicate=lambda m: m.name == 'season'
                                          and not m.private and 'SxxExx' in m.tags)
            if not strong_season:
                continue
            if not strong_season.initiator.children.named('episode'):
                continue

            later_seasons = matches.range(strong_season.end, filepart.end,
                                          predicate=lambda m: m.name == 'season' and not m.private)
            for season in later_seasons:
                # Keep only SxxExx seasons whose initiator owns at least one episode.
                if 'SxxExx' in season.tags and season.initiator.children.named('episode'):
                    continue
                if season.initiator:
                    to_remove.append(season.initiator)
                    to_remove.extend(season.initiator.children)
                else:
                    to_remove.append(season)

        return to_remove
|
|
||||||
|
|
||||||
|
|
||||||
class RemoveInvalidEpisode(Rule):
    """
    Remove episode matches whose prefix differs from the prefix of the first strong
    (``SxxExx``) episode of the same filepart.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: list of matches to remove (possibly empty)
        """
        to_remove = []
        for filepart in matches.markers.named('path'):
            strong_episode = matches.range(filepart.start, filepart.end, index=0,
                                           predicate=lambda m: m.name == 'episode'
                                           and not m.private and 'SxxExx' in m.tags)
            if not strong_episode:
                continue

            strong_ep_marker = RemoveInvalidEpisode.get_episode_prefix(matches, strong_episode)
            later_episodes = matches.range(strong_episode.end, filepart.end,
                                           predicate=lambda m: m.name == 'episode' and not m.private)
            for episode in later_episodes:
                ep_marker = RemoveInvalidEpisode.get_episode_prefix(matches, episode)
                if not (strong_ep_marker and ep_marker):
                    continue
                # Prefixes are compared case-insensitively.
                if strong_ep_marker.value.lower() == ep_marker.value.lower():
                    continue

                if episode.initiator:
                    to_remove.append(episode.initiator)
                    to_remove.extend(episode.initiator.children)
                else:
                    to_remove.append(ep_marker)
                    to_remove.append(episode)

        return to_remove

    @staticmethod
    def get_episode_prefix(matches, episode):
        """
        Return episode prefix: the nearest previous ``episodeMarker`` or ``episodeSeparator``
        match, as found by ``matches.previous`` with ``index=0``.
        """
        prefix_names = ('episodeMarker', 'episodeSeparator')
        return matches.previous(episode, index=0,
                                predicate=lambda m: m.name in prefix_names)
|
|
||||||
|
|
||||||
|
|
||||||
class RemoveWeakDuplicate(Rule):
    """
    Remove repeated ``weak-duplicate`` tagged matches, for example The 100.109.

    Within a filepart, matches are visited from last to first; for a given match name, only
    the first visited match of each pattern is kept.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: list of duplicate matches to remove (possibly empty)
        """
        to_remove = []
        for filepart in matches.markers.named('path'):
            seen_by_name = defaultdict(list)
            duplicates = matches.range(filepart.start, filepart.end,
                                       predicate=lambda m: 'weak-duplicate' in m.tags)
            for match in reversed(duplicates):
                seen = seen_by_name[match.name]
                if match.pattern in seen:
                    to_remove.append(match)
                else:
                    seen.append(match.pattern)
        return to_remove
|
|
||||||
|
|
||||||
|
|
||||||
class EpisodeDetailValidator(Rule):
    """
    Keep ``episode_details`` matches only when they are surrounded by separators or have a
    season/episode match before or after them.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: list of ``episode_details`` matches to remove (possibly empty)
        """
        def is_season_or_episode(match):
            return match.name in ['season', 'episode']

        return [detail for detail in matches.named('episode_details')
                if not seps_surround(detail)
                and not matches.previous(detail, is_season_or_episode)
                and not matches.next(detail, is_season_or_episode)]
|
|
||||||
|
|
||||||
|
|
||||||
class RemoveDetachedEpisodeNumber(Rule):
    """
    When several distinct weak episode values exist, remove the lowest one if it is below 10
    and the next value does not directly follow it.

    Fairy Tail 2 - 16-20, 2 should be removed.
    """
    priority = 64
    consequence = RemoveMatch
    dependency = [RemoveWeakIfSxxExx, RemoveWeakDuplicate]

    def when(self, matches, context):
        """
        :return: the detached match followed by its chain of parents, or an empty list
        """
        ret = []

        # Keep the first public weak-episode match seen for each distinct value.
        seen_values = set()
        candidates = []
        for match in matches.named('episode', lambda m: not m.private and 'weak-episode' in m.tags):
            if match.value in seen_values:
                continue
            seen_values.add(match.value)
            candidates.append(match)

        candidates.sort(key=lambda m: m.value)
        if len(candidates) <= 1:
            return ret

        lowest, runner_up = candidates[0], candidates[1]
        if lowest.value < 10 and runner_up.value - lowest.value != 1:
            node = lowest
            while node:  # TODO: Add a feature in rebulk to avoid this ...
                ret.append(node)
                node = node.parent
        return ret
|
|
||||||
|
|
||||||
|
|
||||||
class VersionValidator(Rule):
    """
    Remove a version match unless an episode match precedes it or its initiator
    is surrounded by separators.
    """
    priority = 64
    dependency = [RemoveWeakIfMovie, RemoveWeakIfSxxExx]
    consequence = RemoveMatch

    def when(self, matches, context):
        invalid = []
        for version in matches.named('version'):
            if matches.previous(version, lambda match: match.name == 'episode', 0):
                continue
            if seps_surround(version.initiator):
                continue
            invalid.append(version)
        return invalid
|
|
||||||
|
|
||||||
|
|
||||||
class EpisodeSingleDigitValidator(Rule):
    """
    Remove an episode whose initiator has length 1 when it sits inside a group
    marker that contains no title match.
    """
    dependency = [TitleFromPosition]

    consequence = RemoveMatch

    def when(self, matches, context):
        rejected = []
        single_digit_episodes = matches.named('episode', lambda match: len(match.initiator) == 1)
        for episode in single_digit_episodes:
            group = matches.markers.at_match(episode, lambda marker: marker.name == 'group', index=0)
            if not group:
                continue
            titles_in_group = matches.range(*group.span, predicate=lambda match: match.name == 'title')
            if not titles_in_group:
                rejected.append(episode)
        return rejected
|
|
||||||
|
|
||||||
|
|
||||||
class RenameToDiscMatch(Rule):
    """
    Turn episodes introduced by a `d` episodeMarker into `disc` matches, or remove
    them when the `disc` property is disabled.
    """

    consequence = [RenameMatch('disc'), RenameMatch('discMarker'), RemoveMatch]

    def when(self, matches, context):
        disc_matches = []
        disc_markers = []
        discarded = []

        disc_disabled = is_disabled(context, 'disc')

        for marker in matches.named('episodeMarker', predicate=lambda m: m.value.lower() == 'd'):
            siblings = marker.initiator.children
            if disc_disabled:
                # Property disabled: drop the marker and everything found with it.
                discarded.append(marker)
                discarded.extend(siblings)
            else:
                disc_markers.append(marker)
                disc_matches.extend(sorted(siblings.named('episode'), key=lambda m: m.value))

        if not (disc_matches or disc_markers or discarded):
            return False
        # One entry per consequence, in the order they are declared on the class.
        return disc_matches, disc_markers, discarded
|
|
||||||
@@ -1,48 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
film property
|
|
||||||
"""
|
|
||||||
from rebulk import Rebulk, AppendMatch, Rule
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
from ..common.formatters import cleanup
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import seps_surround
|
|
||||||
|
|
||||||
|
|
||||||
def film(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object that detects the ``film`` property.

    :param config: rule configuration (not used by this builder)
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def film_disabled(context):
        return is_disabled(context, 'film')

    regex_defaults = {
        'flags': re.IGNORECASE,
        'validate_all': True,
        'validator': {'__parent__': seps_surround},
    }
    rebulk = Rebulk().regex_defaults(**regex_defaults)

    rebulk.regex(r'f(\d{1,2})',
                 name='film',
                 private_parent=True,
                 children=True,
                 formatter=int,
                 disabled=film_disabled)

    rebulk.rules(FilmTitleRule)

    return rebulk
|
|
||||||
|
|
||||||
|
|
||||||
class FilmTitleRule(Rule):
    """
    Rule to find out film_title: the first hole between the start of the path
    marker and the film property.
    """
    consequence = AppendMatch

    properties = {'film_title': [None]}

    def enabled(self, context):
        return not is_disabled(context, 'film_title')

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        film_number = matches.named('film', lambda match: not match.private, index=0)
        if not film_number:
            return None

        filepath = matches.markers.at_match(film_number, lambda marker: marker.name == 'path', 0)
        candidate = matches.holes(filepath.start, film_number.start + 1, formatter=cleanup, index=0)
        if not candidate or not candidate.value:
            return None

        candidate.name = 'film_title'
        return candidate
|
|
||||||
@@ -1,510 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
language and subtitle_language properties
|
|
||||||
"""
|
|
||||||
# pylint: disable=no-member
|
|
||||||
import copy
|
|
||||||
from collections import defaultdict, namedtuple
|
|
||||||
|
|
||||||
import babelfish
|
|
||||||
from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
from ..common import seps
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.words import iter_words
|
|
||||||
from ..common.validators import seps_surround
|
|
||||||
|
|
||||||
|
|
||||||
def language(config, common_words):
    """
    Build the Rebulk object for the language and subtitle_language properties.

    Also registers a ``guessit`` language converter in babelfish, built from
    ``config['synonyms']``.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def by_length(shared, specific):
        # Affixes usable on both sides are merged in, then ordered shortest first.
        return sorted(shared + specific, key=length_comparator)

    def language_disabled(context):
        return is_disabled(context, 'language')

    def subtitle_language_disabled(context):
        return is_disabled(context, 'subtitle_language')

    subtitle_both = config['subtitle_affixes']
    subtitle_prefixes = by_length(subtitle_both, config['subtitle_prefixes'])
    subtitle_suffixes = by_length(subtitle_both, config['subtitle_suffixes'])
    lang_both = config['language_affixes']
    lang_prefixes = by_length(lang_both, config['language_prefixes'])
    lang_suffixes = by_length(lang_both, config['language_suffixes'])
    weak_affixes = frozenset(config['weak_affixes'])

    # The whole builder is disabled only when both properties are disabled.
    rebulk = Rebulk(disabled=lambda context: (language_disabled(context) and
                                              subtitle_language_disabled(context)))

    rebulk.string(*subtitle_prefixes,
                  name="subtitle_language.prefix",
                  ignore_case=True,
                  private=True,
                  validator=seps_surround,
                  tags=['release-group-prefix'],
                  disabled=subtitle_language_disabled)
    rebulk.string(*subtitle_suffixes,
                  name="subtitle_language.suffix",
                  ignore_case=True,
                  private=True,
                  validator=seps_surround,
                  disabled=subtitle_language_disabled)
    rebulk.string(*lang_suffixes,
                  name="language.suffix",
                  ignore_case=True,
                  private=True,
                  validator=seps_surround,
                  tags=['source-suffix'],
                  disabled=language_disabled)

    def find_languages(string, context=None):
        """Find languages in the string

        :return: the result of LanguageFinder.find for the given string
        """
        finder = LanguageFinder(context, subtitle_prefixes, subtitle_suffixes,
                                lang_prefixes, lang_suffixes, weak_affixes)
        return finder.find(string)

    rebulk.functional(find_languages,
                      properties={'language': [None]},
                      disabled=lambda context: not context.get('allowed_languages'))
    rebulk.rules(SubtitleExtensionRule,
                 SubtitlePrefixLanguageRule,
                 SubtitleSuffixLanguageRule,
                 RemoveLanguage,
                 RemoveInvalidLanguages(common_words))

    babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms'])

    return rebulk
|
|
||||||
|
|
||||||
|
|
||||||
# Non-specific languages, built from the 'und' and 'mul' language codes.
UNDETERMINED = babelfish.Language('und')
MULTIPLE = babelfish.Language('mul')
NON_SPECIFIC_LANGUAGES = frozenset((UNDETERMINED, MULTIPLE))
|
|
||||||
|
|
||||||
|
|
||||||
class GuessitConverter(babelfish.LanguageReverseConverter):  # pylint: disable=missing-docstring
    # Reverse converter that looks names up in the configured synonyms first,
    # then tries several babelfish constructors in turn.
    _with_country_regexp = re.compile(r'(.*)\((.*)\)')
    _with_country_regexp2 = re.compile(r'(.*)-(.*)')

    def __init__(self, synonyms):
        # Maps a lower-cased synonym to an (alpha3, country, script) tuple.
        self.guessit_exceptions = {}
        for code, synlist in synonyms.items():
            # A code of the form "<alpha3>_<country>" carries a country part.
            alpha3, country = code.split('_') if '_' in code else (code, None)
            for synonym in synlist:
                self.guessit_exceptions[synonym.lower()] = (alpha3, country, None)

    @property
    def codes(self):  # pylint: disable=missing-docstring
        known = babelfish.language_converters['alpha3b'].codes
        for converter_name in ('alpha2', 'name', 'opensubtitles'):
            known = known | babelfish.language_converters[converter_name].codes
        known = known | babelfish.country_converters['name'].codes
        return known | frozenset(self.guessit_exceptions.keys())

    def convert(self, alpha3, country=None, script=None):
        language = babelfish.Language(alpha3, country, script)
        return str(language)

    def reverse(self, name):  # pylint:disable=arguments-differ
        name = name.lower()
        # exceptions come first, as they need to override a potential match
        # with any of the other guessers
        if name in self.guessit_exceptions:
            return self.guessit_exceptions[name]

        constructors = (
            babelfish.Language,
            babelfish.Language.fromalpha3b,
            babelfish.Language.fromalpha2,
            babelfish.Language.fromname,
            babelfish.Language.fromopensubtitles,
            babelfish.Language.fromietf,
        )
        for build in constructors:
            try:
                found = build(name)
                return found.alpha3, found.country, found.script
            except (ValueError, babelfish.LanguageReverseError):
                continue

        raise babelfish.LanguageReverseError(name)
|
|
||||||
|
|
||||||
|
|
||||||
def length_comparator(value):
    """
    Sort key: the length of the given value.
    """
    size = len(value)
    return size
|
|
||||||
|
|
||||||
|
|
||||||
# Internal record of a detected language: the property name it applies to,
# the word it was found in and the resulting language.
_LanguageMatch = namedtuple('_LanguageMatch', ['property_name', 'word', 'lang'])
|
|
||||||
|
|
||||||
|
|
||||||
class LanguageWord(object):
    """
    Extension to the Word namedtuple in order to create compound words.

    E.g.: pt-BR, soft subtitles, custom subs

    :param start: index of the first character of the word in input_string
    :param end: index just after the last character of the word in input_string
    :param value: text of the word
    :param input_string: the full string the word was taken from
    :param next_word: the LanguageWord following this one, if any
    """

    def __init__(self, start, end, value, input_string, next_word=None):
        self.start = start
        self.end = end
        self.value = value
        self.input_string = input_string
        self.next_word = next_word

    @property
    def extended_word(self):  # pylint:disable=inconsistent-return-statements
        """
        Return the extended word for this instance, if any.

        The extended word spans this word and the next one. It exists when both
        are separated by a space or a dot, or by a dash that is not itself
        followed by another dash right after the next word. Dots inside the
        compound value are replaced by spaces. Returns None otherwise.
        """
        following = self.next_word
        if not following:
            return None

        separator = self.input_string[self.end:following.start]
        next_separator = self.input_string[following.end:following.end + 1]

        joined_by_dash = separator == '-' and separator != next_separator
        if not (joined_by_dash or separator in (' ', '.')):
            return None

        value = self.input_string[self.start:following.end].replace('.', ' ')
        return LanguageWord(self.start, following.end, value, self.input_string, following.next_word)

    def __repr__(self):
        # The closing '>' was missing, which left the representation unbalanced.
        return '<({start},{end}): {value}>'.format(start=self.start, end=self.end, value=self.value)
|
|
||||||
|
|
||||||
|
|
||||||
def to_rebulk_match(language_match):
    """
    Convert language match to rebulk Match: start, end, dict

    For an undetermined language the dict carries the lower-cased word, a
    babelfish.Language formatter and the 'weak-language' tag; otherwise it
    carries the language itself as value.
    """
    word = language_match.word
    properties = {'name': language_match.property_name}
    if language_match.lang == UNDETERMINED:
        properties['value'] = word.value.lower()
        properties['formatter'] = babelfish.Language
        properties['tags'] = ['weak-language']
    else:
        properties['value'] = language_match.lang
    return word.start, word.end, properties
|
|
||||||
|
|
||||||
|
|
||||||
class LanguageFinder(object):
    """
    Helper class to search and return language matches: 'language' and 'subtitle_language' properties
    """

    def __init__(self, context,
                 subtitle_prefixes, subtitle_suffixes,
                 lang_prefixes, lang_suffixes, weak_affixes):
        requested = context.get('allowed_languages') if context else None
        self.allowed_languages = set(entry.lower() for entry in (requested or []))
        self.weak_affixes = weak_affixes
        self.prefixes_map = {}
        self.suffixes_map = {}

        # subtitle_language affixes are registered first, and only when enabled.
        if not is_disabled(context, 'subtitle_language'):
            self.prefixes_map['subtitle_language'] = subtitle_prefixes
            self.suffixes_map['subtitle_language'] = subtitle_suffixes

        self.prefixes_map['language'] = lang_prefixes
        self.suffixes_map['language'] = lang_suffixes

    def find(self, string):
        """
        Yield all matches for language and subtitle_language.

        Undetermined language matches are dropped for a property when a regular language
        was found for it. Multi language matches are dropped for a property when it only
        has undetermined language matches besides them.
        """
        regular = defaultdict(set)
        undetermined = defaultdict(set)
        multiple = defaultdict(set)

        for found in self.iter_language_matches(string):
            if found.lang == UNDETERMINED:
                bucket = undetermined
            elif found.lang == 'mul':
                bucket = multiple
            else:
                bucket = regular
            bucket[found.property_name].add(found)

        for property_name, found_set in multiple.items():
            if property_name in regular or property_name not in undetermined:
                for found in found_set:
                    yield to_rebulk_match(found)

        for property_name, found_set in undetermined.items():
            if property_name not in regular:
                for found in found_set:
                    yield to_rebulk_match(found)

        for found_set in regular.values():
            for found in found_set:
                yield to_rebulk_match(found)

    def iter_language_matches(self, string):
        """
        Yield language matches for the given string.
        """
        words = [LanguageWord(start=word.span[0], end=word.span[1], value=word.value, input_string=string)
                 for word in iter_words(string)]
        # Chain every word to the one that follows it.
        for current, following in zip(words, words[1:]):
            current.next_word = following

        for candidate in words:
            for match in self.iter_matches_for_candidate(candidate):
                yield match

    def iter_matches_for_candidate(self, language_word):
        """
        Yield language matches for the given candidate word.
        """
        def starts_with(string, prefix):
            return string.startswith(prefix)

        def without_prefix(string, prefix):
            return string[len(prefix):]

        def ends_with(string, suffix):
            return string.endswith(suffix)

        def without_suffix(string, suffix):
            return string[:len(string) - len(suffix)]

        following = language_word.next_word
        # (word holding the affix, fallback word, affixes, affix test, affix removal)
        attempts = (
            (language_word, following, self.prefixes_map, starts_with, without_prefix),
            (following, language_word, self.suffixes_map, ends_with, without_suffix),
        )

        for word, fallback_word, affixes, is_affix, strip_affix in attempts:
            if word:
                match = self.find_match_for_word(word, fallback_word, affixes, is_affix, strip_affix)
                if match:
                    yield match

        match = self.find_language_match_for_word(language_word)
        if match:
            yield match

    def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix):  # pylint:disable=inconsistent-return-statements
        """
        Return the language match for the given word and affixes.
        """
        for current_word in (word.extended_word, word):
            if not current_word:
                continue

            lowered = current_word.value.lower()

            for key, parts in affixes.items():
                for part in parts:
                    if not is_affix(lowered, part):
                        continue

                    remainder = strip_affix(lowered, part)
                    if remainder:
                        # The word is affix + language: parse what is left.
                        match = self.create_language_match(
                            key, LanguageWord(current_word.start, current_word.end,
                                              remainder, current_word.input_string))
                    else:
                        # The word is only the affix: look at the adjacent fallback word,
                        # then fall back to an undetermined language for non-weak affixes.
                        match = None
                        if fallback_word and (
                                abs(fallback_word.start - word.end) <= 1 or abs(word.start - fallback_word.end) <= 1):
                            match = self.find_language_match_for_word(fallback_word, key=key)

                        if not match and part not in self.weak_affixes:
                            match = self.create_language_match(
                                key, LanguageWord(current_word.start, current_word.end,
                                                  'und', current_word.input_string))

                    if match:
                        return match

    def find_language_match_for_word(self, word, key='language'):  # pylint:disable=inconsistent-return-statements
        """
        Return the language match for the given word.
        """
        for current_word in (word.extended_word, word):
            if not current_word:
                continue
            match = self.create_language_match(key, current_word)
            if match:
                return match

    def create_language_match(self, key, word):  # pylint:disable=inconsistent-return-statements
        """
        Create a LanguageMatch for a given word
        """
        lang = self.parse_language(word.value.lower())
        if lang is None:
            return None
        return _LanguageMatch(property_name=key, word=word, lang=lang)

    def parse_language(self, lang_word):  # pylint:disable=inconsistent-return-statements
        """
        Parse the lang_word into a valid Language.

        Multi and Undetermined languages are also valid languages.
        Returns None when the word cannot be parsed or the language is not allowed.
        """
        allowed = self.allowed_languages
        try:
            lang = babelfish.Language.fromguessit(lang_word)
            if hasattr(lang, 'name') and lang.name.lower() in allowed:
                return lang
            if hasattr(lang, 'alpha2') and lang.alpha2.lower() in allowed:
                return lang
            if lang.alpha3.lower() in allowed:
                return lang
        except babelfish.Error:
            pass
        return None
|
|
||||||
|
|
||||||
|
|
||||||
class SubtitlePrefixLanguageRule(Rule):
    """
    Convert language guess as subtitle_language if previous match is a subtitle language prefix
    """
    consequence = RemoveMatch

    properties = {'subtitle_language': [None]}

    def enabled(self, context):
        return not is_disabled(context, 'subtitle_language')

    def when(self, matches, context):
        def is_prefix(match):
            return match.name == 'subtitle_language.prefix'

        renames = []
        removals = matches.named('subtitle_language.prefix')
        for lang in matches.named('language'):
            prefix = matches.previous(lang, is_prefix, 0)
            if not prefix:
                group_marker = matches.markers.at_match(lang, lambda marker: marker.name == 'group', 0)
                if group_marker:
                    # Find prefix if placed just before the group
                    prefix = matches.previous(group_marker, is_prefix, 0)
                    if not prefix:
                        # Find prefix if placed before in the group
                        prefix = matches.range(group_marker.start, lang.start, is_prefix, 0)
            if not prefix:
                continue
            renames.append((prefix, lang))
            removals.extend(matches.conflicting(lang))
            if prefix in removals:
                removals.remove(prefix)
        if renames or removals:
            return renames, removals
        return False

    def then(self, matches, when_response, context):
        renames, removals = when_response
        super(SubtitlePrefixLanguageRule, self).then(matches, removals, context)
        for prefix, language_match in renames:
            # Remove suffix equivalent of prefix.
            as_suffix = copy.copy(prefix)
            as_suffix.name = 'subtitle_language.suffix'
            if as_suffix in matches:
                matches.remove(as_suffix)
            # Take the match out, rename it, then put it back under its new name.
            matches.remove(language_match)
            language_match.name = 'subtitle_language'
            matches.append(language_match)
|
|
||||||
|
|
||||||
|
|
||||||
class SubtitleSuffixLanguageRule(Rule):
    """
    Convert language guess as subtitle_language if next match is a subtitle language suffix
    """
    dependency = SubtitlePrefixLanguageRule
    consequence = RemoveMatch

    properties = {'subtitle_language': [None]}

    def enabled(self, context):
        return not is_disabled(context, 'subtitle_language')

    def when(self, matches, context):
        renames = []
        removals = matches.named('subtitle_language.suffix')
        for lang in matches.named('language'):
            suffix = matches.next(lang, lambda match: match.name == 'subtitle_language.suffix', 0)
            if not suffix:
                continue
            renames.append(lang)
            if suffix in removals:
                removals.remove(suffix)
        if renames or removals:
            return renames, removals
        return False

    def then(self, matches, when_response, context):
        renames, removals = when_response
        super(SubtitleSuffixLanguageRule, self).then(matches, removals, context)
        for language_match in renames:
            # Take the match out, rename it, then put it back under its new name.
            matches.remove(language_match)
            language_match.name = 'subtitle_language'
            matches.append(language_match)
|
|
||||||
|
|
||||||
|
|
||||||
class SubtitleExtensionRule(Rule):
    """
    Convert language guess as subtitle_language if next match is a subtitle extension.

    Since it's a strong match, it also removes any conflicting source with it.
    """
    consequence = [RemoveMatch, RenameMatch('subtitle_language')]

    properties = {'subtitle_language': [None]}

    def enabled(self, context):
        return not is_disabled(context, 'subtitle_language')

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        def is_subtitle_extension(match):
            return 'extension' in match.tags and 'subtitle' in match.tags

        extension = matches.named('container', is_subtitle_extension, 0)
        if not extension:
            return None

        subtitle_lang = matches.previous(extension, lambda match: match.name == 'language', 0)
        if not subtitle_lang:
            return None

        # Existing weak subtitle_language matches are made private.
        for weak in matches.named('subtitle_language', predicate=lambda m: 'weak-language' in m.tags):
            weak.private = True

        conflicting_sources = matches.conflicting(subtitle_lang, lambda m: m.name == 'source')
        return conflicting_sources, subtitle_lang
|
|
||||||
|
|
||||||
|
|
||||||
class RemoveLanguage(Rule):
    """Remove the remaining language matches when the language property is disabled."""

    consequence = RemoveMatch

    def enabled(self, context):
        language_disabled = is_disabled(context, 'language')
        return language_disabled

    def when(self, matches, context):
        remaining_languages = matches.named('language')
        return remaining_languages
|
|
||||||
|
|
||||||
|
|
||||||
class RemoveInvalidLanguages(Rule):
    """Remove language matches whose raw text is one of the blacklisted common words."""

    consequence = RemoveMatch
    priority = 32

    def __init__(self, common_words):
        """Store the collection of common words to check matches against."""
        super(RemoveInvalidLanguages, self).__init__()
        self.common_words = common_words

    def when(self, matches, context):
        def is_language(match):
            return match.name in ('language', 'subtitle_language')

        def is_not_language(match):
            return match.name not in ('language', 'subtitle_language')

        def has_content(hole):
            return hole.value.strip(seps)

        rejected = []
        for candidate in matches.range(0, len(matches.input_string), predicate=is_language):
            if candidate.raw.lower() not in self.common_words:
                continue

            group = matches.markers.at_match(candidate, index=0, predicate=lambda m: m.name == 'group')
            # Keep the match when its group holds nothing but languages and separators.
            if group \
                    and not matches.range(group.start, group.end, predicate=is_not_language) \
                    and not matches.holes(group.start, group.end, predicate=has_content):
                continue

            rejected.append(candidate)

        return rejected
|
|
||||||
@@ -1,55 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
mimetype property
|
|
||||||
"""
|
|
||||||
import mimetypes
|
|
||||||
|
|
||||||
from rebulk import Rebulk, CustomRule, POST_PROCESS
|
|
||||||
from rebulk.match import Match
|
|
||||||
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ...rules.processors import Processors
|
|
||||||
|
|
||||||
|
|
||||||
def mimetype(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object for the mimetype property.

    :param config: rule configuration (not used by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def mimetype_disabled(context):
        return is_disabled(context, 'mimetype')

    builder = Rebulk(disabled=mimetype_disabled)
    builder.rules(Mimetype)
    return builder
|
|
||||||
|
|
||||||
|
|
||||||
class Mimetype(CustomRule):
    """
    Mimetype post processor.

    Guesses the mime type from the input string and appends it as a
    ``mimetype`` match positioned at the end of the input string.
    """
    priority = POST_PROCESS

    dependency = Processors

    def when(self, matches, context):
        # Non-strict mode also recognizes non-standard but commonly used types.
        guessed_type, _ = mimetypes.guess_type(matches.input_string, strict=False)
        return guessed_type

    def then(self, matches, when_response, context):
        end_of_input = len(matches.input_string)
        mimetype_match = Match(end_of_input, end_of_input, name='mimetype', value=when_response)
        matches.append(mimetype_match)

    @property
    def properties(self):
        """
        Properties for this rule.
        """
        return {'mimetype': [None]}
|
|
||||||
@@ -1,383 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
other property
|
|
||||||
"""
|
|
||||||
import copy
|
|
||||||
|
|
||||||
from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch, POST_PROCESS, AppendMatch
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
from ..common import dash
|
|
||||||
from ..common import seps
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import seps_after, seps_before, seps_surround, and_
|
|
||||||
from ...reutils import build_or_pattern
|
|
||||||
from ...rules.common.formatters import raw_cleanup
|
|
||||||
|
|
||||||
|
|
||||||
def other(config): # pylint:disable=unused-argument,too-many-statements
|
|
||||||
"""
|
|
||||||
Builder for rebulk object.
|
|
||||||
|
|
||||||
:param config: rule configuration
|
|
||||||
:type config: dict
|
|
||||||
:return: Created Rebulk object
|
|
||||||
:rtype: Rebulk
|
|
||||||
"""
|
|
||||||
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'other'))
|
|
||||||
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
|
|
||||||
rebulk.defaults(name="other", validator=seps_surround)
|
|
||||||
|
|
||||||
rebulk.regex('Audio-?Fix', 'Audio-?Fixed', value='Audio Fixed')
|
|
||||||
rebulk.regex('Sync-?Fix', 'Sync-?Fixed', value='Sync Fixed')
|
|
||||||
rebulk.regex('Dual', 'Dual-?Audio', value='Dual Audio')
|
|
||||||
rebulk.regex('ws', 'wide-?screen', value='Widescreen')
|
|
||||||
rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')
|
|
||||||
|
|
||||||
rebulk.string('Repack', 'Rerip', value='Proper',
|
|
||||||
tags=['streaming_service.prefix', 'streaming_service.suffix'])
|
|
||||||
rebulk.string('Proper', value='Proper',
|
|
||||||
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix'])
|
|
||||||
|
|
||||||
rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
|
|
||||||
tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
|
|
||||||
rebulk.regex('Real', value='Proper',
|
|
||||||
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real'])
|
|
||||||
|
|
||||||
rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after',
|
|
||||||
'streaming_service.prefix', 'streaming_service.suffix'])
|
|
||||||
rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
|
|
||||||
tags=['streaming_service.prefix', 'streaming_service.suffix'])
|
|
||||||
rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix',
|
|
||||||
tags=['streaming_service.prefix', 'streaming_service.suffix'])
|
|
||||||
|
|
||||||
rebulk.string('Fansub', value='Fan Subtitled', tags='has-neighbor')
|
|
||||||
rebulk.string('Fastsub', value='Fast Subtitled', tags='has-neighbor')
|
|
||||||
|
|
||||||
season_words = build_or_pattern(["seasons?", "series?"])
|
|
||||||
complete_articles = build_or_pattern(["The"])
|
|
||||||
|
|
||||||
def validate_complete(match):
    """
    Accept a ``Complete`` match only when a season word accompanies it.

    :param match: parent match whose ``children`` may hold the private
        ``completeWordsBefore`` / ``completeWordsAfter`` groups
    :type match: object exposing ``children.named(name)``
    :return: True when at least one of the two season-word groups is present
    :rtype: bool
    """
    parts = match.children
    if parts.named('completeWordsBefore') or parts.named('completeWordsAfter'):
        return True
    return False
|
|
||||||
|
|
||||||
rebulk.regex('(?P<completeArticle>' + complete_articles + '-)?' +
|
|
||||||
'(?P<completeWordsBefore>' + season_words + '-)?' +
|
|
||||||
'Complete' + '(?P<completeWordsAfter>-' + season_words + ')?',
|
|
||||||
private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
|
|
||||||
value={'other': 'Complete'},
|
|
||||||
tags=['release-group-prefix'],
|
|
||||||
validator={'__parent__': and_(seps_surround, validate_complete)})
|
|
||||||
rebulk.string('R5', value='Region 5')
|
|
||||||
rebulk.string('RC', value='Region C')
|
|
||||||
rebulk.regex('Pre-?Air', value='Preair')
|
|
||||||
rebulk.regex('(?:PS-?)Vita', value='PS Vita')
|
|
||||||
rebulk.regex('Vita', value='PS Vita', tags='has-neighbor')
|
|
||||||
rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'},
|
|
||||||
private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True)
|
|
||||||
|
|
||||||
for value in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
|
|
||||||
rebulk.string(value, value=value)
|
|
||||||
rebulk.string('3D', value='3D', tags='has-neighbor')
|
|
||||||
|
|
||||||
rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
|
|
||||||
rebulk.string('HR', value='High Resolution')
|
|
||||||
rebulk.string('LD', value='Line Dubbed')
|
|
||||||
rebulk.string('MD', value='Mic Dubbed')
|
|
||||||
rebulk.string('mHD', 'HDLight', value='Micro HD')
|
|
||||||
rebulk.string('LDTV', value='Low Definition')
|
|
||||||
rebulk.string('HFR', value='High Frame Rate')
|
|
||||||
rebulk.string('VFR', value='Variable Frame Rate')
|
|
||||||
rebulk.string('HD', value='HD', validator=None,
|
|
||||||
tags=['streaming_service.prefix', 'streaming_service.suffix'])
|
|
||||||
rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
|
|
||||||
tags=['streaming_service.prefix', 'streaming_service.suffix'])
|
|
||||||
rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='Ultra HD', validator=None,
|
|
||||||
tags=['streaming_service.prefix', 'streaming_service.suffix'])
|
|
||||||
rebulk.regex('Upscaled?', value='Upscaled')
|
|
||||||
|
|
||||||
for value in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail',
|
|
||||||
'Colorized', 'Internal'):
|
|
||||||
rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
|
|
||||||
rebulk.regex('LiNE', value='Line Audio', tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix'])
|
|
||||||
rebulk.regex('Read-?NFO', value='Read NFO')
|
|
||||||
rebulk.string('CONVERT', value='Converted', tags='has-neighbor')
|
|
||||||
rebulk.string('DOCU', 'DOKU', value='Documentary', tags='has-neighbor')
|
|
||||||
rebulk.string('OM', value='Open Matte', tags='has-neighbor')
|
|
||||||
rebulk.string('STV', value='Straight to Video', tags='has-neighbor')
|
|
||||||
rebulk.string('OAR', value='Original Aspect Ratio', tags='has-neighbor')
|
|
||||||
rebulk.string('Complet', value='Complete', tags=['has-neighbor', 'release-group-prefix'])
|
|
||||||
|
|
||||||
for coast in ('East', 'West'):
|
|
||||||
rebulk.regex(r'(?:Live-)?(?:Episode-)?' + coast + '-?(?:Coast-)?Feed', value=coast + ' Coast Feed')
|
|
||||||
|
|
||||||
rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor')
|
|
||||||
rebulk.string('Ova', 'Oav', value='Original Animated Video')
|
|
||||||
|
|
||||||
rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
|
|
||||||
tags=['other.validate.screener', 'source-prefix', 'source-suffix'])
|
|
||||||
rebulk.string('Mux', value='Mux', validator=seps_after,
|
|
||||||
tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix'])
|
|
||||||
rebulk.string('HC', 'vost', value='Hardcoded Subtitles')
|
|
||||||
|
|
||||||
rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor')
|
|
||||||
rebulk.regex('HDR(?:10)?', value='HDR10', tags='uhdbluray-neighbor')
|
|
||||||
rebulk.regex('Dolby-?Vision', value='Dolby Vision', tags='uhdbluray-neighbor')
|
|
||||||
rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')
|
|
||||||
|
|
||||||
rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
|
|
||||||
rebulk.string('Extras', value='Extras', tags='has-neighbor')
|
|
||||||
rebulk.regex('Digital-?Extras?', value='Extras')
|
|
||||||
rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
|
|
||||||
rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
|
|
||||||
rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')
|
|
||||||
|
|
||||||
rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
|
|
||||||
ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
|
|
||||||
ValidateAtEnd, ValidateReal, ProperCountRule)
|
|
||||||
|
|
||||||
return rebulk
|
|
||||||
|
|
||||||
|
|
||||||
class ProperCountRule(Rule):
    """
    Append a ``proper_count`` match computed from the ``other`` matches valued 'Proper'.

    Matches are grouped by their cleaned-up raw text, so the same spelling is
    counted once. A group whose kept match is tagged ``real`` weighs 2, any
    other group weighs 1.
    """
    priority = POST_PROCESS

    consequence = AppendMatch

    properties = {'proper_count': [None]}

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        proper_matches = matches.named('other', lambda match: match.value == 'Proper')
        if not proper_matches:
            return

        # For a repeated raw text the last match wins, exactly like successive dict assignments.
        by_raw = {raw_cleanup(found.raw): found for found in proper_matches}

        # The new match reuses the span and attributes of the last 'Proper' match.
        counter = copy.copy(proper_matches[-1])
        counter.name = 'proper_count'
        counter.value = sum(2 if 'real' in kept.tags else 1 for kept in by_raw.values())
        return counter
|
|
||||||
|
|
||||||
|
|
||||||
class RenameAnotherToOther(Rule):
    """
    Hand every match named ``another`` to the ``RenameMatch('other')`` consequence.
    """
    priority = 32
    consequence = RenameMatch('other')

    def when(self, matches, context):
        to_rename = matches.named('another')
        return to_rename
|
|
||||||
|
|
||||||
|
|
||||||
class ValidateHasNeighbor(Rule):
    """
    Validate tag has-neighbor.

    A tagged match is collected for removal when neither its closest previous
    nor its closest next match/group marker is adjacent, i.e. separated from it
    by separators only.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        def is_group(marker):
            return marker.name == 'group'

        text = matches.input_string
        isolated = []
        for candidate in matches.range(predicate=lambda match: 'has-neighbor' in match.tags):
            # Closest thing before the candidate: a match, or a group marker ending later than it.
            before = matches.previous(candidate, index=0)
            group_before = matches.markers.previous(candidate, is_group, 0)
            if group_before and (not before or group_before.end > before.end):
                before = group_before
            if before and not text[before.end:candidate.start].strip(seps):
                # NOTE(review): this leaves the whole loop, so later tagged matches are
                # never examined once one candidate has a neighbor - confirm `break`
                # (rather than `continue`) is intended.
                break

            # Closest thing after the candidate: a match, or a group marker starting earlier than it.
            after = matches.next(candidate, index=0)
            group_after = matches.markers.next(candidate, is_group, 0)
            if group_after and (not after or group_after.start < after.start):
                after = group_after
            if after and not text[candidate.end:after.start].strip(seps):
                break

            isolated.append(candidate)
        return isolated
|
|
||||||
|
|
||||||
|
|
||||||
class ValidateHasNeighborBefore(Rule):
    """
    Validate tag has-neighbor-before.

    The check inspects what FOLLOWS the tagged match: the match is collected
    for removal unless the closest next match/group marker is separated from
    it by separators only.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        def is_group(marker):
            return marker.name == 'group'

        text = matches.input_string
        isolated = []
        for candidate in matches.range(predicate=lambda match: 'has-neighbor-before' in match.tags):
            after = matches.next(candidate, index=0)
            group_after = matches.markers.next(candidate, is_group, 0)
            if group_after and (not after or group_after.start < after.start):
                after = group_after
            if after and not text[candidate.end:after.start].strip(seps):
                # NOTE(review): exits the loop instead of moving to the next candidate - confirm intended.
                break
            isolated.append(candidate)
        return isolated
|
|
||||||
|
|
||||||
|
|
||||||
class ValidateHasNeighborAfter(Rule):
    """
    Validate tag has-neighbor-after.

    The check inspects what PRECEDES the tagged match: the match is collected
    for removal unless the closest previous match/group marker is separated
    from it by separators only.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        def is_group(marker):
            return marker.name == 'group'

        text = matches.input_string
        isolated = []
        for candidate in matches.range(predicate=lambda match: 'has-neighbor-after' in match.tags):
            before = matches.previous(candidate, index=0)
            group_before = matches.markers.previous(candidate, is_group, 0)
            if group_before and (not before or group_before.end > before.end):
                before = group_before
            if before and not text[before.end:candidate.start].strip(seps):
                # NOTE(review): exits the loop instead of moving to the next candidate - confirm intended.
                break
            isolated.append(candidate)
        return isolated
|
|
||||||
|
|
||||||
|
|
||||||
class ValidateScreenerRule(Rule):
    """
    Validate tag other.validate.screener.

    A tagged ``other`` match is kept only when a match initiated by the
    ``source`` pattern precedes it with nothing but separators in between.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        def from_source(match):
            return match.initiator.name == 'source'

        rejected = []
        for screener in matches.named('other', lambda match: 'other.validate.screener' in match.tags):
            source = matches.previous(screener, from_source, 0)
            glued = source and not matches.input_string[source.end:screener.start].strip(seps)
            if not glued:
                rejected.append(screener)
        return rejected
|
|
||||||
|
|
||||||
|
|
||||||
class ValidateMuxRule(Rule):
    """
    Validate tag other.validate.mux.

    A tagged ``other`` match is removed when no match initiated by the
    ``source`` pattern is found before it.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        def from_source(match):
            return match.initiator.name == 'source'

        tagged = matches.named('other', lambda match: 'other.validate.mux' in match.tags)
        return [mux for mux in tagged if not matches.previous(mux, from_source, 0)]
|
|
||||||
|
|
||||||
|
|
||||||
class ValidateHardcodedSubs(Rule):
    """
    Validate HC matches.

    An ``other`` match valued 'Hardcoded Subtitles' is kept only when a
    ``subtitle_language`` match sits right after or right before it, with no
    hole holding anything other than separators in between.
    """

    priority = 32
    consequence = RemoveMatch

    def when(self, matches, context):
        def is_subtitle_language(match):
            return match.name == 'subtitle_language'

        def has_text(hole):
            return hole.value.strip(seps)

        rejected = []
        for hardcoded in matches.named('other', predicate=lambda match: match.value == 'Hardcoded Subtitles'):
            following = matches.next(hardcoded, predicate=is_subtitle_language, index=0)
            if following and not matches.holes(hardcoded.end, following.start, predicate=has_text):
                continue

            preceding = matches.previous(hardcoded, predicate=is_subtitle_language, index=0)
            if preceding and not matches.holes(preceding.end, hardcoded.start, predicate=has_text):
                continue

            rejected.append(hardcoded)

        return rejected
|
|
||||||
|
|
||||||
|
|
||||||
class ValidateStreamingServiceNeighbor(Rule):
    """
    Validate streaming service's neighbors.

    Applies to ``other`` matches tagged ``streaming_service.prefix`` or
    ``streaming_service.suffix`` that were not initiated by the ``source``
    pattern. When such a match is not followed (or else not preceded) by a
    separator, it is kept only if the tag allows it and a
    ``streaming_service`` match is adjacent on that side; otherwise the
    initiator and its children are removed.
    """

    priority = 32
    consequence = RemoveMatch

    def when(self, matches, context):
        def is_candidate(found):
            if found.initiator.name == 'source':
                return False
            return 'streaming_service.prefix' in found.tags or 'streaming_service.suffix' in found.tags

        def is_service(found):
            return found.name == 'streaming_service'

        def has_text(hole):
            return hole.value.strip(seps)

        rejected = []
        for found in matches.named('other', predicate=is_candidate):
            origin = found.initiator
            if not seps_after(origin):
                if 'streaming_service.prefix' in origin.tags:
                    service = matches.next(origin, is_service, 0)
                    if service and not matches.holes(origin.end, service.start, predicate=has_text):
                        continue
            elif not seps_before(origin):
                if 'streaming_service.suffix' in origin.tags:
                    service = matches.previous(origin, is_service, 0)
                    if service and not matches.holes(service.end, origin.start, predicate=has_text):
                        continue
            else:
                # Separators on both sides: nothing to validate.
                continue

            if origin.children:
                rejected.extend(origin.children)
            rejected.append(origin)

        return rejected
|
|
||||||
|
|
||||||
|
|
||||||
class ValidateAtEnd(Rule):
    """
    Validate other which should occur at the end of a filepart.

    An ``other`` match tagged ``at-end`` is removed when, between its end and
    the end of its ``path`` marker, there is either a hole holding more than
    separators or a match named something other than ``other``/``container``.
    """

    priority = 32
    consequence = RemoveMatch

    def when(self, matches, context):
        def wants_end(found):
            return found.name == 'other' and 'at-end' in found.tags

        def has_text(hole):
            return hole.value.strip(seps)

        def is_foreign(found):
            return found.name not in ('other', 'container')

        misplaced = []
        for filepart in matches.markers.named('path'):
            for tail in matches.range(filepart.start, filepart.end, predicate=wants_end):
                if matches.holes(tail.end, filepart.end, predicate=has_text):
                    misplaced.append(tail)
                elif matches.range(tail.end, filepart.end, predicate=is_foreign):
                    misplaced.append(tail)

        return misplaced
|
|
||||||
|
|
||||||
|
|
||||||
class ValidateReal(Rule):
    """
    Validate Real.

    An ``other`` match tagged ``real`` is removed when no match lies between
    the start of its ``path`` marker and its own start.
    """
    consequence = RemoveMatch
    priority = 64

    def when(self, matches, context):
        def is_real(found):
            return found.name == 'other' and 'real' in found.tags

        leading = []
        for filepart in matches.markers.named('path'):
            for real in matches.range(filepart.start, filepart.end, is_real):
                if matches.range(filepart.start, real.start):
                    continue
                leading.append(real)

        return leading
|
|
||||||
@@ -1,46 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
part property
|
|
||||||
"""
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
from rebulk import Rebulk
|
|
||||||
from ..common import dash
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import seps_surround, int_coercable, and_
|
|
||||||
from ..common.numeral import numeral, parse_numeral
|
|
||||||
from ...reutils import build_or_pattern
|
|
||||||
|
|
||||||
|
|
||||||
def part(config):
    """
    Builder for rebulk object.

    Registers one regex: a prefix taken from ``config['prefixes']``, an
    optional dash, then a numeral captured as ``part``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    prefixes = config['prefixes']

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'part'))
    rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], validator={'__parent__': seps_surround})

    def validate_roman(match):
        """
        Accept integer-coercible raw text outright; otherwise require surrounding separators.

        :param match:
        :type match:
        :return:
        :rtype:
        """
        return True if int_coercable(match.raw) else seps_surround(match)

    def in_range(match):
        """Accept part numbers strictly between 0 and 100."""
        return 0 < match.value < 100

    pattern = build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')'
    rebulk.regex(pattern,
                 prefixes=prefixes, validate_all=True, private_parent=True, children=True, formatter=parse_numeral,
                 validator={'part': and_(validate_roman, in_range)})

    return rebulk
|
|
||||||
@@ -1,347 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
release_group property
|
|
||||||
"""
|
|
||||||
import copy
|
|
||||||
|
|
||||||
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
|
|
||||||
from rebulk.match import Match
|
|
||||||
|
|
||||||
from ..common import seps
|
|
||||||
from ..common.comparators import marker_sorted
|
|
||||||
from ..common.expected import build_expected_function
|
|
||||||
from ..common.formatters import cleanup
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import int_coercable, seps_surround
|
|
||||||
from ..properties.title import TitleFromPosition
|
|
||||||
|
|
||||||
|
|
||||||
def release_group(config):
    """
    Builder for rebulk object.

    Reads ``config['forbidden_names']`` (words stripped from the edges of a
    group name) and ``config['ignored_seps']`` (separator characters that may
    be kept inside a group name).

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    forbidden_groupnames = config['forbidden_names']

    groupname_ignore_seps = config['ignored_seps']
    # Separators that are always stripped from a group name: all of `seps` minus the ignored ones.
    groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])

    def clean_groupname(string):
        """
        Removes and strip separators from input_string

        Forbidden words found at the start or at the end of the name (next to
        a separator, or making up the whole name) are dropped as well.

        :param string: raw group name
        :type string: str
        :return: cleaned group name, possibly empty
        :rtype: str
        """
        string = string.strip(groupname_seps)
        # Ignored separators are stripped from the edges unless they wrap the name on both
        # sides or also occur inside it.
        if not (string.endswith(tuple(groupname_ignore_seps)) and string.startswith(tuple(groupname_ignore_seps))) \
                and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
            string = string.strip(groupname_ignore_seps)
        # NOTE(review): comparison is done on string.lower(), so forbidden names are
        # presumably lowercase in the configuration - confirm.
        for forbidden in forbidden_groupnames:
            if string.lower().startswith(forbidden) and string[len(forbidden):len(forbidden) + 1] in seps:
                string = string[len(forbidden):]
                string = string.strip(groupname_seps)
            if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
                # Drop the trailing forbidden word. The previous `string[:len(forbidden)]`
                # kept the first len(forbidden) characters instead of removing the last ones.
                string = string[:-len(forbidden)]
                string = string.strip(groupname_seps)
        return string.strip()

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))

    expected_group = build_expected_function('expected_group')

    rebulk.functional(expected_group, name='release_group', tags=['expected'],
                      validator=seps_surround,
                      conflict_solver=lambda match, other: other,
                      disabled=lambda context: not context.get('expected_group'))

    return rebulk.rules(
        DashSeparatedReleaseGroup(clean_groupname),
        SceneReleaseGroup(clean_groupname),
        AnimeReleaseGroup
    )
|
|
||||||
|
|
||||||
|
|
||||||
# Match names accepted by SceneReleaseGroup.is_previous_match as preceding a scene release group.
_scene_previous_names = (
    'video_codec',
    'source',
    'video_api',
    'audio_codec',
    'audio_profile',
    'video_profile',
    'audio_channels',
    'screen_size',
    'other',
    'container',
    'language',
    'subtitle_language',
    'subtitle_language.suffix',
    'subtitle_language.prefix',
    'language.suffix',
)

# Tag that lets a match with any other name precede a release group.
_scene_previous_tags = ('release-group-prefix',)

# Tag that forbids a match, even one named above, from preceding a release group.
_scene_no_previous_tags = ('no-release-group-prefix',)
|
|
||||||
|
|
||||||
|
|
||||||
class DashSeparatedReleaseGroup(Rule):
    """
    Detect dash separated release groups that might appear at the end or at the beginning of a release name.

    Series.S01E02.Pilot.DVDRip.x264-CS.mkv
        release_group: CS
    abc-the.title.name.1983.1080p.bluray.x264.mkv
        release_group: abc

    At the end: Release groups should be dash-separated and shouldn't contain spaces nor
    appear in a group with other matches. The preceding matches should be separated by dot.
    If a release group is found, the conflicting matches are removed.

    At the beginning: Release groups should be dash-separated and shouldn't contain spaces nor appear in a group.
    It should be followed by a hole with dot-separated words.
    Detection only happens if no matches exist at the beginning.
    """
    consequence = [RemoveMatch, AppendMatch]

    def __init__(self, value_formatter):
        """Store the formatter applied to the value of the detected release group."""
        super(DashSeparatedReleaseGroup, self).__init__()
        self.value_formatter = value_formatter

    @classmethod
    def is_valid(cls, matches, candidate, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Whether a candidate is a valid release group.

        Returns True or False, or None when the backward scan of an ending
        candidate finishes without finding a dot separator.
        """
        def is_group(marker):
            return marker.name == 'group'

        if not at_end:
            # Leading candidate: longer than one character, outside any group marker and
            # immediately followed by a hole shaped like "-dot.separated.words".
            if len(candidate.value) <= 1:
                return False

            if matches.markers.at_match(candidate, predicate=is_group):
                return False

            next_hole = matches.holes(candidate.end, end, predicate=lambda m: m.start == candidate.end, index=0)
            if not next_hole:
                return False

            raw = next_hole.raw
            return raw[0] == '-' and '-' not in raw[1:] and '.' in raw and ' ' not in raw

        # Ending candidate: it must not share a group marker with another public match.
        enclosing = matches.markers.at_match(candidate, predicate=is_group, index=0)
        if enclosing and matches.at_match(enclosing, predicate=lambda m: not m.private and m.span != candidate.span):
            return False

        # Walk backwards over the preceding public matches: the first separator met must be
        # a dash, then the first later one that is exactly a dot validates the candidate.
        dash_seen = False
        cursor = candidate
        while cursor:
            earlier = matches.range(start,
                                    cursor.start,
                                    index=-1,
                                    predicate=lambda m: not m.private and 'expected' not in m.tags)
            if not earlier:
                break

            gap = cursor.input_string[earlier.end:cursor.start]
            if not gap and cursor.raw[0] == '-':
                gap = '-'

            cursor = earlier

            if not dash_seen:
                if gap != '-':
                    break

                dash_seen = True
                continue

            if gap == '.':
                return True

    def detect(self, matches, start, end, at_end):  # pylint:disable=inconsistent-return-statements
        """
        Detect release group at the end or at the beginning of a filepart.

        Returns the candidate match/hole when it passes ``is_valid``, otherwise None.
        """
        found = None
        if at_end:
            # A trailing container match is excluded from the searched range.
            container = matches.ending(end, lambda m: m.name == 'container', index=0)
            if container:
                end = container.start

            def may_be_group(m):
                if m.private:
                    return False
                if m.name == 'other' and 'not-a-release-group' in m.tags:
                    return False
                return '-' not in m.raw and m.raw.strip() == m.raw

            found = matches.ending(end, index=0, predicate=may_be_group)

        if not found:
            if at_end:
                found = matches.holes(start, end, seps=seps, index=-1,
                                      predicate=lambda m: m.end == end and m.raw.strip(seps) and m.raw[0] == '-')
            else:
                found = matches.holes(start, end, seps=seps, index=0,
                                      predicate=lambda m: m.start == start and m.raw.strip(seps))

        if found and self.is_valid(matches, found, start, end, at_end):
            return found

    def when(self, matches, context):  # pylint:disable=inconsistent-return-statements
        if matches.named('release_group'):
            return

        to_remove = []
        to_append = []
        for filepart in matches.markers.named('path'):
            # The end of the filepart is tried first; matches at an ending candidate are scheduled for removal.
            found = self.detect(matches, filepart.start, filepart.end, True)
            if found:
                to_remove.extend(matches.at_match(found))
            else:
                found = self.detect(matches, filepart.start, filepart.end, False)

            if not found:
                continue

            group_match = Match(found.start, found.end, name='release_group',
                                formatter=self.value_formatter, input_string=found.input_string)

            if group_match.value:
                to_append.append(group_match)
            if to_remove or to_append:
                return to_remove, to_append
|
|
||||||
|
|
||||||
|
|
||||||
class SceneReleaseGroup(Rule):
    """
    Add release_group match in existing matches (scene format).

    Something.XViD-ReleaseGroup.mkv
    """
    dependency = [TitleFromPosition]
    consequence = AppendMatch

    properties = {'release_group': [None]}

    def __init__(self, value_formatter):
        """Store the formatter applied to hole and group values."""
        super(SceneReleaseGroup, self).__init__()
        self.value_formatter = value_formatter

    @staticmethod
    def is_previous_match(match):
        """
        Check if match can precede release_group

        A match whose name is listed in ``_scene_previous_names`` qualifies
        unless it carries a ``_scene_no_previous_tags`` tag; any other match
        qualifies only when it carries a ``_scene_previous_tags`` tag.

        :param match:
        :return:
        """
        if match.name in _scene_previous_names:
            return not match.tagged(*_scene_no_previous_tags)
        return match.tagged(*_scene_previous_tags)

    def when(self, matches, context):  # pylint:disable=too-many-locals
        found_groups = []

        for filepart in marker_sorted(matches.markers.named('path'), matches):
            # pylint:disable=cell-var-from-loop
            start, end = filepart.span

            def inside_filepart(m):
                return m.start >= start and m.end <= end

            # A filepart that already holds a release_group is left alone.
            if matches.named('release_group', predicate=inside_filepart):
                continue

            titles = matches.named('title', predicate=inside_filepart)

            def keep_only_first_title(match):
                """
                Keep only first title from this filepart, as other ones are most likely release group.

                :param match:
                :type match:
                :return: True for every title of the filepart except the first one
                :rtype: bool
                """
                return match in titles[1:]

            last_hole = matches.holes(start, end + 1, formatter=self.value_formatter,
                                      ignore=keep_only_first_title,
                                      predicate=lambda hole: cleanup(hole.value), index=-1)
            if not last_hole:
                continue

            def previous_match_filter(match):
                """
                Filter to apply to find previous match

                :param match:
                :type match:
                :return: False for matches starting before the filepart, otherwise True
                    for public matches and for private ones allowed to precede a group
                :rtype: bool
                """
                if match.start < filepart.start:
                    return False
                return not match.private or self.is_previous_match(match)

            previous_match = matches.previous(last_hole, previous_match_filter, index=0)
            if not previous_match or not self.is_previous_match(previous_match):
                continue
            # Only separators may stand between the previous match and the hole.
            if matches.input_string[previous_match.end:last_hole.start].strip(seps):
                continue
            if int_coercable(last_hole.value.strip(seps)):
                continue

            last_hole.name = 'release_group'
            last_hole.tags = ['scene']

            # if hole is inside a group marker with same value, remove [](){} ...
            group = matches.markers.at_match(last_hole, lambda marker: marker.name == 'group', 0)
            if group:
                group.formatter = self.value_formatter
                if group.value == last_hole.value:
                    last_hole.start = group.start + 1
                    last_hole.end = group.end - 1
                    last_hole.tags = ['anime']

            # Extra titles swallowed by the hole are removed from the matches right away.
            for ignored_match in matches.range(last_hole.start, last_hole.end, keep_only_first_title):
                matches.remove(ignored_match)

            found_groups.append(last_hole)
        return found_groups
|
|
||||||
|
|
||||||
|
|
||||||
class AnimeReleaseGroup(Rule):
    """
    Add release_group match in existing matches (anime format)
    ...[ReleaseGroup] Something.mkv

    For each filepart, the first ``group`` marker that holds no match other
    than ``weak-language`` ones, and whose content is neither empty nor
    integer-coercible, becomes a ``release_group`` match tagged ``anime``.
    """
    dependency = [SceneReleaseGroup, TitleFromPosition]
    consequence = [RemoveMatch, AppendMatch]

    properties = {'release_group': [None]}

    def when(self, matches, context):
        to_remove = []
        to_append = []

        # If a release_group is found before, ignore this kind of release_group rule.
        # (A former second guard also required an existing release_group and could
        # therefore never trigger after this return; it has been dropped.)
        if matches.named('release_group'):
            return False

        for filepart in marker_sorted(matches.markers.named('path'), matches):

            # pylint:disable=bad-continuation
            empty_group = matches.markers.range(filepart.start,
                                                filepart.end,
                                                lambda marker: (marker.name == 'group'
                                                                and not matches.range(marker.start, marker.end,
                                                                                      lambda m:
                                                                                      'weak-language' not in m.tags)
                                                                and marker.value.strip(seps)
                                                                and not int_coercable(marker.value.strip(seps))), 0)

            if empty_group:
                # Turn a copy of the marker into a regular match, shrunk by one
                # character on each side of its raw span.
                group = copy.copy(empty_group)
                group.marker = False
                group.raw_start += 1
                group.raw_end -= 1
                group.tags = ['anime']
                group.name = 'release_group'
                to_append.append(group)
                # The weak-language matches inside the marker give way to the release group.
                to_remove.extend(matches.range(empty_group.start, empty_group.end,
                                               lambda m: 'weak-language' in m.tags))

        if to_remove or to_append:
            return to_remove, to_append
        return False
|
|
||||||
@@ -1,163 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
screen_size property
|
|
||||||
"""
|
|
||||||
from rebulk.match import Match
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
from rebulk import Rebulk, Rule, RemoveMatch, AppendMatch
|
|
||||||
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.quantity import FrameRate
|
|
||||||
from ..common.validators import seps_surround
|
|
||||||
from ..common import dash, seps
|
|
||||||
from ...reutils import build_or_pattern
|
|
||||||
|
|
||||||
|
|
||||||
def screen_size(config):
    """
    Builder for rebulk object.

    Reads ``interlaced``, ``progressive``, ``frame_rates``, ``min_ar`` and
    ``max_ar`` from the configuration and registers the ``screen_size`` and
    ``frame_rate`` patterns together with their post-processing rules.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    interlaced = frozenset(config['interlaced'])
    progressive = frozenset(config['progressive'])
    frame_rates = [re.escape(rate) for rate in config['frame_rates']]
    min_ar, max_ar = config['min_ar'], config['max_ar']

    rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name='screen_size', validator=seps_surround, abbreviations=[dash],
                    disabled=lambda context: is_disabled(context, 'screen_size'))

    frame_rate_pattern = build_or_pattern(frame_rates, name='frame_rate')
    interlaced_pattern = build_or_pattern(interlaced, name='height')
    progressive_pattern = build_or_pattern(progressive, name='height')

    # Optional "<width>x" / "<width>*" prefix shared by the height-based patterns.
    res_pattern = r'(?:(?P<width>\d{3,4})(?:x|\*))?'
    progressive_base = res_pattern + progressive_pattern

    # Registration order is preserved from the original code.
    rebulk.regex(res_pattern + interlaced_pattern + r'(?P<scan_type>i)' + frame_rate_pattern + '?')
    rebulk.regex(progressive_base + r'(?P<scan_type>p)' + frame_rate_pattern + '?')
    rebulk.regex(progressive_base + r'(?P<scan_type>p)?(?:hd)')
    rebulk.regex(progressive_base + r'(?P<scan_type>p)?x?')
    rebulk.string('4k', value='2160p')
    rebulk.regex(r'(?P<width>\d{3,4})-?(?:x|\*)-?(?P<height>\d{3,4})',
                 conflict_solver=lambda match, other: '__default__' if other.name == 'screen_size' else other)

    rebulk.regex(frame_rate_pattern + '(p|fps)', name='frame_rate',
                 formatter=FrameRate.fromstring, disabled=lambda context: is_disabled(context, 'frame_rate'))

    rebulk.rules(PostProcessScreenSize(progressive, min_ar, max_ar), ScreenSizeOnlyOne, ResolveScreenSizeConflicts)

    return rebulk
|
|
||||||
|
|
||||||
|
|
||||||
class PostProcessScreenSize(Rule):
    """
    Process the screen size calculating the aspect ratio if available.

    Convert to a standard notation (720p, 1080p, etc) when it's a standard resolution and
    aspect ratio is valid or not available.

    It also creates an aspect_ratio match when available.
    """
    consequence = AppendMatch

    def __init__(self, standard_heights, min_ar, max_ar):
        """
        :param standard_heights: heights allowed to use the short ``<height><scan_type>`` notation
        :param min_ar: exclusive lower bound of the aspect ratio accepted for the short notation
        :param max_ar: exclusive upper bound of the aspect ratio accepted for the short notation
        """
        super(PostProcessScreenSize, self).__init__()
        self.standard_heights = standard_heights
        self.min_ar = min_ar
        self.max_ar = max_ar

    def when(self, matches, context):
        """
        Rewrite the value of every ``screen_size`` match and collect the matches to append.

        :param matches: current matches
        :param context: parsing context, used to check which properties are disabled
        :return: frame_rate children and aspect_ratio matches to append
        :rtype: list
        """
        to_append = []
        for match in matches.named('screen_size'):
            if not is_disabled(context, 'frame_rate'):
                # Promote the frame_rate children captured by the screen_size patterns.
                for frame_rate in match.children.named('frame_rate'):
                    frame_rate.formatter = FrameRate.fromstring
                    to_append.append(frame_rate)

            values = match.children.to_dict()
            if 'height' not in values:
                continue

            # A missing scan type is reported as progressive.
            scan_type = (values.get('scan_type') or 'p').lower()
            height = values['height']
            if 'width' not in values:
                match.value = '{0}{1}'.format(height, scan_type)
                continue

            width = values['width']
            height_number = float(height)
            if not height_number:
                # An all-zero height (e.g. "640x000") has no aspect ratio. Keep the raw
                # notation instead of raising ZeroDivisionError.
                match.value = '{0}x{1}'.format(width, height)
                continue

            calculated_ar = float(width) / height_number

            aspect_ratio = Match(match.start, match.end, input_string=match.input_string,
                                 name='aspect_ratio', value=round(calculated_ar, 3))

            if not is_disabled(context, 'aspect_ratio'):
                to_append.append(aspect_ratio)

            if height in self.standard_heights and self.min_ar < calculated_ar < self.max_ar:
                match.value = '{0}{1}'.format(height, scan_type)
            else:
                match.value = '{0}x{1}'.format(width, height)

        return to_append
|
|
||||||
|
|
||||||
|
|
||||||
class ScreenSizeOnlyOne(Rule):
    """
    Keep a single screen_size per filepath part.

    When a part holds several screen_size matches whose values differ, only the
    last one returned for that part is kept.
    """
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :param matches: current matches
        :param context: parsing context (unused)
        :return: screen_size matches to remove
        :rtype: list
        """
        discarded = []
        for part in matches.markers.named('path'):
            found = matches.range(part.start, part.end,
                                  lambda match: match.name == 'screen_size')
            candidates = list(reversed(found))
            if len(candidates) <= 1:
                continue

            distinct_values = set(candidate.value for candidate in candidates)
            if len(distinct_values) > 1:
                # The first entry of the reversed list is the one that survives.
                discarded.extend(candidates[1:])

        return discarded
|
|
||||||
|
|
||||||
|
|
||||||
class ResolveScreenSizeConflicts(Rule):
    """
    Resolve screen_size conflicts with season and episode matches.

    The screen_size wins when it directly neighbours a video_profile (after it) or a
    date/source/other/streaming_service match (before it); otherwise it is removed.
    """
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :param matches: current matches
        :param context: parsing context (unused)
        :return: matches to remove
        :rtype: list
        """
        def has_content(hole):
            """Tell whether a hole holds something other than separators."""
            return hole.value and hole.value.strip(seps)

        discarded = []
        for part in matches.markers.named('path'):
            size_match = matches.range(part.start, part.end, lambda match: match.name == 'screen_size', 0)
            if not size_match:
                continue

            rivals = matches.conflicting(size_match, lambda match: match.name in ('season', 'episode'))
            if not rivals:
                continue

            neighbour_found = False

            profile = matches.range(size_match.end, part.end, lambda match: match.name == 'video_profile', 0)
            if profile and not matches.holes(size_match.end, profile.start, predicate=has_content):
                discarded.extend(rivals)
                neighbour_found = True

            before = matches.previous(size_match, index=0, predicate=(
                lambda m: m.name in ('date', 'source', 'other', 'streaming_service')))
            if before and not matches.holes(before.end, size_match.start, predicate=has_content):
                discarded.extend(rivals)
                neighbour_found = True

            if not neighbour_found:
                discarded.append(size_match)

        return discarded
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
size property
|
|
||||||
"""
|
|
||||||
import re
|
|
||||||
|
|
||||||
from rebulk import Rebulk
|
|
||||||
|
|
||||||
from ..common import dash
|
|
||||||
from ..common.quantity import Size
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import seps_surround
|
|
||||||
|
|
||||||
|
|
||||||
def size(config):  # pylint:disable=unused-argument
    """
    Create the Rebulk object detecting the ``size`` property.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    builder = Rebulk(disabled=lambda context: is_disabled(context, 'size'))
    builder.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    builder.defaults(name='size', validator=seps_surround)

    # Integer and decimal quantities followed by a MB/GB/TB unit.
    size_patterns = (r'\d+-?[mgt]b', r'\d+\.\d+-?[mgt]b')
    builder.regex(*size_patterns, formatter=Size.fromstring, tags=['release-group-prefix'])

    return builder
|
|
||||||
@@ -1,235 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
source property
|
|
||||||
"""
|
|
||||||
import copy
|
|
||||||
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule
|
|
||||||
|
|
||||||
from .audio_codec import HqConflictRule
|
|
||||||
from ..common import dash, seps
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import seps_before, seps_after, or_
|
|
||||||
|
|
||||||
|
|
||||||
def source(config):  # pylint:disable=unused-argument
    """
    Create the Rebulk object detecting the ``source`` property.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    builder = Rebulk(disabled=lambda context: is_disabled(context, 'source'))
    builder = builder.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash],
                                     private_parent=True, children=True)
    builder = builder.defaults(name='source',
                               tags=['video-codec-prefix', 'streaming_service.suffix'],
                               validate_all=True,
                               validator={'__parent__': or_(seps_before, seps_after)})

    rip_prefix = '(?P<other>Rip)-?'
    rip_suffix = '-?(?P<other>Rip)'
    rip_optional_suffix = '(?:' + rip_suffix + ')?'

    def build_source_pattern(*patterns, **kwargs):
        """Wrap each pattern in a group, surrounded by the optional prefix and suffix."""
        template = (kwargs.get('prefix') or '') + '({0})' + (kwargs.get('suffix') or '')
        return [template.format(pattern) for pattern in patterns]

    def demote_other(match, other):  # pylint: disable=unused-argument
        """Conflict solver giving way to 'other' and 'release_group' matches."""
        if other.name in ('other', 'release_group'):
            return other
        return '__default__'

    def ripped(source_name):
        """Values for a pattern exposing both the source and the 'Rip' marker."""
        return {'source': source_name, 'other': 'Rip'}

    register = builder.regex

    register(*build_source_pattern('VHS', suffix=rip_optional_suffix), value=ripped('VHS'))
    register(*build_source_pattern('CAM', suffix=rip_optional_suffix), value=ripped('Camera'))
    register(*build_source_pattern('HD-?CAM', suffix=rip_optional_suffix), value=ripped('HD Camera'))
    register(*build_source_pattern('TELESYNC', 'TS', suffix=rip_optional_suffix), value=ripped('Telesync'))
    register(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=rip_optional_suffix),
             value=ripped('HD Telesync'))
    register(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint')
    register(*build_source_pattern('TELECINE', 'TC', suffix=rip_optional_suffix), value=ripped('Telecine'))
    register(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=rip_optional_suffix),
             value=ripped('HD Telecine'))
    register(*build_source_pattern('PPV', suffix=rip_optional_suffix), value=ripped('Pay-per-view'))
    register(*build_source_pattern('SD-?TV', suffix=rip_optional_suffix), value=ripped('TV'))
    # A bare "TV" is too common to match on its own: the Rip suffix is mandatory here.
    register(*build_source_pattern('TV', suffix=rip_suffix), value=ripped('TV'))
    register(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix), value=ripped('TV'))
    register(*build_source_pattern('TV-?(?=Dub)'), value='TV')
    register(*build_source_pattern('DVB', 'PD-?TV', suffix=rip_optional_suffix), value=ripped('Digital TV'))
    register(*build_source_pattern('DVD', suffix=rip_optional_suffix), value=ripped('DVD'))
    register(*build_source_pattern('DM', suffix=rip_optional_suffix), value=ripped('Digital Master'))
    # 'DVD-?R(?:$|^E)' => DVD-Real ...
    register(*build_source_pattern('VIDEO-?TS', 'DVD-?R(?:$|(?!E))', 'DVD-?9', 'DVD-?5'), value='DVD')

    register(*build_source_pattern('HD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
             value=ripped('HDTV'))
    register(*build_source_pattern('TV-?HD', suffix=rip_suffix), conflict_solver=demote_other,
             value=ripped('HDTV'))
    register(*build_source_pattern('TV', suffix='-?(?P<other>Rip-?HD)'), conflict_solver=demote_other,
             value=ripped('HDTV'))

    register(*build_source_pattern('VOD', suffix=rip_optional_suffix), value=ripped('Video on Demand'))

    register(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix), value=ripped('Web'))
    # WEBCap is a synonym of WEBRip, mostly used by non-English releases.
    register(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
             value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
    register(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
             value={'source': 'Web'})
    register('(WEB)', value='Web', tags='weak.source')

    register(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix), value=ripped('HD-DVD'))

    register(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50', suffix=rip_optional_suffix),
             value=ripped('Blu-ray'))
    # BRRip
    register(*build_source_pattern('(?P<another>BR)-?(?=Scr(?:eener)?)', '(?P<another>BR)-?(?=Mux)'),
             value={'source': 'Blu-ray', 'another': 'Reencoded'})
    # BRRip
    register(*build_source_pattern('(?P<another>BR)', suffix=rip_suffix),
             value={'source': 'Blu-ray', 'other': 'Rip', 'another': 'Reencoded'})

    register(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'), value='Ultra HD Blu-ray')

    register(*build_source_pattern('AHDTV'), value='Analog HDTV')
    register(*build_source_pattern('UHD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
             value=ripped('Ultra HDTV'))
    register(*build_source_pattern('UHD', suffix=rip_suffix), conflict_solver=demote_other,
             value=ripped('Ultra HDTV'))

    register(*build_source_pattern('DSR', 'DTH', suffix=rip_optional_suffix), value=ripped('Satellite'))
    register(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix), value=ripped('Satellite'))

    builder.rules(ValidateSourcePrefixSuffix, ValidateWeakSource, UltraHdBlurayRule)

    return builder
|
|
||||||
|
|
||||||
|
|
||||||
class UltraHdBlurayRule(Rule):
    """
    Replace other:Ultra HD and source:Blu-ray with source:Ultra HD Blu-ray
    """
    dependency = HqConflictRule
    consequence = [RemoveMatch, AppendMatch]

    @classmethod
    def find_ultrahd(cls, matches, start, end, index):
        """Return the public ``other: Ultra HD`` match at ``index`` within [start, end)."""
        def is_ultrahd(candidate):
            return not candidate.private and candidate.name == 'other' and candidate.value == 'Ultra HD'

        return matches.range(start, end, index=index, predicate=is_ultrahd)

    @classmethod
    def validate_range(cls, matches, start, end):
        """Check that the range has no non-separator hole and no disallowed match."""
        if matches.holes(start, end, predicate=lambda m: m.value.strip(seps)):
            return False

        def is_disallowed(candidate):
            # Private matches, screen_size and color_depth are tolerated, as are
            # 'other' matches tagged as valid neighbours.
            if candidate.private:
                return False
            if candidate.name in ('screen_size', 'color_depth'):
                return False
            return candidate.name != 'other' or 'uhdbluray-neighbor' not in candidate.tags

        return not matches.range(start, end, predicate=is_disallowed)

    def when(self, matches, context):
        """
        :param matches: current matches
        :param context: parsing context (unused)
        :return: ``(to_remove, to_append)`` when something changes, ``False`` otherwise
        """
        removed = []
        appended = []
        for part in matches.markers.named('path'):
            bluray_sources = matches.range(part.start, part.end, predicate=(
                lambda m: not m.private and m.name == 'source' and m.value == 'Blu-ray'))
            for bluray in bluray_sources:
                # Look for an Ultra HD marker before the source first, then after it.
                ultra_hd = self.find_ultrahd(matches, part.start, bluray.start, -1)
                if not ultra_hd or not self.validate_range(matches, ultra_hd.end, bluray.start):
                    ultra_hd = self.find_ultrahd(matches, bluray.end, part.end, 0)
                    if not ultra_hd or not self.validate_range(matches, bluray.end, ultra_hd.start):
                        # Last resort: a 2160p screen size in the same part.
                        has_2160p = matches.range(part.start, part.end, predicate=(
                            lambda m: m.name == 'screen_size' and m.value == '2160p'))
                        if not has_2160p:
                            continue

                if ultra_hd:
                    ultra_hd.private = True

                upgraded = copy.copy(bluray)
                upgraded.value = 'Ultra HD Blu-ray'
                removed.append(bluray)
                appended.append(upgraded)

        if not removed and not appended:
            return False
        return removed, appended
|
|
||||||
|
|
||||||
|
|
||||||
class ValidateSourcePrefixSuffix(Rule):
    """
    Validate source with source prefix, source suffix.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :param matches: current matches
        :param context: parsing context (unused)
        :return: source initiators (and their children) to remove
        :rtype: list
        """
        rejected = []
        for part in matches.markers.named('path'):
            for found in matches.range(part.start, part.end, predicate=lambda m: m.name == 'source'):
                initiator = found.initiator

                # Invalid when not preceded by a separator or a 'source-prefix' match...
                invalid = (not seps_before(initiator) and
                           not matches.range(initiator.start - 1, initiator.start - 2,
                                             lambda m: 'source-prefix' in m.tags))
                # ...or when not followed by a separator or a 'source-suffix' match.
                if not invalid:
                    invalid = (not seps_after(initiator) and
                               not matches.range(initiator.end, initiator.end + 1,
                                                 lambda m: 'source-suffix' in m.tags))

                if invalid:
                    if initiator.children:
                        rejected.extend(initiator.children)
                    rejected.append(initiator)

        return rejected
|
|
||||||
|
|
||||||
|
|
||||||
class ValidateWeakSource(Rule):
    """
    Validate weak source
    """
    dependency = [ValidateSourcePrefixSuffix]
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :param matches: current matches
        :param context: parsing context (unused)
        :return: weak source matches (and their children) to remove
        :rtype: list
        """
        rejected = []
        for part in matches.markers.named('path'):
            for found in matches.range(part.start, part.end, predicate=lambda m: m.name == 'source'):
                if 'weak.source' not in found.tags:
                    continue
                # A weak source followed by another source, with unmatched text before it,
                # is most likely part of the title.
                if not matches.range(found.end, part.end, predicate=lambda m: m.name == 'source'):
                    continue
                if not matches.holes(part.start, found.start,
                                     predicate=lambda m: m.value.strip(seps), index=-1):
                    continue

                if found.children:
                    rejected.extend(found.children)
                rejected.append(found)

        return rejected
|
|
||||||
@@ -1,78 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
streaming_service property
|
|
||||||
"""
|
|
||||||
import re
|
|
||||||
|
|
||||||
from rebulk import Rebulk
|
|
||||||
from rebulk.rules import Rule, RemoveMatch
|
|
||||||
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ...rules.common import seps, dash
|
|
||||||
from ...rules.common.validators import seps_before, seps_after
|
|
||||||
|
|
||||||
|
|
||||||
def streaming_service(config):  # pylint: disable=too-many-statements,unused-argument
    """Streaming service property.

    Each configuration entry maps a streaming service name to one pattern or a list of
    patterns. Patterns starting with ``re:`` are registered as regular expressions;
    all others are registered as plain strings.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    regex_marker = 're:'

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'streaming_service'))
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['source-prefix'])

    for value, items in config.items():
        patterns = items if isinstance(items, list) else [items]
        for pattern in patterns:
            if pattern.startswith(regex_marker):
                # The marker only flags the entry as a regular expression. Left in place,
                # the compiled expression would require the literal text "re:".
                rebulk.regex(pattern[len(regex_marker):], value=value)
            else:
                rebulk.string(pattern, value=value)

    rebulk.rules(ValidateStreamingService)

    return rebulk
|
|
||||||
|
|
||||||
|
|
||||||
class ValidateStreamingService(Rule):
    """Validate streaming service matches."""

    priority = 128
    consequence = RemoveMatch

    def when(self, matches, context):
        """Streaming service is always before source.

        :param matches:
        :type matches: rebulk.match.Matches
        :param context:
        :type context: dict
        :return: matches to remove
        """
        def has_content(hole):
            """Tell whether a hole holds something other than separators."""
            return hole.value.strip(seps)

        discarded = []
        for service in matches.named('streaming_service'):
            following = matches.next(service, lambda match: 'streaming_service.suffix' in match.tags, 0)
            preceding = matches.previous(service, lambda match: 'streaming_service.prefix' in match.tags, 0)
            has_other = service.initiator and service.initiator.children.named('other')

            if not has_other:
                unsupported_after = (not following or
                                     matches.holes(service.end, following.start, predicate=has_content) or
                                     not seps_before(service))
                if unsupported_after:
                    unsupported_before = (not preceding or
                                          matches.holes(preceding.end, service.start, predicate=has_content) or
                                          not seps_after(service))
                    if unsupported_before:
                        discarded.append(service)
                        continue

            if service.value == 'Comedy Central':
                # Current match is a valid streaming service, removing invalid Criterion Collection (CC) matches
                discarded.extend(matches.named('edition', predicate=lambda match: match.value == 'Criterion'))

        return discarded
|
|
||||||
@@ -1,349 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
title property
|
|
||||||
"""
|
|
||||||
|
|
||||||
from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
|
|
||||||
from rebulk.formatters import formatters
|
|
||||||
|
|
||||||
from .film import FilmTitleRule
|
|
||||||
from .language import (
|
|
||||||
SubtitlePrefixLanguageRule,
|
|
||||||
SubtitleSuffixLanguageRule,
|
|
||||||
SubtitleExtensionRule,
|
|
||||||
NON_SPECIFIC_LANGUAGES
|
|
||||||
)
|
|
||||||
from ..common import seps, title_seps
|
|
||||||
from ..common.comparators import marker_sorted
|
|
||||||
from ..common.expected import build_expected_function
|
|
||||||
from ..common.formatters import cleanup, reorder_title
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import seps_surround
|
|
||||||
|
|
||||||
|
|
||||||
def title(config):  # pylint:disable=unused-argument
    """
    Create the Rebulk object detecting the ``title`` property.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    builder = Rebulk(disabled=lambda context: is_disabled(context, 'title'))
    builder.rules(TitleFromPosition, PreferTitleWithYear)

    # Titles supplied through the 'expected_title' context entry.
    builder.functional(build_expected_function('expected_title'),
                       name='title',
                       tags=['expected', 'title'],
                       validator=seps_surround,
                       formatter=formatters(cleanup, reorder_title),
                       conflict_solver=lambda match, other: other,
                       disabled=lambda context: not context.get('expected_title'))

    return builder
|
|
||||||
|
|
||||||
|
|
||||||
class TitleBaseRule(Rule):
    """
    Add title match in existing matches
    """
    # pylint:disable=no-self-use,unused-argument
    consequence = [AppendMatch, RemoveMatch]

    def __init__(self, match_name, match_tags=None, alternative_match_name=None):
        """
        :param match_name: name given to the title matches created by this rule
        :param match_tags: tags assigned to the created title matches
        :param alternative_match_name: name given to additional titles split from a hole;
            when not set, holes are not split
        """
        super(TitleBaseRule, self).__init__()
        self.match_name = match_name
        self.match_tags = match_tags
        self.alternative_match_name = alternative_match_name

    def hole_filter(self, hole, matches):
        """
        Filter holes for titles. This implementation accepts every hole.

        :param hole:
        :type hole:
        :param matches:
        :type matches:
        :return: always True
        :rtype: bool
        """
        return True

    def filepart_filter(self, filepart, matches):
        """
        Filter filepart for titles. This implementation accepts every filepart.

        :param filepart:
        :type filepart:
        :param matches:
        :type matches:
        :return: always True
        :rtype: bool
        """
        return True

    def holes_process(self, holes, matches):
        """
        Crop the holes with the group markers, ignoring groups spanning a whole path marker.

        :param holes:
        :type holes:
        :param matches:
        :type matches:
        :return: the cropped holes
        :rtype: list
        """
        # Build a filtered list instead of removing from the list being iterated:
        # removing in place skips the marker following each removed one.
        group_markers = []
        for group_marker in matches.markers.named('group'):
            path_marker = matches.markers.at_match(group_marker, predicate=lambda m: m.name == 'path', index=0)
            if path_marker and path_marker.span == group_marker.span:
                continue
            group_markers.append(group_marker)

        cropped_holes = []
        for hole in holes:
            cropped_holes.extend(hole.crop(group_markers))

        return cropped_holes

    @staticmethod
    def is_ignored(match):
        """
        Ignore matches when scanning for title (hole).

        Full word language and countries won't be ignored if they are uppercase.
        """
        return not (len(match) > 3 and match.raw.isupper()) and match.name in ('language', 'country', 'episode_details')

    def should_keep(self, match, to_keep, matches, filepart, hole, starting):
        """
        Check if this match should be accepted when ending or starting a hole.

        :param match:
        :type match:
        :param to_keep:
        :type to_keep: list[Match]
        :param matches:
        :type matches: Matches
        :param filepart: the filepart match
        :type filepart: Match
        :param hole: the hole match
        :type hole: Match
        :param starting: true if match is starting the hole
        :type starting: bool
        :return:
        :rtype:
        """
        if match.name in ('language', 'country'):
            # Keep language if exactly matching the hole.
            if len(hole.value) == len(match.raw):
                return True

            # Keep language if other languages exists in the filepart.
            outside_matches = filepart.crop(hole)
            other_languages = []
            for outside in outside_matches:
                other_languages.extend(matches.range(outside.start, outside.end,
                                                     lambda c_match: c_match.name == match.name and
                                                     c_match not in to_keep and
                                                     c_match.value not in NON_SPECIFIC_LANGUAGES))

            if not other_languages and (not starting or len(match.raw) <= 3):
                return True

        return False

    def should_remove(self, match, matches, filepart, hole, context):
        """
        Check if this match should be removed after being ignored.

        :param match:
        :param matches:
        :param filepart:
        :param hole:
        :param context:
        :return:
        """
        if context.get('type') == 'episode' and match.name == 'episode_details':
            return match.start >= hole.start and match.end <= hole.end
        return True

    def check_titles_in_filepart(self, filepart, matches, context):  # pylint:disable=inconsistent-return-statements
        """
        Find title in filepart (ignoring language)

        :return: ``(titles, to_remove)`` for the first usable hole, or None when no hole qualifies
        """
        # pylint:disable=too-many-locals,too-many-branches,too-many-statements
        start, end = filepart.span

        holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
                              ignore=self.is_ignored,
                              predicate=lambda m: m.value)

        holes = self.holes_process(holes, matches)

        for hole in holes:
            if not hole or (self.hole_filter and not self.hole_filter(hole, matches)):
                continue

            to_remove = []
            to_keep = []

            ignored_matches = matches.range(hole.start, hole.end, self.is_ignored)

            if ignored_matches:
                # Shrink the hole from its end for ignored matches trailing it.
                for ignored_match in reversed(ignored_matches):
                    # pylint:disable=undefined-loop-variable, cell-var-from-loop
                    trailing = matches.chain_before(hole.end, seps, predicate=lambda m: m == ignored_match)
                    if trailing:
                        should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, False)
                        if should_keep:
                            # should_keep may be a single flag or an (append, crop) pair.
                            # pylint:disable=unpacking-non-sequence
                            try:
                                append, crop = should_keep
                            except TypeError:
                                append, crop = should_keep, should_keep
                            if append:
                                to_keep.append(ignored_match)
                            if crop:
                                hole.end = ignored_match.start

                # Shrink the hole from its start for ignored matches leading it.
                for ignored_match in ignored_matches:
                    if ignored_match not in to_keep:
                        starting = matches.chain_after(hole.start, seps,
                                                       predicate=lambda m: m == ignored_match)
                        if starting:
                            should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, True)
                            if should_keep:
                                # pylint:disable=unpacking-non-sequence
                                try:
                                    append, crop = should_keep
                                except TypeError:
                                    append, crop = should_keep, should_keep
                                if append:
                                    to_keep.append(ignored_match)
                                if crop:
                                    hole.start = ignored_match.end

            for match in ignored_matches:
                if self.should_remove(match, matches, filepart, hole, context):
                    to_remove.append(match)
            for keep_match in to_keep:
                if keep_match in to_remove:
                    to_remove.remove(keep_match)

            if hole and hole.value:
                hole.name = self.match_name
                hole.tags = self.match_tags
                if self.alternative_match_name:
                    # Split and keep values that can be a title
                    titles = hole.split(title_seps, lambda m: m.value)
                    for title_match in list(titles[1:]):
                        previous_title = titles[titles.index(title_match) - 1]
                        separator = matches.input_string[previous_title.end:title_match.start]
                        # A lone dash with no separator around it joins both parts into one title.
                        if len(separator) == 1 and separator == '-' \
                                and previous_title.raw[-1] not in seps \
                                and title_match.raw[0] not in seps:
                            titles[titles.index(title_match) - 1].end = title_match.end
                            titles.remove(title_match)
                        else:
                            title_match.name = self.alternative_match_name

                else:
                    titles = [hole]
                return titles, to_remove

    def when(self, matches, context):
        """
        :param matches: current matches
        :param context: parsing context
        :return: ``(titles, to_remove)`` when something was found, ``False`` otherwise
        """
        ret = []
        to_remove = []

        # Nothing to do when an expected title has already been matched.
        if matches.named(self.match_name, lambda match: 'expected' in match.tags):
            return False

        fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
                     if not self.filepart_filter or self.filepart_filter(filepart, matches)]

        # Prioritize fileparts containing the year
        years_fileparts = []
        for filepart in fileparts:
            year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
            if year_match:
                years_fileparts.append(filepart)

        for filepart in fileparts:
            # Fileparts visited here must not be processed again by the loop below.
            try:
                years_fileparts.remove(filepart)
            except ValueError:
                pass
            titles = self.check_titles_in_filepart(filepart, matches, context)
            if titles:
                titles, to_remove_c = titles
                ret.extend(titles)
                to_remove.extend(to_remove_c)
                break

        # Add title match in all fileparts containing the year.
        for filepart in years_fileparts:
            titles = self.check_titles_in_filepart(filepart, matches, context)
            if titles:
                # pylint:disable=unbalanced-tuple-unpacking
                titles, to_remove_c = titles
                ret.extend(titles)
                to_remove.extend(to_remove_c)

        if ret or to_remove:
            return ret, to_remove
        return False
|
|
||||||
|
|
||||||
|
|
||||||
class TitleFromPosition(TitleBaseRule):
    """
    Create ``title`` and ``alternative_title`` matches from the holes left between matches.
    """
    dependency = [FilmTitleRule, SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule]

    properties = {'title': [None], 'alternative_title': [None]}

    def __init__(self):
        super(TitleFromPosition, self).__init__(match_name='title',
                                                match_tags=['title'],
                                                alternative_match_name='alternative_title')

    def enabled(self, context):
        """Tell whether the rule runs: it is skipped when 'alternative_title' is disabled."""
        alternative_title_disabled = is_disabled(context, 'alternative_title')
        return not alternative_title_disabled
|
|
||||||
|
|
||||||
|
|
||||||
class PreferTitleWithYear(Rule):
    """
    Prefer title where filepart contains year.

    Titles are bucketed by whether the ``path`` marker holding them also
    holds a ``year`` match, and whether that year sits inside a ``group``
    marker.  Titles whose value is not shared with the preferred bucket are
    removed; the preferred titles are tagged ``equivalent-ignore``.
    """
    dependency = TitleFromPosition
    consequence = [RemoveMatch, AppendTags(['equivalent-ignore'])]

    properties = {'title': [None]}

    def when(self, matches, context):
        """
        Select the title matches to remove and the ones to tag.

        :param matches: current matches collection
        :param context: parsing context (unused here)
        :return: ``(to_remove, to_tag)`` when at least one of them is
                 non-empty, ``False`` otherwise
        """
        titles = matches.named('title')
        year_in_group = []
        year_outside_group = []

        for candidate in titles:
            filepart = matches.markers.at_match(candidate, lambda marker: marker.name == 'path', 0)
            if not filepart:
                continue
            year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
            if not year_match:
                continue
            enclosing_group = matches.markers.at_match(year_match, lambda m: m.name == 'group')
            bucket = year_in_group if enclosing_group else year_outside_group
            bucket.append(candidate)

        # A year inside a group wins over a bare year; with no year at all
        # nothing is tagged and every title value is kept.
        preferred = year_in_group or year_outside_group
        to_tag = list(preferred)
        kept_values = {title.value for title in (preferred or titles)}

        to_remove = [title for title in titles if title.value not in kept_values]
        if to_remove or to_tag:
            return to_remove, to_tag
        return False
|
|
||||||
@@ -1,83 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
type property
|
|
||||||
"""
|
|
||||||
from rebulk import CustomRule, Rebulk, POST_PROCESS
|
|
||||||
from rebulk.match import Match
|
|
||||||
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ...rules.processors import Processors
|
|
||||||
|
|
||||||
|
|
||||||
def _type(matches, value):
    """
    Append a ``type`` match carrying ``value`` to ``matches``.

    The match is zero-length: its start and end both equal the length of
    ``matches.input_string``.

    :param matches: matches collection to append to
    :param value: value stored on the new ``type`` match
    :return: None
    """
    end_of_input = len(matches.input_string)
    type_match = Match(end_of_input, end_of_input, name='type', value=value)
    matches.append(type_match)
|
|
||||||
|
|
||||||
|
|
||||||
def type_(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    The returned object carries a single rule, ``TypeProcessor``, and is
    disabled whenever ``'type'`` is disabled in the context.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def type_disabled(context):
        return is_disabled(context, 'type')

    return Rebulk(disabled=type_disabled).rules(TypeProcessor)
|
|
||||||
|
|
||||||
|
|
||||||
class TypeProcessor(CustomRule):
    """
    Post processor to find file type based on all others found matches.
    """
    priority = POST_PROCESS

    dependency = Processors

    properties = {'type': ['episode', 'movie']}

    def when(self, matches, context):  # pylint:disable=too-many-return-statements
        """
        Decide the type to record.

        Checks run in this order: an explicit ``type`` in ``context``; any
        episode/season/absolute_episode/episode_details match; a ``film``
        match; a ``date`` without ``year``; a ``bonus`` without ``year``; a
        ``crc32`` together with a release group tagged ``anime``.  If none
        applies the result is ``'movie'``.

        :param matches: current matches collection
        :param context: parsing context
        :return: the truthy ``context['type']`` value if set, otherwise
                 ``'episode'`` or ``'movie'``
        """
        forced_type = context.get('type', None)
        if forced_type:
            return forced_type

        # All four lookups are performed before testing, as a list (not a
        # generator) so none of them is skipped.
        episode_hints = [matches.named(name)
                         for name in ('episode', 'season', 'absolute_episode', 'episode_details')]
        if any(episode_hints):
            return 'episode'

        if matches.named('film'):
            return 'movie'

        years = matches.named('year')
        dates = matches.named('date')
        if dates and not years:
            return 'episode'

        bonuses = matches.named('bonus')
        if bonuses and not years:
            return 'episode'

        checksums = matches.named('crc32')
        anime_groups = matches.named('release_group', lambda match: 'anime' in match.tags)
        if checksums and anime_groups:
            return 'episode'

        return 'movie'

    def then(self, matches, when_response, context):
        """
        Record the value returned by ``when`` as a ``type`` match.

        :param matches: current matches collection
        :param when_response: value returned by ``when``
        :param context: parsing context (unused here)
        """
        _type(matches, when_response)
|
|
||||||
@@ -1,126 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
video_codec and video_profile property
|
|
||||||
"""
|
|
||||||
from rebulk import Rebulk, Rule, RemoveMatch
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
from ..common import dash
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import seps_after, seps_before, seps_surround
|
|
||||||
|
|
||||||
|
|
||||||
def video_codec(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers, in order, patterns for ``video_codec``, ``video_profile``
    (plus one ``video_api`` pattern) and ``color_depth``, then attaches the
    ``ValidateVideoCodec`` and ``VideoProfileRule`` rules.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    # Defaults applied to the codec patterns registered below.
    rebulk.defaults(name="video_codec",
                    tags=['source-suffix', 'streaming_service.suffix'],
                    disabled=lambda context: is_disabled(context, 'video_codec'))

    rebulk.regex(r'Rv\d{2}', value='RealVideo')
    rebulk.regex('Mpe?g-?2', '[hx]-?262', value='MPEG-2')
    rebulk.string("DVDivX", "DivX", value="DivX")
    rebulk.string('XviD', value='Xvid')
    rebulk.regex('VC-?1', value='VC-1')
    rebulk.string('VP7', value='VP7')
    rebulk.string('VP8', 'VP80', value='VP8')
    rebulk.string('VP9', value='VP9')
    rebulk.regex('[hx]-?263', value='H.263')
    rebulk.regex('[hx]-?264', '(MPEG-?4)?AVC(?:HD)?', value='H.264')
    rebulk.regex('[hx]-?265', 'HEVC', value='H.265')
    # "hevc10" is split into two named groups: the codec and a 10-bit color
    # depth. The 'video-codec-suffix' tag is the one ValidateVideoCodec
    # looks for after a codec that is not followed by a separator.
    rebulk.regex('(?P<video_codec>hevc)(?P<color_depth>10)', value={'video_codec': 'H.265', 'color_depth': '10-bit'},
                 tags=['video-codec-suffix'], children=True)

    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
    # NOTE(review): clear=True presumably discards the video_codec defaults
    # set above before installing these ones - confirm against rebulk.
    rebulk.defaults(clear=True,
                    name="video_profile",
                    validator=seps_surround,
                    disabled=lambda context: is_disabled(context, 'video_profile'))

    # Profiles tagged 'video_profile.rule' are the ones VideoProfileRule
    # inspects; it removes them when no video_codec match is found.
    rebulk.string('BP', value='Baseline', tags='video_profile.rule')
    rebulk.string('XP', 'EP', value='Extended', tags='video_profile.rule')
    rebulk.string('MP', value='Main', tags='video_profile.rule')
    rebulk.string('HP', 'HiP', value='High', tags='video_profile.rule')

    # https://en.wikipedia.org/wiki/Scalable_Video_Coding
    rebulk.string('SC', 'SVC', value='Scalable Video Coding', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/AVCHD
    rebulk.regex('AVC(?:HD)?', value='Advanced Video Codec High Definition', tags='video_profile.rule')
    # https://en.wikipedia.org/wiki/H.265/HEVC
    rebulk.string('HEVC', value='High Efficiency Video Coding', tags='video_profile.rule')

    # These three carry no 'video_profile.rule' tag, so VideoProfileRule
    # does not examine them.
    rebulk.regex('Hi422P', value='High 4:2:2')
    rebulk.regex('Hi444PP', value='High 4:4:4 Predictive')
    rebulk.regex('Hi10P?', value='High 10')  # no profile validation is required

    # Registered while the video_profile defaults are active, with the name
    # and the disabled predicate overridden for 'video_api'.
    rebulk.string('DXVA', value='DXVA', name='video_api',
                  disabled=lambda context: is_disabled(context, 'video_api'))

    rebulk.defaults(clear=True,
                    name='color_depth',
                    validator=seps_surround,
                    disabled=lambda context: is_disabled(context, 'color_depth'))
    rebulk.regex('12.?bits?', value='12-bit')
    # 'Hi10P?' is also registered above as a video_profile pattern.
    rebulk.regex('10.?bits?', 'YUV420P10', 'Hi10P?', value='10-bit')
    rebulk.regex('8.?bits?', value='8-bit')

    rebulk.rules(ValidateVideoCodec, VideoProfileRule)

    return rebulk
|
|
||||||
|
|
||||||
|
|
||||||
class ValidateVideoCodec(Rule):
    """
    Validate video_codec with source property or separated.

    A ``video_codec`` match is kept only when each of its sides is either a
    separator or touches a match carrying the ``video-codec-prefix`` (left)
    or ``video-codec-suffix`` (right) tag.
    """
    priority = 64
    consequence = RemoveMatch

    def enabled(self, context):
        """
        :param context: parsing context handed to ``is_disabled``
        :return: ``True`` unless ``'video_codec'`` is disabled in ``context``
        :rtype: bool
        """
        codec_off = is_disabled(context, 'video_codec')
        return not codec_off

    def when(self, matches, context):
        """
        Collect the ``video_codec`` matches that fail validation.

        :param matches: current matches collection
        :param context: parsing context (unused here)
        :return: list of matches to remove (possibly empty)
        :rtype: list
        """
        rejected = []
        for codec in matches.named('video_codec'):
            # Left side is tested first; the right side only matters when
            # the left side is acceptable.
            if not (seps_before(codec) or
                    matches.at_index(codec.start - 1, lambda match: 'video-codec-prefix' in match.tags)):
                rejected.append(codec)
            elif not (seps_after(codec) or
                      matches.at_index(codec.end + 1, lambda match: 'video-codec-suffix' in match.tags)):
                rejected.append(codec)
        return rejected
|
|
||||||
|
|
||||||
|
|
||||||
class VideoProfileRule(Rule):
    """
    Rule to validate video_profile.

    Removes ``video_profile`` matches tagged ``video_profile.rule`` for which
    no ``video_codec`` match is found at the same span, before, or after.
    """
    consequence = RemoveMatch

    def enabled(self, context):
        """
        :param context: parsing context handed to ``is_disabled``
        :return: ``True`` unless ``'video_profile'`` is disabled in ``context``
        :rtype: bool
        """
        profile_off = is_disabled(context, 'video_profile')
        return not profile_off

    def when(self, matches, context):
        """
        Collect profile matches that have no accompanying codec.

        :param matches: current matches collection
        :param context: parsing context (unused here)
        :return: list of matches to remove (possibly empty)
        :rtype: list
        """
        def is_codec(match):
            return match.name == 'video_codec'

        candidates = matches.named('video_profile', lambda match: 'video_profile.rule' in match.tags)
        # Lookup order: same span, then previous, then next; later lookups
        # run only when the earlier ones found nothing.
        return [
            profile for profile in candidates
            if not (matches.at_span(profile.span, is_codec, 0)
                    or matches.previous(profile, is_codec)
                    or matches.next(profile, is_codec))
        ]
|
|
||||||
@@ -1,108 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Website property.
|
|
||||||
"""
|
|
||||||
from pkg_resources import resource_stream # @UnresolvedImport
|
|
||||||
from rebulk.remodule import re
|
|
||||||
|
|
||||||
from rebulk import Rebulk, Rule, RemoveMatch
|
|
||||||
from ..common import seps
|
|
||||||
from ..common.formatters import cleanup
|
|
||||||
from ..common.pattern import is_disabled
|
|
||||||
from ..common.validators import seps_surround
|
|
||||||
from ...reutils import build_or_pattern
|
|
||||||
|
|
||||||
|
|
||||||
def website(config):
    """
    Builder for rebulk object.

    Reads the TLD list shipped as ``tlds-alpha-by-domain.txt`` in the
    ``guessit`` package and registers three ``website`` regexes, each
    delimited by a non-alphanumeric character or a string edge:

    1. one or more safe subdomains, one or more labels, any known TLD;
    2. optional safe subdomains, one label, a safe TLD;
    3. optional safe subdomains, one label, one or more safe prefixes,
       any known TLD.

    It also registers the private ``website.prefix`` strings and the
    ``PreferTitleOverWebsite`` / ``ValidateWebsitePrefix`` rules.

    :param config: rule configuration; the keys ``safe_tlds``,
                   ``safe_subdomains``, ``safe_prefixes`` and ``prefixes``
                   are read
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'website'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name="website")

    with resource_stream('guessit', 'tlds-alpha-by-domain.txt') as tld_file:
        # Lines containing "--" are skipped before decoding.
        decoded_lines = [
            raw_line.strip().decode('utf-8')
            for raw_line in tld_file.readlines()
            if b'--' not in raw_line
        ]
    # The first remaining line is dropped - presumably the "# Version ..."
    # header of the data file (TODO confirm).
    tlds = decoded_lines[1:]  # All registered domain extension

    safe_tlds = config['safe_tlds']  # For sure a website extension
    safe_subdomains = config['safe_subdomains']  # For sure a website subdomain
    safe_prefix = config['safe_prefixes']  # Those words before a tlds are sure
    website_prefixes = config['prefixes']

    # Fragments shared by the three patterns below.
    boundary_open = r'(?:[^a-z0-9]|^)((?:'
    boundary_close = r'))(?:[^a-z0-9]|$)'
    single_label_then = r'\.)*[a-z-]+\.(?:'

    rebulk.regex(boundary_open + build_or_pattern(safe_subdomains) +
                 r'\.)+(?:[a-z-]+\.)+(?:' + build_or_pattern(tlds) +
                 boundary_close,
                 children=True)
    rebulk.regex(boundary_open + build_or_pattern(safe_subdomains) +
                 single_label_then + build_or_pattern(safe_tlds) +
                 boundary_close,
                 safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
    rebulk.regex(boundary_open + build_or_pattern(safe_subdomains) +
                 single_label_then + build_or_pattern(safe_prefix) +
                 r'\.)+(?:' + build_or_pattern(tlds) +
                 boundary_close,
                 safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)

    rebulk.string(*website_prefixes,
                  validator=seps_surround, private=True, tags=['website.prefix'])

    class PreferTitleOverWebsite(Rule):
        """
        If found match is more likely a title, remove website.
        """
        consequence = RemoveMatch

        @staticmethod
        def valid_followers(match):
            """
            Validator for next website matches
            """
            return match.named('season', 'episode', 'year')

        def when(self, matches, context):
            """
            Collect website matches that should give way to a title.

            A website is removed when its value starts with none of the
            safe subdomains/prefixes, a following match accepted by
            ``valid_followers`` exists, and the website is not inside a
            ``group`` marker.

            :return: list of matches to remove (possibly empty)
            """
            to_remove = []
            for website_match in matches.named('website'):
                starts_safe = any(website_match.value.lower().startswith(safe_start)
                                  for safe_start in safe_subdomains + safe_prefix)
                if starts_safe:
                    continue
                follower = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
                if not follower:
                    continue
                group = matches.markers.at_match(website_match, lambda marker: marker.name == 'group', 0)
                if not group:
                    to_remove.append(website_match)
            return to_remove

    rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)

    return rebulk
|
|
||||||
|
|
||||||
|
|
||||||
class ValidateWebsitePrefix(Rule):
    """
    Validate website prefixes.

    A match tagged ``website.prefix`` is removed unless a ``website`` match
    follows it with no valued hole in between.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        Collect the prefix matches that are not followed by a website.

        :param matches: current matches collection
        :param context: parsing context (unused here)
        :return: list of matches to remove (possibly empty)
        :rtype: list
        """
        dangling = []
        for prefix in matches.tagged('website.prefix'):
            following_site = matches.next(prefix, predicate=lambda match: match.name == 'website', index=0)
            if not following_site:
                dangling.append(prefix)
                continue
            # Anything with a value between the prefix and the website
            # invalidates the prefix.
            gap = matches.holes(prefix.end, following_site.start,
                                formatter=cleanup, seps=seps, predicate=lambda match: match.value)
            if gap:
                dangling.append(prefix)
        return dangling
|
|
||||||
@@ -1,341 +0,0 @@
|
|||||||
# Version 2013112900, Last Updated Fri Nov 29 07:07:01 2013 UTC
|
|
||||||
AC
|
|
||||||
AD
|
|
||||||
AE
|
|
||||||
AERO
|
|
||||||
AF
|
|
||||||
AG
|
|
||||||
AI
|
|
||||||
AL
|
|
||||||
AM
|
|
||||||
AN
|
|
||||||
AO
|
|
||||||
AQ
|
|
||||||
AR
|
|
||||||
ARPA
|
|
||||||
AS
|
|
||||||
ASIA
|
|
||||||
AT
|
|
||||||
AU
|
|
||||||
AW
|
|
||||||
AX
|
|
||||||
AZ
|
|
||||||
BA
|
|
||||||
BB
|
|
||||||
BD
|
|
||||||
BE
|
|
||||||
BF
|
|
||||||
BG
|
|
||||||
BH
|
|
||||||
BI
|
|
||||||
BIKE
|
|
||||||
BIZ
|
|
||||||
BJ
|
|
||||||
BM
|
|
||||||
BN
|
|
||||||
BO
|
|
||||||
BR
|
|
||||||
BS
|
|
||||||
BT
|
|
||||||
BV
|
|
||||||
BW
|
|
||||||
BY
|
|
||||||
BZ
|
|
||||||
CA
|
|
||||||
CAMERA
|
|
||||||
CAT
|
|
||||||
CC
|
|
||||||
CD
|
|
||||||
CF
|
|
||||||
CG
|
|
||||||
CH
|
|
||||||
CI
|
|
||||||
CK
|
|
||||||
CL
|
|
||||||
CLOTHING
|
|
||||||
CM
|
|
||||||
CN
|
|
||||||
CO
|
|
||||||
COM
|
|
||||||
CONSTRUCTION
|
|
||||||
CONTRACTORS
|
|
||||||
COOP
|
|
||||||
CR
|
|
||||||
CU
|
|
||||||
CV
|
|
||||||
CW
|
|
||||||
CX
|
|
||||||
CY
|
|
||||||
CZ
|
|
||||||
DE
|
|
||||||
DIAMONDS
|
|
||||||
DIRECTORY
|
|
||||||
DJ
|
|
||||||
DK
|
|
||||||
DM
|
|
||||||
DO
|
|
||||||
DZ
|
|
||||||
EC
|
|
||||||
EDU
|
|
||||||
EE
|
|
||||||
EG
|
|
||||||
ENTERPRISES
|
|
||||||
EQUIPMENT
|
|
||||||
ER
|
|
||||||
ES
|
|
||||||
ESTATE
|
|
||||||
ET
|
|
||||||
EU
|
|
||||||
FI
|
|
||||||
FJ
|
|
||||||
FK
|
|
||||||
FM
|
|
||||||
FO
|
|
||||||
FR
|
|
||||||
GA
|
|
||||||
GALLERY
|
|
||||||
GB
|
|
||||||
GD
|
|
||||||
GE
|
|
||||||
GF
|
|
||||||
GG
|
|
||||||
GH
|
|
||||||
GI
|
|
||||||
GL
|
|
||||||
GM
|
|
||||||
GN
|
|
||||||
GOV
|
|
||||||
GP
|
|
||||||
GQ
|
|
||||||
GR
|
|
||||||
GRAPHICS
|
|
||||||
GS
|
|
||||||
GT
|
|
||||||
GU
|
|
||||||
GURU
|
|
||||||
GW
|
|
||||||
GY
|
|
||||||
HK
|
|
||||||
HM
|
|
||||||
HN
|
|
||||||
HOLDINGS
|
|
||||||
HR
|
|
||||||
HT
|
|
||||||
HU
|
|
||||||
ID
|
|
||||||
IE
|
|
||||||
IL
|
|
||||||
IM
|
|
||||||
IN
|
|
||||||
INFO
|
|
||||||
INT
|
|
||||||
IO
|
|
||||||
IQ
|
|
||||||
IR
|
|
||||||
IS
|
|
||||||
IT
|
|
||||||
JE
|
|
||||||
JM
|
|
||||||
JO
|
|
||||||
JOBS
|
|
||||||
JP
|
|
||||||
KE
|
|
||||||
KG
|
|
||||||
KH
|
|
||||||
KI
|
|
||||||
KITCHEN
|
|
||||||
KM
|
|
||||||
KN
|
|
||||||
KP
|
|
||||||
KR
|
|
||||||
KW
|
|
||||||
KY
|
|
||||||
KZ
|
|
||||||
LA
|
|
||||||
LAND
|
|
||||||
LB
|
|
||||||
LC
|
|
||||||
LI
|
|
||||||
LIGHTING
|
|
||||||
LK
|
|
||||||
LR
|
|
||||||
LS
|
|
||||||
LT
|
|
||||||
LU
|
|
||||||
LV
|
|
||||||
LY
|
|
||||||
MA
|
|
||||||
MC
|
|
||||||
MD
|
|
||||||
ME
|
|
||||||
MG
|
|
||||||
MH
|
|
||||||
MIL
|
|
||||||
MK
|
|
||||||
ML
|
|
||||||
MM
|
|
||||||
MN
|
|
||||||
MO
|
|
||||||
MOBI
|
|
||||||
MP
|
|
||||||
MQ
|
|
||||||
MR
|
|
||||||
MS
|
|
||||||
MT
|
|
||||||
MU
|
|
||||||
MUSEUM
|
|
||||||
MV
|
|
||||||
MW
|
|
||||||
MX
|
|
||||||
MY
|
|
||||||
MZ
|
|
||||||
NA
|
|
||||||
NAME
|
|
||||||
NC
|
|
||||||
NE
|
|
||||||
NET
|
|
||||||
NF
|
|
||||||
NG
|
|
||||||
NI
|
|
||||||
NL
|
|
||||||
NO
|
|
||||||
NP
|
|
||||||
NR
|
|
||||||
NU
|
|
||||||
NZ
|
|
||||||
OM
|
|
||||||
ORG
|
|
||||||
PA
|
|
||||||
PE
|
|
||||||
PF
|
|
||||||
PG
|
|
||||||
PH
|
|
||||||
PHOTOGRAPHY
|
|
||||||
PK
|
|
||||||
PL
|
|
||||||
PLUMBING
|
|
||||||
PM
|
|
||||||
PN
|
|
||||||
POST
|
|
||||||
PR
|
|
||||||
PRO
|
|
||||||
PS
|
|
||||||
PT
|
|
||||||
PW
|
|
||||||
PY
|
|
||||||
QA
|
|
||||||
RE
|
|
||||||
RO
|
|
||||||
RS
|
|
||||||
RU
|
|
||||||
RW
|
|
||||||
SA
|
|
||||||
SB
|
|
||||||
SC
|
|
||||||
SD
|
|
||||||
SE
|
|
||||||
SEXY
|
|
||||||
SG
|
|
||||||
SH
|
|
||||||
SI
|
|
||||||
SINGLES
|
|
||||||
SJ
|
|
||||||
SK
|
|
||||||
SL
|
|
||||||
SM
|
|
||||||
SN
|
|
||||||
SO
|
|
||||||
SR
|
|
||||||
ST
|
|
||||||
SU
|
|
||||||
SV
|
|
||||||
SX
|
|
||||||
SY
|
|
||||||
SZ
|
|
||||||
TATTOO
|
|
||||||
TC
|
|
||||||
TD
|
|
||||||
TECHNOLOGY
|
|
||||||
TEL
|
|
||||||
TF
|
|
||||||
TG
|
|
||||||
TH
|
|
||||||
TIPS
|
|
||||||
TJ
|
|
||||||
TK
|
|
||||||
TL
|
|
||||||
TM
|
|
||||||
TN
|
|
||||||
TO
|
|
||||||
TODAY
|
|
||||||
TP
|
|
||||||
TR
|
|
||||||
TRAVEL
|
|
||||||
TT
|
|
||||||
TV
|
|
||||||
TW
|
|
||||||
TZ
|
|
||||||
UA
|
|
||||||
UG
|
|
||||||
UK
|
|
||||||
US
|
|
||||||
UY
|
|
||||||
UZ
|
|
||||||
VA
|
|
||||||
VC
|
|
||||||
VE
|
|
||||||
VENTURES
|
|
||||||
VG
|
|
||||||
VI
|
|
||||||
VN
|
|
||||||
VOYAGE
|
|
||||||
VU
|
|
||||||
WF
|
|
||||||
WS
|
|
||||||
XN--3E0B707E
|
|
||||||
XN--45BRJ9C
|
|
||||||
XN--80AO21A
|
|
||||||
XN--80ASEHDB
|
|
||||||
XN--80ASWG
|
|
||||||
XN--90A3AC
|
|
||||||
XN--CLCHC0EA0B2G2A9GCD
|
|
||||||
XN--FIQS8S
|
|
||||||
XN--FIQZ9S
|
|
||||||
XN--FPCRJ9C3D
|
|
||||||
XN--FZC2C9E2C
|
|
||||||
XN--GECRJ9C
|
|
||||||
XN--H2BRJ9C
|
|
||||||
XN--J1AMH
|
|
||||||
XN--J6W193G
|
|
||||||
XN--KPRW13D
|
|
||||||
XN--KPRY57D
|
|
||||||
XN--L1ACC
|
|
||||||
XN--LGBBAT1AD8J
|
|
||||||
XN--MGB9AWBF
|
|
||||||
XN--MGBA3A4F16A
|
|
||||||
XN--MGBAAM7A8H
|
|
||||||
XN--MGBAYH7GPA
|
|
||||||
XN--MGBBH1A71E
|
|
||||||
XN--MGBC0A9AZCG
|
|
||||||
XN--MGBERP4A5D4AR
|
|
||||||
XN--MGBX4CD0AB
|
|
||||||
XN--NGBC5AZD
|
|
||||||
XN--O3CW4H
|
|
||||||
XN--OGBPF8FL
|
|
||||||
XN--P1AI
|
|
||||||
XN--PGBS0DH
|
|
||||||
XN--Q9JYB4C
|
|
||||||
XN--S9BRJ9C
|
|
||||||
XN--UNUP4Y
|
|
||||||
XN--WGBH1C
|
|
||||||
XN--WGBL6A
|
|
||||||
XN--XKC2AL3HYE2A
|
|
||||||
XN--XKC2DL3A5EE0H
|
|
||||||
XN--YFRO4I67O
|
|
||||||
XN--YGBI2AMMX
|
|
||||||
XXX
|
|
||||||
YE
|
|
||||||
YT
|
|
||||||
ZA
|
|
||||||
ZM
|
|
||||||
ZW
|
|
||||||
@@ -1,81 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Options
|
|
||||||
"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
from collections import OrderedDict
|
|
||||||
except ImportError: # pragma: no-cover
|
|
||||||
from ordereddict import OrderedDict # pylint:disable=import-error
|
|
||||||
import babelfish
|
|
||||||
|
|
||||||
import yaml # pylint:disable=wrong-import-order
|
|
||||||
|
|
||||||
from .rules.common.quantity import BitRate, FrameRate, Size
|
|
||||||
|
|
||||||
|
|
||||||
class OrderedDictYAMLLoader(yaml.Loader):
    """
    A YAML loader that loads mappings into ordered dictionaries.

    From https://gist.github.com/enaeseth/844388

    NOTE(review): this derives from ``yaml.Loader`` rather than
    ``yaml.SafeLoader``; it should only be fed trusted documents - confirm
    against the callers.
    """

    def __init__(self, *args, **kwargs):
        yaml.Loader.__init__(self, *args, **kwargs)

        # Route both plain and ordered YAML maps through construct_yaml_map.
        for tag in (u'tag:yaml.org,2002:map', u'tag:yaml.org,2002:omap'):
            self.add_constructor(tag, type(self).construct_yaml_map)

    def construct_yaml_map(self, node):
        """
        Generator constructor: yield an empty ``OrderedDict`` first, then
        fill it from ``construct_mapping`` when resumed.

        :param node: YAML node to construct from
        """
        ordered = OrderedDict()
        yield ordered
        ordered.update(self.construct_mapping(node))

    def construct_mapping(self, node, deep=False):
        """
        Build an ``OrderedDict`` from a mapping node, keeping key order.

        :param node: YAML node; must be a ``yaml.MappingNode``
        :param deep: forwarded to ``construct_object`` for keys and values
        :return: the constructed mapping
        :rtype: OrderedDict
        :raises yaml.constructor.ConstructorError: if ``node`` is not a
            mapping node or a key is not hashable
        """
        if not isinstance(node, yaml.MappingNode):  # pragma: no cover
            raise yaml.constructor.ConstructorError(None, None,
                                                    'expected a mapping node, but found %s' % node.id,
                                                    node.start_mark)
        self.flatten_mapping(node)

        result = OrderedDict()
        for key_node, value_node in node.value:
            key = self.construct_object(key_node, deep=deep)
            try:
                hash(key)
            except TypeError as exc:  # pragma: no cover
                raise yaml.constructor.ConstructorError('while constructing a mapping',
                                                        node.start_mark, 'found unacceptable key (%s)'
                                                        % exc, key_node.start_mark)
            result[key] = self.construct_object(value_node, deep=deep)
        return result
|
|
||||||
|
|
||||||
|
|
||||||
class CustomDumper(yaml.SafeDumper):
    """
    Custom YAML Dumper.

    Adds nothing to ``yaml.SafeDumper`` itself; it exists as a separate
    class on which representers are registered.
    """
|
|
||||||
|
|
||||||
|
|
||||||
def default_representer(dumper, data):
    """
    Default representer: dump ``data`` as the string ``str(data)``.

    :param dumper: object providing ``represent_str``
    :param data: value to represent
    :return: whatever ``dumper.represent_str`` returns
    """
    as_text = str(data)
    return dumper.represent_str(as_text)
|
|
||||||
|
|
||||||
|
|
||||||
# Values of these types are dumped through default_representer, i.e. as
# the string produced by str(value).
CustomDumper.add_representer(babelfish.Language, default_representer)
CustomDumper.add_representer(babelfish.Country, default_representer)
CustomDumper.add_representer(BitRate, default_representer)
CustomDumper.add_representer(FrameRate, default_representer)
CustomDumper.add_representer(Size, default_representer)
|
|
||||||
|
|
||||||
|
|
||||||
def ordered_dict_representer(dumper, data):
    """
    OrderedDict representer: dump ``data`` as a plain YAML map.

    :param dumper: object providing ``represent_mapping``
    :param data: mapping whose ``items()`` are passed to the dumper
    :return: whatever ``dumper.represent_mapping`` returns
    """
    map_tag = 'tag:yaml.org,2002:map'
    pairs = data.items()
    return dumper.represent_mapping(map_tag, pairs)
|
|
||||||
|
|
||||||
|
|
||||||
# OrderedDict values are dumped through ordered_dict_representer.
CustomDumper.add_representer(OrderedDict, ordered_dict_representer)
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Define simple search patterns in bulk to perform advanced matching on any string.
|
|
||||||
"""
|
|
||||||
# pylint:disable=import-self
|
|
||||||
from .rebulk import Rebulk
|
|
||||||
from .rules import Rule, CustomRule, AppendMatch, RemoveMatch, RenameMatch, AppendTags, RemoveTags
|
|
||||||
from .processors import ConflictSolver, PrivateRemover, POST_PROCESS, PRE_PROCESS
|
|
||||||
from .pattern import REGEX_AVAILABLE
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Version module
|
|
||||||
"""
|
|
||||||
# pragma: no cover
# Version string of this package.
__version__ = '2.0.1.dev0'
|
|
||||||
@@ -1,217 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Base builder class for Rebulk
|
|
||||||
"""
|
|
||||||
from abc import ABCMeta, abstractmethod
|
|
||||||
from copy import deepcopy
|
|
||||||
from logging import getLogger
|
|
||||||
|
|
||||||
from six import add_metaclass
|
|
||||||
|
|
||||||
from .loose import set_defaults
|
|
||||||
from .pattern import RePattern, StringPattern, FunctionalPattern
|
|
||||||
|
|
||||||
# Module-level shortcut bound to the ``log`` method of this module's logger.
log = getLogger(__name__).log
|
|
||||||
|
|
||||||
|
|
||||||
@add_metaclass(ABCMeta)
class Builder(object):
    """
    Base builder class for patterns.

    Keeps five dictionaries of default keyword arguments (general, regex,
    string, functional, chain), provides ``build_*`` factories that apply
    those defaults, and fluent helpers that pass the built object to
    ``pattern``.  Subclasses must implement ``pattern``.
    """

    def __init__(self):
        self._defaults = {}
        self._regex_defaults = {}
        self._string_defaults = {}
        self._functional_defaults = {}
        self._chain_defaults = {}

    def reset(self):
        """
        Reset all defaults.

        NOTE(review): this re-runs ``self.__init__()`` with no arguments, so
        it invokes the most-derived ``__init__``; a subclass whose
        ``__init__`` has required parameters cannot be reset this way.

        :return: None
        """
        self.__init__()

    def defaults(self, **kwargs):
        """
        Define default keyword arguments for all patterns.

        :param kwargs: default keyword arguments
        :return: self
        """
        set_defaults(kwargs, self._defaults, override=True)
        return self

    def regex_defaults(self, **kwargs):
        """
        Define default keyword arguments for regex patterns.

        :param kwargs: default keyword arguments
        :return: self
        """
        set_defaults(kwargs, self._regex_defaults, override=True)
        return self

    def string_defaults(self, **kwargs):
        """
        Define default keyword arguments for string patterns.

        :param kwargs: default keyword arguments
        :return: self
        """
        set_defaults(kwargs, self._string_defaults, override=True)
        return self

    def functional_defaults(self, **kwargs):
        """
        Define default keyword arguments for functional patterns.

        :param kwargs: default keyword arguments
        :return: self
        """
        set_defaults(kwargs, self._functional_defaults, override=True)
        return self

    def chain_defaults(self, **kwargs):
        """
        Define default keyword arguments for patterns chain.

        :param kwargs: default keyword arguments
        :return: self
        """
        set_defaults(kwargs, self._chain_defaults, override=True)
        return self

    def build_re(self, *pattern, **kwargs):
        """
        Builds a new regular expression pattern.

        The regex defaults, then the general defaults, are applied to
        ``kwargs`` before construction.

        :param pattern: positional arguments forwarded to ``RePattern``
        :param kwargs: keyword arguments forwarded to ``RePattern``
        :return: the new pattern
        :rtype: RePattern
        """
        for layer in (self._regex_defaults, self._defaults):
            set_defaults(layer, kwargs)
        return RePattern(*pattern, **kwargs)

    def build_string(self, *pattern, **kwargs):
        """
        Builds a new string pattern.

        The string defaults, then the general defaults, are applied to
        ``kwargs`` before construction.

        :param pattern: positional arguments forwarded to ``StringPattern``
        :param kwargs: keyword arguments forwarded to ``StringPattern``
        :return: the new pattern
        :rtype: StringPattern
        """
        for layer in (self._string_defaults, self._defaults):
            set_defaults(layer, kwargs)
        return StringPattern(*pattern, **kwargs)

    def build_functional(self, *pattern, **kwargs):
        """
        Builds a new functional pattern.

        The functional defaults, then the general defaults, are applied to
        ``kwargs`` before construction.

        :param pattern: positional arguments forwarded to ``FunctionalPattern``
        :param kwargs: keyword arguments forwarded to ``FunctionalPattern``
        :return: the new pattern
        :rtype: FunctionalPattern
        """
        for layer in (self._functional_defaults, self._defaults):
            set_defaults(layer, kwargs)
        return FunctionalPattern(*pattern, **kwargs)

    def build_chain(self, **kwargs):
        """
        Builds a new patterns chain.

        The chain defaults, then the general defaults, are applied to
        ``kwargs``; the new chain then receives deep copies of all five
        default dictionaries of this builder.

        :param kwargs: keyword arguments forwarded to ``Chain``
        :return: the new chain, with this builder as its parent
        :rtype: Chain
        """
        # Imported locally - presumably to avoid a circular import with the
        # chain module.
        from .chain import Chain
        for layer in (self._chain_defaults, self._defaults):
            set_defaults(layer, kwargs)
        chain = Chain(self, **kwargs)
        for attr in ('_defaults', '_regex_defaults', '_functional_defaults',
                     '_string_defaults', '_chain_defaults'):
            setattr(chain, attr, deepcopy(getattr(self, attr)))
        return chain

    @abstractmethod
    def pattern(self, *pattern):
        """
        Register a list of Pattern instance.

        :param pattern: pattern instances to register
        :return: defined by the implementing subclass
        """

    def regex(self, *pattern, **kwargs):
        """
        Add re pattern.

        :param pattern: positional arguments forwarded to ``build_re``
        :param kwargs: keyword arguments forwarded to ``build_re``
        :return: whatever ``pattern`` returns
        """
        built = self.build_re(*pattern, **kwargs)
        return self.pattern(built)

    def string(self, *pattern, **kwargs):
        """
        Add string pattern.

        :param pattern: positional arguments forwarded to ``build_string``
        :param kwargs: keyword arguments forwarded to ``build_string``
        :return: whatever ``pattern`` returns
        """
        built = self.build_string(*pattern, **kwargs)
        return self.pattern(built)

    def functional(self, *pattern, **kwargs):
        """
        Add functional pattern.

        :param pattern: positional arguments forwarded to ``build_functional``
        :param kwargs: keyword arguments forwarded to ``build_functional``
        :return: whatever ``pattern`` returns
        """
        return self.pattern(self.build_functional(*pattern, **kwargs))

    def chain(self, **kwargs):
        """
        Add patterns chain, using configuration of this rebulk.

        :param kwargs: keyword arguments forwarded to ``build_chain``
        :return: the new chain (not the return value of ``pattern``)
        :rtype: Chain
        """
        new_chain = self.build_chain(**kwargs)
        self.pattern(new_chain)
        return new_chain
|
|
||||||
@@ -1,380 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Chain patterns and handle repeating capture groups
|
|
||||||
"""
|
|
||||||
# pylint: disable=super-init-not-called
|
|
||||||
import itertools
|
|
||||||
|
|
||||||
from .builder import Builder
|
|
||||||
from .loose import call
|
|
||||||
from .match import Match, Matches
|
|
||||||
from .pattern import Pattern, filter_match_kwargs, BasePattern
|
|
||||||
from .remodule import re
|
|
||||||
|
|
||||||
|
|
||||||
class _InvalidChainException(Exception):
    """
    Internal exception raised when a chain is not valid.

    ``Chain._match`` catches it and discards the chain being assembled.
    """
|
|
||||||
|
|
||||||
|
|
||||||
class Chain(Pattern, Builder):
    """
    A pattern made of an ordered list of chain parts matched one after another.
    """

    def __init__(self, parent, chain_breaker=None, **kwargs):
        """
        :param parent: object stored as ``self.parent`` and returned (after unwinding nested chains) by ``close``
        :param chain_breaker: optional callable receiving a ``Matches`` of the candidate chain matches;
            a truthy result prevents the candidate group from being added. Non-callables are ignored.
        :param kwargs: options passed to ``Pattern.__init__`` through ``call`` and, once filtered by
            ``filter_match_kwargs``, to the ``Match`` built for each chain
        """
        Builder.__init__(self)
        call(Pattern.__init__, self, **kwargs)
        self._kwargs = kwargs
        self._match_kwargs = filter_match_kwargs(kwargs)
        # Anything that is not callable is treated as "no breaker".
        self.chain_breaker = chain_breaker if callable(chain_breaker) else None
        self.parent = parent
        self.parts = []

    def pattern(self, *pattern):
        """
        Append exactly one pattern to the chain, wrapped in a ChainPart.

        :param pattern: a single pattern
        :return: the created ChainPart
        :raises ValueError: if zero or more than one pattern is given
        """
        count = len(pattern)
        if count == 0:
            raise ValueError("One pattern should be given to the chain")
        if count > 1:
            raise ValueError("Only one pattern can be given to the chain")
        new_part = ChainPart(self, pattern[0])
        self.parts.append(new_part)
        return new_part

    def close(self):
        """
        Deeply close the chain by walking up through every enclosing Chain.

        :return: the first ancestor that is not a Chain
        """
        ancestor = self.parent
        while isinstance(ancestor, Chain):
            ancestor = ancestor.parent
        return ancestor

    def _match(self, pattern, input_string, context=None):
        """
        Search ``input_string`` for successive occurrences of the whole chain.

        :param pattern: unused by this implementation
        :param input_string: string to search
        :param context: passed through to each chain part
        :return: list of chain matches
        :rtype: list
        """
        # pylint: disable=too-many-locals,too-many-nested-blocks
        results = []
        remaining = input_string
        offset = 0
        while offset < len(input_string):
            found = False
            collected = []
            is_valid = True
            for part in self.parts:
                try:
                    part_matches, raw_part_matches = part.matches(remaining, context, with_raw_matches=True)
                    found, remaining, offset = self._to_next_chain_part(
                        part, part_matches, raw_part_matches, found,
                        input_string, remaining, offset, collected)
                except _InvalidChainException:
                    # The part did not repeat often enough: drop this chain attempt.
                    is_valid = False
                    if collected:
                        offset = collected[0].raw_end
                    break
            if not found:
                break
            if is_valid and collected:
                results.append(self._build_chain_match(collected, input_string))

        return results

    def _to_next_chain_part(self, chain_part, chain_part_matches, raw_chain_part_matches, chain_found,
                            input_string, chain_input_string, offset, current_chain_matches):
        """
        Re-base the matches of one chain part onto ``input_string`` and advance the search position.

        ``current_chain_matches`` is extended in place with the accepted groups of a non-hidden part.

        :return: updated ``(chain_found, chain_input_string, offset)``
        :rtype: tuple
        """
        Chain._fix_matches_offset(chain_part_matches, input_string, offset)
        Chain._fix_matches_offset(raw_chain_part_matches, input_string, offset)

        if not raw_chain_part_matches:
            return chain_found, chain_input_string, offset

        by_index = self._group_by_match_index(chain_part_matches)
        raw_by_index = self._group_by_match_index(raw_chain_part_matches)

        for match_index, raw_group in raw_by_index.items():
            chain_found = True
            offset = raw_group[-1].raw_end
            chain_input_string = input_string[offset:]

            if chain_part.is_hidden:
                continue
            group = by_index.get(match_index, [])
            if self._chain_breaker_eval(current_chain_matches + group):
                current_chain_matches.extend(group)
        return chain_found, chain_input_string, offset

    def _process_match(self, match, match_index, child=False):
        """
        Handle a match; when it is rejected, retry after stripping trailing repetitions.

        The children produced by the last pattern are removed one match-index group at a time
        (last group first) and the match is re-processed after each removal. If no attempt
        succeeds, the original children and end are restored.

        :param match:
        :type match:
        :param match_index:
        :type match_index:
        :param child:
        :type child:
        :return: True as soon as one attempt is accepted by the parent implementation, else False
        :rtype: bool
        """
        # pylint: disable=too-many-locals
        if super(Chain, self)._process_match(match, match_index, child=child):
            return True

        if not match.children:
            return False

        last_pattern = match.children[-1].pattern
        groups = self._group_by_match_index(
            [candidate for candidate in match.children if candidate.pattern == last_pattern]
        )
        if not groups:
            return False

        saved_children = Matches(match.children)
        saved_end = match.end

        for index in reversed(list(groups)):
            for trailing in groups[index]:
                match.children.remove(trailing)
            match.end = match.children[-1].end if match.children else match.start
            if super(Chain, self)._process_match(match, match_index, child=child):
                return True

        match.children = saved_children
        match.end = saved_end
        return False

    def _build_chain_match(self, current_chain_matches, input_string):
        """
        Build the Match covering every collected chain match and attach them as its children.

        :return: the enclosing match, spanning the lowest start to the highest end
        """
        low = None
        high = None
        for item in current_chain_matches:
            if low is None or low > item.start:
                low = item.start
            if high is None or high < item.end:
                high = item.end
        chain_match = call(Match, low, high, pattern=self, input_string=input_string, **self._match_kwargs)
        for item in current_chain_matches:
            if item.children:
                for grandchild in item.children:
                    chain_match.children.append(grandchild)
            if item not in chain_match.children:
                chain_match.children.append(item)
                item.parent = chain_match
        return chain_match

    def _chain_breaker_eval(self, matches):
        """
        Tell whether ``matches`` may be kept, i.e. no breaker is set or the breaker returns a falsy value.
        """
        breaker = self.chain_breaker
        if not breaker:
            return True
        return not breaker(Matches(matches))

    @staticmethod
    def _fix_matches_offset(chain_part_matches, input_string, offset):
        """
        Shift matches found in a substring so that they refer to ``input_string``.

        Matches whose ``input_string`` already equals the given one are left untouched.
        """
        for item in chain_part_matches:
            if item.input_string != input_string:
                item.input_string = input_string
                item.end += offset
                item.start += offset
                if item.children:
                    Chain._fix_matches_offset(item.children, input_string, offset)

    @staticmethod
    def _group_by_match_index(matches):
        """
        Group consecutive matches by ``match_index``.

        :return: dict mapping each match_index to the list of its matches
        :rtype: dict
        """
        return dict(
            (index, list(group))
            for index, group in itertools.groupby(matches, lambda m: m.match_index)
        )

    @property
    def match_options(self):
        """
        A chain exposes no match options of its own.
        """
        return {}

    @property
    def patterns(self):
        """
        The chain is its own single pattern.
        """
        return [self]

    def __repr__(self):
        location = "@%s" % (self.defined_at,) if self.defined_at else ""
        return "<%s%s:%s>" % (self.__class__.__name__, location, self.parts)
|
|
||||||
|
|
||||||
|
|
||||||
class ChainPart(BasePattern):
    """
    One pattern of a chain, together with its repetition bounds and visibility.
    """

    def __init__(self, chain, pattern):
        """
        :param chain: the Chain this part belongs to
        :param pattern: the wrapped pattern
        """
        self._chain = chain
        self.pattern = pattern
        # By default a part must match exactly once.
        self.repeater_start = 1
        self.repeater_end = 1
        self._hidden = False

    @property
    def _is_chain_start(self):
        """
        True when this part is the first part of its chain.
        """
        return self._chain.parts[0] == self

    def matches(self, input_string, context=None, with_raw_matches=False):
        """
        Match the wrapped pattern, then apply the repetition bounds.

        :param input_string: string to search
        :param context: passed through to the wrapped pattern
        :param with_raw_matches: when truthy, also return the raw matches
        :return: the matches, or a ``(matches, raw_matches)`` tuple
        :raises _InvalidChainException: if the raw matches do not reach ``repeater_start`` repetitions
        """
        matches, raw_matches = self.pattern.matches(input_string, context=context, with_raw_matches=True)

        matches = self._truncate_repeater(matches, input_string)
        raw_matches = self._truncate_repeater(raw_matches, input_string)

        self._validate_repeater(raw_matches)

        if with_raw_matches:
            return matches, raw_matches

        return matches

    def _truncate_repeater(self, matches, input_string):
        """
        Keep only the leading run of adjacent matches, limited by ``repeater_end``.

        :param matches: matches in the order produced by the wrapped pattern
        :param input_string: string the matches were found in
        :return: the retained matches
        :rtype: list
        """
        if not matches:
            return matches

        if not self._is_chain_start:
            # A part that is not first must start right at the beginning of its input.
            separator = input_string[0:matches[0].initiator.raw_start]
            if separator:
                return []

        # Count how many matches follow each other with no text in between.
        j = 1
        for i in range(0, len(matches) - 1):
            separator = input_string[matches[i].initiator.raw_end:
                                     matches[i + 1].initiator.raw_start]
            if separator:
                break
            j += 1
        truncated = matches[:j]
        if self.repeater_end is not None:
            truncated = [m for m in truncated if m.match_index < self.repeater_end]
        return truncated

    def _validate_repeater(self, matches):
        """
        Check that the minimum repetition count is reached.

        :raises _InvalidChainException: if the highest ``match_index`` plus one is below ``repeater_start``
        """
        max_match_index = -1
        if matches:
            max_match_index = max(m.match_index for m in matches)
        if max_match_index + 1 < self.repeater_start:
            raise _InvalidChainException

    def chain(self):
        """
        Add patterns chain, using configuration from this chain

        :return: result of ``chain()`` on the owning chain
        :rtype:
        """
        return self._chain.chain()

    def hidden(self, hidden=True):
        """
        Hide chain part results from global chain result

        :param hidden:
        :type hidden:
        :return: self
        :rtype: ChainPart
        """
        self._hidden = hidden
        return self

    @property
    def is_hidden(self):
        """
        Check if the chain part is hidden

        :return:
        :rtype:
        """
        return self._hidden

    def regex(self, *pattern, **kwargs):
        """
        Add re pattern to the owning chain

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return: result of ``regex`` on the owning chain
        :rtype:
        """
        return self._chain.regex(*pattern, **kwargs)

    def functional(self, *pattern, **kwargs):
        """
        Add functional pattern to the owning chain

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return: result of ``functional`` on the owning chain
        :rtype:
        """
        return self._chain.functional(*pattern, **kwargs)

    def string(self, *pattern, **kwargs):
        """
        Add string pattern to the owning chain

        :param pattern:
        :type pattern:
        :param kwargs:
        :type kwargs:
        :return: result of ``string`` on the owning chain
        :rtype:
        """
        return self._chain.string(*pattern, **kwargs)

    def close(self):
        """
        Close the chain builder to continue registering other patterns

        :return: result of ``close`` on the owning chain
        :rtype:
        """
        return self._chain.close()

    def repeater(self, value):
        """
        Define the repeater of the current chain part.

        Accepted values:

        * an int or a string convertible with ``int()``: exactly that many repetitions
        * ``'+'``: at least one repetition, no upper bound
        * ``'*'``: any number of repetitions, including none
        * ``'?'``: zero or one repetition
        * ``'{start,end}'``: explicit bounds; a missing start means 0, a missing end means no upper bound.
          NOTE: without a comma (``'{3}'``) the number is read as the start and the end stays unbounded.

        Any other value leaves the current bounds unchanged.

        :param value:
        :type value: int or str
        :return: self
        :rtype: ChainPart
        """
        try:
            value = int(value)
        except ValueError:
            pass
        else:
            self.repeater_start = value
            self.repeater_end = value
            return self

        if value == '+':
            self.repeater_start = 1
            self.repeater_end = None
        elif value == '*':
            self.repeater_start = 0
            self.repeater_end = None
        elif value == '?':
            self.repeater_start = 0
            self.repeater_end = 1
        else:
            match = re.match(r'\{\s*(\d*)\s*,?\s*(\d*)\s*\}', value)
            if match:
                start = match.group(1)
                end = match.group(2)
                if start or end:
                    self.repeater_start = int(start) if start else 0
                    self.repeater_end = int(end) if end else None
        return self

    def __repr__(self):
        return "%s({%s,%s})" % (self.pattern, self.repeater_start, self.repeater_end)
|
|
||||||
@@ -1,56 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Debug tools.
|
|
||||||
|
|
||||||
Can be configured by changing the values of these variables.
|
|
||||||
|
|
||||||
DEBUG = False
|
|
||||||
Enable this variable to activate debug features (like defined_at parameters). It can slow down Rebulk
|
|
||||||
|
|
||||||
LOG_LEVEL = logging.DEBUG
|
|
||||||
Default log level of generated rebulk logs.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import inspect
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
from collections import namedtuple
|
|
||||||
|
|
||||||
|
|
||||||
DEBUG = False
|
|
||||||
LOG_LEVEL = logging.DEBUG
|
|
||||||
|
|
||||||
|
|
||||||
class Frame(namedtuple('Frame', ['lineno', 'package', 'name', 'filename'])):
    """
    Immutable description of a stack frame: line number, package, module name and file name.
    """
    __slots__ = ()

    def __repr__(self):
        # Only the file's base name is shown, followed by the line number.
        short_name = os.path.basename(self.filename)
        return "%s#L%s" % (short_name, self.lineno)
|
|
||||||
|
|
||||||
|
|
||||||
def defined_at():
    """
    Get definition location of a pattern or a match (outside of rebulk package).

    The call stack is walked outwards until a frame whose ``__package__`` differs from this
    module's package is found (a frame without ``__package__`` also stops the walk).

    :return: the located frame, or None when DEBUG is disabled or no such frame exists
    :rtype: Frame or None
    """
    if not DEBUG:
        return None

    frame = inspect.currentframe()
    try:
        while frame:
            try:
                if frame.f_globals['__package__'] != __package__:
                    break
            except KeyError:  # pragma:no cover
                # If package is missing, consider we are in. Workaround for python 3.3.
                break
            frame = frame.f_back
        if frame is None:
            # The whole stack belongs to this package (or no frame is available).
            return None
        return Frame(frame.f_lineno,
                     frame.f_globals.get('__package__'),
                     frame.f_globals.get('__name__'),
                     frame.f_code.co_filename)
    finally:
        # Drop the frame reference so the frame is not kept alive by a reference cycle.
        del frame
|
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Formatter functions to use in patterns.
|
|
||||||
|
|
||||||
All of these functions take match.value (str) as their last argument.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
def formatters(*chained_formatters):
    """
    Combine several formatter functions into a single one.

    :param chained_formatters: formatters applied in the given order, each receiving the previous result
    :type chained_formatters: callables
    :return: a function applying every formatter in turn to its argument
    :rtype: callable
    """

    def formatters_chain(input_string):  # pylint:disable=missing-docstring
        value = input_string
        for apply_formatter in chained_formatters:
            value = apply_formatter(value)
        return value

    return formatters_chain
|
|
||||||
|
|
||||||
|
|
||||||
def default_formatter(input_string):
    """
    Identity formatter: hand the value back untouched.

    :param input_string: value to format
    :return: the very same object
    """
    return input_string
|
|
||||||
@@ -1,127 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Introspect rebulk object to retrieve capabilities.
|
|
||||||
"""
|
|
||||||
from abc import ABCMeta, abstractmethod
|
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
import six
|
|
||||||
from .pattern import StringPattern, RePattern, FunctionalPattern
|
|
||||||
from .utils import extend_safe
|
|
||||||
|
|
||||||
|
|
||||||
@six.add_metaclass(ABCMeta)
class Description(object):
    """
    Abstract base for objects describing what a pattern, a rule or a rebulk can generate.
    """
    @property
    @abstractmethod
    def properties(self):  # pragma: no cover
        """
        Properties of described object.

        :return: all properties that described object can generate grouped by name.
        :rtype: dict
        """
|
|
||||||
|
|
||||||
|
|
||||||
class PatternDescription(Description):
    """
    Description of the properties a single pattern can generate.
    """
    def __init__(self, pattern):  # pylint:disable=too-many-branches
        """
        Collect the properties of ``pattern``.

        The first applicable source wins: explicit ``pattern.properties``, then the ``value`` match
        option, then the kind of pattern (string, regex or functional). Names whose possible values
        are not known are recorded with a single ``None`` value.
        """
        self.pattern = pattern
        self._properties = defaultdict(list)
        collected = self._properties

        if pattern.properties:
            for key, values in pattern.properties.items():
                extend_safe(collected[key], values)
        elif 'value' in pattern.match_options:
            collected[pattern.name].append(pattern.match_options['value'])
        elif isinstance(pattern, StringPattern):
            extend_safe(collected[pattern.name], pattern.patterns)
        elif isinstance(pattern, RePattern):
            if pattern.name and pattern.name not in pattern.private_names:
                extend_safe(collected[pattern.name], [None])
            if not pattern.private_children:
                # Every public named group of every regex is a possible property name.
                for regex_pattern in pattern.patterns:
                    for group_name in regex_pattern.groupindex:
                        if group_name not in pattern.private_names:
                            extend_safe(collected[group_name], [None])
        elif isinstance(pattern, FunctionalPattern):
            if pattern.name and pattern.name not in pattern.private_names:
                extend_safe(collected[pattern.name], [None])

    @property
    def properties(self):
        """
        Properties for this pattern.

        :return: values grouped by property name
        :rtype: dict
        """
        return self._properties
|
|
||||||
|
|
||||||
|
|
||||||
class RuleDescription(Description):
    """
    Description of the properties a rule declares.
    """
    def __init__(self, rule):
        """
        Copy the properties declared on ``rule`` (if any), without duplicating values.
        """
        self.rule = rule
        self._properties = defaultdict(list)

        declared = rule.properties
        if declared:
            for key, values in declared.items():
                extend_safe(self._properties[key], values)

    @property
    def properties(self):
        """
        Properties for this rule.

        :return: values grouped by property name
        :rtype: dict
        """
        return self._properties
|
|
||||||
|
|
||||||
|
|
||||||
class Introspection(Description):
    """
    Aggregated description of the effective patterns and rules of a rebulk.
    """
    def __init__(self, rebulk, context=None):
        """
        :param rebulk: object providing ``effective_patterns`` and ``effective_rules``
        :param context: passed to both of these methods
        """
        # Private patterns and markers are left out of the description.
        self.patterns = [
            PatternDescription(pattern)
            for pattern in rebulk.effective_patterns(context)
            if not pattern.private and not pattern.marker
        ]
        self.rules = [RuleDescription(rule) for rule in rebulk.effective_rules(context)]

    @property
    def properties(self):
        """
        Properties for Introspection results.

        :return: values of every described pattern and rule, merged by property name
        :rtype: dict
        """
        merged = defaultdict(list)
        for descriptions in (self.patterns, self.rules):
            for described in descriptions:
                for key, values in described.properties.items():
                    extend_safe(merged[key], values)
        return merged
|
|
||||||
|
|
||||||
|
|
||||||
def introspect(rebulk, context=None):
    """
    Describe the objects defined on a Rebulk instance and the properties they can generate.

    :param rebulk: instance to describe
    :type rebulk: Rebulk
    :param context: forwarded to the Introspection constructor
    :type context:
    :return: Introspection instance
    :rtype: Introspection
    """
    result = Introspection(rebulk, context)
    return result
|
|
||||||
@@ -1,242 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Various utility functions
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from inspect import isclass
|
|
||||||
try:
|
|
||||||
from inspect import getfullargspec as getargspec
|
|
||||||
|
|
||||||
_fullargspec_supported = True
|
|
||||||
except ImportError:
|
|
||||||
_fullargspec_supported = False
|
|
||||||
from inspect import getargspec
|
|
||||||
|
|
||||||
from .utils import is_iterable
|
|
||||||
|
|
||||||
if sys.version_info < (3, 4, 0): # pragma: no cover
|
|
||||||
def _constructor(class_):
|
|
||||||
"""
|
|
||||||
Retrieves constructor from given class
|
|
||||||
|
|
||||||
:param class_:
|
|
||||||
:type class_: class
|
|
||||||
:return: constructor from given class
|
|
||||||
:rtype: callable
|
|
||||||
"""
|
|
||||||
return class_.__init__
|
|
||||||
else: # pragma: no cover
|
|
||||||
def _constructor(class_):
|
|
||||||
"""
|
|
||||||
Retrieves constructor from given class
|
|
||||||
|
|
||||||
:param class_:
|
|
||||||
:type class_: class
|
|
||||||
:return: constructor from given class
|
|
||||||
:rtype: callable
|
|
||||||
"""
|
|
||||||
return class_
|
|
||||||
|
|
||||||
|
|
||||||
def call(function, *args, **kwargs):
    """
    Call a function or constructor, keeping only the args and kwargs that fit its signature.

    :param function: Function or constructor to call
    :type function: callable
    :param args: candidate positional arguments
    :type args:
    :param kwargs: candidate keyword arguments
    :type kwargs:
    :return: same value as a direct call with the retained arguments
    :rtype: object
    """
    if isclass(function):
        call_args, call_kwargs = constructor_args(function, *args, **kwargs)
    else:
        call_args, call_kwargs = function_args(function, *args, **kwargs)
    return function(*call_args, **call_kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
def function_args(callable_, *args, **kwargs):
    """
    Select the args and kwargs accepted by a plain callable.

    :param callable_: callable to inspect
    :type callable_: callable
    :param args: candidate positional arguments
    :type args:
    :param kwargs: candidate keyword arguments
    :type kwargs:
    :return: (args, kwargs) matching the function signature
    :rtype: tuple
    """
    return argspec_args(getargspec(callable_), False, *args, **kwargs)  # pylint:disable=deprecated-method
|
|
||||||
|
|
||||||
|
|
||||||
def constructor_args(class_, *args, **kwargs):
    """
    Select the args and kwargs accepted by the constructor of a class.

    :param class_: class whose constructor is inspected
    :type class_: class
    :param args: candidate positional arguments
    :type args:
    :param kwargs: candidate keyword arguments
    :type kwargs:
    :return: (args, kwargs) matching the constructor signature
    :rtype: tuple
    """
    constructor = _constructor(class_)
    return argspec_args(getargspec(constructor), True, *args, **kwargs)  # pylint:disable=deprecated-method
|
|
||||||
|
|
||||||
|
|
||||||
def argspec_args(argspec, constructor, *args, **kwargs):
    """
    Return (args, kwargs) matching the argspec object

    Keyword arguments are kept when the signature has a ``**kwargs`` catch-all, or when their name
    is a declared positional or keyword-only parameter. Positional arguments are kept entirely when
    the signature has ``*args``; otherwise they are cut to the number of declared positional
    parameters (minus one for a constructor, whose first parameter is the instance).

    :param argspec: argspec to use
    :type argspec: argspec
    :param constructor: is it a constructor ?
    :type constructor: bool
    :param args:
    :type args:
    :param kwargs:
    :type kwargs:
    :return: (args, kwargs) matching the function signature
    :rtype: tuple
    """
    if argspec.varkw:
        call_kwarg = kwargs
    else:
        # Keyword-only parameters are valid keyword targets too; the attribute is read
        # defensively because not every argspec-like object provides it.
        accepted = list(argspec.args) + list(getattr(argspec, 'kwonlyargs', None) or ())
        call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in accepted)  # Python 2.6 dict comprehension
    if argspec.varargs:
        call_args = args
    else:
        # Never let the bound go negative: a negative slice bound would keep all but the last argument.
        limit = max(len(argspec.args) - (1 if constructor else 0), 0)
        call_args = args[:limit]
    return call_args, call_kwarg
|
|
||||||
|
|
||||||
|
|
||||||
if not _fullargspec_supported:
    def argspec_args_legacy(argspec, constructor, *args, **kwargs):
        """
        Return (args, kwargs) matching a legacy argspec object, whose catch-all
        keyword attribute is named ``keywords``.

        :param argspec: argspec to use
        :type argspec: argspec
        :param constructor: is it a constructor ?
        :type constructor: bool
        :param args:
        :type args:
        :param kwargs:
        :type kwargs:
        :return: (args, kwargs) matching the function signature
        :rtype: tuple
        """
        if argspec.keywords:
            kept_kwargs = kwargs
        else:
            declared = argspec.args
            kept_kwargs = dict((k, kwargs[k]) for k in kwargs if k in declared)  # Python 2.6 dict comprehension

        if argspec.varargs:
            kept_args = args
        else:
            skipped = 1 if constructor else 0
            kept_args = args[:len(argspec.args) - skipped]
        return kept_args, kept_kwargs

    # Without getfullargspec, the legacy implementation replaces the default one.
    argspec_args = argspec_args_legacy
|
|
||||||
|
|
||||||
|
|
||||||
def ensure_list(param):
    """
    Coerce the given parameter into something that can be iterated like a list.

    :param param: any value
    :type param:
    :return: a new empty list for a falsy value, the value itself when it is iterable,
        otherwise a one-element list holding the value
    :rtype:
    """
    if not param:
        return []
    if is_iterable(param):
        return param
    return [param]
|
|
||||||
|
|
||||||
|
|
||||||
def ensure_dict(param, default_value, default_key=None):
    """
    Retrieves a dict and a default value from given parameter.

    A falsy parameter is replaced by ``default_value``. If the result is not a dict, it is wrapped
    as ``{default_key: value}`` and, when truthy, also becomes the returned default value.

    :param param:
    :type param:
    :param default_value:
    :type default_value:
    :param default_key:
    :type default_key:
    :return: ``(dict, default value)``
    :rtype: tuple
    """
    effective = param if param else default_value
    if isinstance(effective, dict):
        return effective, default_value
    if effective:
        default_value = effective
    return {default_key: effective}, default_value
|
|
||||||
|
|
||||||
|
|
||||||
def filter_index(collection, predicate=None, index=None):
    """
    Filter collection with predicate function and index.

    An int given as ``predicate`` while ``index`` is None is taken as the index.
    If index is not found, returns None.

    :param collection:
    :type collection: collection supporting iteration and slicing
    :param predicate: function to filter the collection with
    :type predicate: function
    :param index: position of a single element to retrieve
    :type index: int
    :return: filtered list, or single element of filtered list if index is defined
    :rtype: list or object
    """
    if index is None and isinstance(predicate, int):
        index, predicate = predicate, None
    if predicate:
        # Rebuild a collection of the same type from the retained elements.
        collection = collection.__class__(filter(predicate, collection))
    if index is None:
        return collection
    try:
        return collection[index]
    except IndexError:
        return None
|
|
||||||
|
|
||||||
|
|
||||||
def set_defaults(defaults, kwargs, override=False):
    """
    Merge the entries of ``defaults`` into ``kwargs``, in place.

    A ``clear`` entry is always removed from ``defaults``; when truthy, ``kwargs`` is emptied first.
    For a key present on both sides, list values are concatenated (defaults first) and dict values
    are merged recursively. A key is then assigned the default value when it is missing from
    ``kwargs`` or when ``override`` is truthy.

    :param override:
    :type override:
    :param defaults:
    :type defaults:
    :param kwargs:
    :type kwargs:
    :return:
    :rtype:
    """
    if defaults.pop('clear', False):
        kwargs.clear()
    for key, value in defaults.items():
        if key in kwargs:
            current = kwargs[key]
            if isinstance(value, list) and isinstance(current, list):
                kwargs[key] = list(value) + current
            elif isinstance(value, dict) and isinstance(current, dict):
                set_defaults(value, current)
        if override or key not in kwargs:
            kwargs[key] = value
|
|
||||||
@@ -1,890 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Classes and functions related to matches
|
|
||||||
"""
|
|
||||||
import copy
|
|
||||||
import itertools
|
|
||||||
from collections import defaultdict
|
|
||||||
try:
|
|
||||||
from collections.abc import MutableSequence
|
|
||||||
except ImportError:
|
|
||||||
from collections import MutableSequence
|
|
||||||
|
|
||||||
try:
|
|
||||||
from collections import OrderedDict # pylint:disable=ungrouped-imports
|
|
||||||
except ImportError: # pragma: no cover
|
|
||||||
from ordereddict import OrderedDict # pylint:disable=import-error
|
|
||||||
import six
|
|
||||||
|
|
||||||
from .loose import ensure_list, filter_index
|
|
||||||
from .utils import is_iterable
|
|
||||||
from .debug import defined_at
|
|
||||||
|
|
||||||
|
|
||||||
class MatchesDict(OrderedDict):
    """
    Ordered dict carrying two extra list-valued mappings, ``matches`` and ``values_list``.
    """

    def __init__(self):
        OrderedDict.__init__(self)
        # Both start empty and create a list on first access of a key.
        self.matches = defaultdict(list)
        self.values_list = defaultdict(list)
|
|
||||||
|
|
||||||
|
|
||||||
class _BaseMatches(MutableSequence):
|
|
||||||
"""
|
|
||||||
A custom list[Match] that automatically maintains name, tag, start and end lookup structures.
|
|
||||||
"""
|
|
||||||
_base = list
|
|
||||||
_base_add = _base.append
|
|
||||||
_base_remove = _base.remove
|
|
||||||
_base_extend = _base.extend
|
|
||||||
|
|
||||||
def __init__(self, matches=None, input_string=None):  # pylint: disable=super-init-not-called
    """
    :param matches: optional initial matches, added through ``extend``
    :param input_string: string the matches refer to
    """
    self.input_string = input_string
    self._max_end = 0
    self._delegate = []
    # Lookup structures stay None until their property is first read.
    self.__name_dict = None
    self.__tag_dict = None
    self.__start_dict = None
    self.__end_dict = None
    self.__index_dict = None
    if not matches:
        return
    self.extend(matches)
|
|
||||||
|
|
||||||
@property
def _name_dict(self):
    """
    Matches grouped by name, built on first access. Matches with a falsy name are left out.
    """
    if self.__name_dict is None:
        lookup = defaultdict(_BaseMatches._base)
        self.__name_dict = lookup
        named = [m for m in self._delegate if m.name]
        for name, group in itertools.groupby(named, lambda item: item.name):
            _BaseMatches._base_extend(lookup[name], group)

    return self.__name_dict
|
|
||||||
|
|
||||||
@property
def _start_dict(self):
    """
    Matches grouped by start index, built on first access.
    """
    if self.__start_dict is None:
        lookup = defaultdict(_BaseMatches._base)
        self.__start_dict = lookup
        for start, group in itertools.groupby(list(self._delegate), lambda item: item.start):
            _BaseMatches._base_extend(lookup[start], group)

    return self.__start_dict
|
|
||||||
|
|
||||||
@property
def _end_dict(self):
    """
    Matches grouped by end index, built on first access.
    """
    if self.__end_dict is None:
        lookup = defaultdict(_BaseMatches._base)
        self.__end_dict = lookup
        for end, group in itertools.groupby(list(self._delegate), lambda item: item.end):
            _BaseMatches._base_extend(lookup[end], group)

    return self.__end_dict
|
|
||||||
|
|
||||||
@property
def _tag_dict(self):
    """
    Matches grouped by tag, built on first access. A match appears once under each of its tags.
    """
    if self.__tag_dict is None:
        lookup = defaultdict(_BaseMatches._base)
        self.__tag_dict = lookup
        for item in self._delegate:
            for tag in item.tags:
                _BaseMatches._base_add(lookup[tag], item)

    return self.__tag_dict
|
|
||||||
|
|
||||||
@property
def _index_dict(self):
    """
    Matches grouped by every index of their span, built on first access.
    """
    if self.__index_dict is None:
        lookup = defaultdict(_BaseMatches._base)
        self.__index_dict = lookup
        for item in self._delegate:
            for position in range(*item.span):
                _BaseMatches._base_add(lookup[position], item)

    return self.__index_dict
|
|
||||||
|
|
||||||
def _add_match(self, match):
    """
    Register a match in every lookup structure that has already been built,
    and raise the known maximum end if needed.

    :param match:
    :type match: Match
    """
    register = _BaseMatches._base_add
    if self.__name_dict is not None and match.name:
        register(self._name_dict[match.name], match)
    if self.__tag_dict is not None:
        for tag in match.tags:
            register(self._tag_dict[tag], match)
    if self.__start_dict is not None:
        register(self._start_dict[match.start], match)
    if self.__end_dict is not None:
        register(self._end_dict[match.end], match)
    if self.__index_dict is not None:
        for position in range(*match.span):
            register(self._index_dict[position], match)
    if self._max_end < match.end:
        self._max_end = match.end
|
|
||||||
|
|
||||||
def _remove_match(self, match):
    """
    Unregister a match from every lookup index that has already been built.

    Indexes that are still ``None`` are skipped, since they are computed from the delegate list on
    first access. When the removed match was the last one ending at (or beyond) ``_max_end``,
    ``_max_end`` is recomputed from the end positions that still hold at least one match.

    :param match: the match to unregister
    :type match: Match
    """
    unregister = _BaseMatches._base_remove
    if self.__name_dict is not None and match.name:
        unregister(self._name_dict[match.name], match)
    if self.__tag_dict is not None:
        for tag in match.tags:
            unregister(self._tag_dict[tag], match)
    if self.__start_dict is not None:
        unregister(self._start_dict[match.start], match)
    if self.__end_dict is not None:
        unregister(self._end_dict[match.end], match)
    if self.__index_dict is not None:
        for position in range(*match.span):
            unregister(self._index_dict[position], match)
    # .get() avoids the defaultdict side effect of creating an empty bucket for match.end.
    if match.end >= self._max_end and not self._end_dict.get(match.end):
        # Only buckets that still contain matches count: emptied buckets keep their key in the
        # mapping, so a plain max() over the keys could never shrink _max_end.
        live_ends = [end for end, bucket in self._end_dict.items() if bucket]
        # NOTE(review): 0 is assumed to be the value _max_end has for an empty collection;
        # confirm against the constructor, which is outside this block.
        self._max_end = max(live_ends) if live_ends else 0
|
|
||||||
|
|
||||||
def previous(self, match, predicate=None, index=None):
    """
    Retrieves the nearest previous matches.

    Positions are scanned backwards from ``match.start`` down to 0; the matches ending at the first
    position that has any are returned.

    :param match: the reference match
    :type match: Match
    :param predicate: filter forwarded to ``filter_index``
    :type predicate:
    :param index: index forwarded to ``filter_index``
    :type index: int
    :return: result of ``filter_index`` on the matches found (or on an empty collection)
    :rtype:
    """
    for position in range(match.start, -1, -1):
        found = self.ending(position)
        if found:
            return filter_index(found, predicate, index)
    return filter_index(_BaseMatches._base(), predicate, index)
|
|
||||||
|
|
||||||
def next(self, match, predicate=None, index=None):
    """
    Retrieves the nearest next matches.

    Positions are scanned forwards from ``match.start + 1`` up to ``_max_end`` (inclusive); the
    matches starting at the first position that has any are returned.

    :param match: the reference match
    :type match: Match
    :param predicate: filter forwarded to ``filter_index``
    :type predicate:
    :param index: index forwarded to ``filter_index``
    :type index: int
    :return: result of ``filter_index`` on the matches found (or on an empty collection)
    :rtype:
    """
    for position in range(match.start + 1, self._max_end + 1):
        found = self.starting(position)
        if found:
            return filter_index(found, predicate, index)
    return filter_index(_BaseMatches._base(), predicate, index)
|
|
||||||
|
|
||||||
def named(self, name, predicate=None, index=None):
    """
    Retrieves the Match objects that have the given name.

    A copy of the indexed bucket is filtered, so the internal index is not exposed to the caller.

    :param name: the match name to look up
    :type name: str
    :param predicate: filter forwarded to ``filter_index``
    :type predicate:
    :param index: index forwarded to ``filter_index``
    :type index: int
    :return: matches with the given name
    :rtype: set[Match]
    """
    with_name = _BaseMatches._base(self._name_dict[name])
    return filter_index(with_name, predicate, index)
|
|
||||||
|
|
||||||
def tagged(self, tag, predicate=None, index=None):
    """
    Retrieves the Match objects that have the given tag defined.

    A copy of the indexed bucket is filtered, so the internal index is not exposed to the caller.

    :param tag: the tag to look up
    :type tag: str
    :param predicate: filter forwarded to ``filter_index``
    :type predicate:
    :param index: index forwarded to ``filter_index``
    :type index: int
    :return: matches carrying the given tag
    :rtype: set[Match]
    """
    with_tag = _BaseMatches._base(self._tag_dict[tag])
    return filter_index(with_tag, predicate, index)
|
|
||||||
|
|
||||||
def starting(self, start, predicate=None, index=None):
    """
    Retrieves the Match objects that start at the given index.

    A copy of the indexed bucket is filtered, so the internal index is not exposed to the caller.

    :param start: the starting index
    :type start: int
    :param predicate: filter forwarded to ``filter_index``
    :type predicate:
    :param index: index forwarded to ``filter_index``
    :type index: int
    :return: matches starting at ``start``
    :rtype: set[Match]
    """
    starting_here = _BaseMatches._base(self._start_dict[start])
    return filter_index(starting_here, predicate, index)
|
|
||||||
|
|
||||||
def ending(self, end, predicate=None, index=None):
    """
    Retrieves the Match objects that end at the given index.

    A copy of the indexed bucket is filtered, so the internal index is not exposed to the caller.

    :param end: the ending index
    :type end: int
    :param predicate: filter forwarded to ``filter_index``
    :type predicate:
    :param index: index forwarded to ``filter_index``
    :type index: int
    :return: matches ending at ``end``
    :rtype: set[Match]
    """
    ending_here = _BaseMatches._base(self._end_dict[end])
    return filter_index(ending_here, predicate, index)
|
|
||||||
|
|
||||||
def range(self, start=0, end=None, predicate=None, index=None):
    """
    Retrieves the Match objects overlapping the given range, sorted from start to end.

    ``end`` defaults to ``max_end`` and is never allowed to exceed it. A match is kept when it
    starts before ``end`` and ends after ``start``.

    :param start: the starting index
    :type start: int
    :param end: the ending index
    :type end: int
    :param predicate: filter forwarded to ``filter_index``
    :type predicate:
    :param index: index forwarded to ``filter_index``
    :type index: int
    :return: matches overlapping the range
    :rtype: set[Match]
    """
    end = self.max_end if end is None else min(self.max_end, end)
    overlapping = _BaseMatches._base()
    for candidate in sorted(self):
        if candidate.end > start and candidate.start < end:
            overlapping.append(candidate)
    return filter_index(overlapping, predicate, index)
|
|
||||||
|
|
||||||
def chain_before(self, position, seps, start=0, predicate=None, index=None):
    """
    Retrieves a list of chained matches, before position, matching predicate and separated by
    characters from seps only.

    Positions are walked backwards from ``position - 1`` down to ``start``. The walk stops at the
    first position that holds no accepted match and whose character is not a separator.

    :param position: an index, or an object with a ``start`` attribute (that attribute is used)
    :type position:
    :param seps: separator characters allowed between chained matches
    :type seps:
    :param start: lowest index to inspect
    :type start:
    :param predicate: filter applied to the matches found at each position
    :type predicate:
    :param index: index forwarded to ``filter_index``
    :type index:
    :return: result of ``filter_index`` on the chained matches
    :rtype:
    """
    if hasattr(position, 'start'):
        position = position.start
    position = min(self.max_end, position)

    chain = _BaseMatches._base()
    for i in range(position - 1, start - 1, -1):
        accepted = [found for found in self.at_index(i) if not predicate or predicate(found)]
        if not accepted:
            if self.input_string[i] not in seps:
                break
            continue
        for found in accepted:
            if found not in chain:
                chain.append(found)

    return filter_index(chain, predicate, index)
|
|
||||||
|
|
||||||
def chain_after(self, position, seps, end=None, predicate=None, index=None):
    """
    Retrieves a list of chained matches, after position, matching predicate and separated by
    characters from seps only.

    Positions are walked forwards from ``position`` up to ``end`` (exclusive). The walk stops at the
    first position that holds no accepted match and whose character is not a separator.

    :param position: an index, or an object with an ``end`` attribute (that attribute is used)
    :type position:
    :param seps: separator characters allowed between chained matches
    :type seps:
    :param end: upper bound of the walk; defaults to ``max_end`` and is capped by it
    :type end:
    :param predicate: filter applied to the matches found at each position
    :type predicate:
    :param index: index forwarded to ``filter_index``
    :type index:
    :return: result of ``filter_index`` on the chained matches
    :rtype:
    """
    if hasattr(position, 'end'):
        position = position.end
    chain = _BaseMatches._base()

    end = self.max_end if end is None else min(self.max_end, end)

    for i in range(position, end):
        accepted = [found for found in self.at_index(i) if not predicate or predicate(found)]
        if not accepted:
            if self.input_string[i] not in seps:
                break
            continue
        for found in accepted:
            if found not in chain:
                chain.append(found)

    return filter_index(chain, predicate, index)
|
|
||||||
|
|
||||||
@property
def max_end(self):
    """
    Retrieves the maximum index.

    :return: the larger of the input string length and ``_max_end``; ``_max_end`` alone when no
        (or an empty) input string is set
    """
    if not self.input_string:
        return self._max_end
    return max(len(self.input_string), self._max_end)
|
|
||||||
|
|
||||||
def _hole_start(self, position, ignore=None):
|
|
||||||
"""
|
|
||||||
Retrieves the start of hole index from position.
|
|
||||||
:param position:
|
|
||||||
:type position:
|
|
||||||
:param ignore:
|
|
||||||
:type ignore:
|
|
||||||
:return:
|
|
||||||
:rtype:
|
|
||||||
"""
|
|
||||||
for lindex in reversed(range(0, position)):
|
|
||||||
for starting in self.starting(lindex):
|
|
||||||
if not ignore or not ignore(starting):
|
|
||||||
return lindex
|
|
||||||
return 0
|
|
||||||
|
|
||||||
def _hole_end(self, position, ignore=None):
|
|
||||||
"""
|
|
||||||
Retrieves the end of hole index from position.
|
|
||||||
:param position:
|
|
||||||
:type position:
|
|
||||||
:param ignore:
|
|
||||||
:type ignore:
|
|
||||||
:return:
|
|
||||||
:rtype:
|
|
||||||
"""
|
|
||||||
for rindex in range(position, self.max_end):
|
|
||||||
for starting in self.starting(rindex):
|
|
||||||
if not ignore or not ignore(starting):
|
|
||||||
return rindex
|
|
||||||
return self.max_end
|
|
||||||
|
|
||||||
def holes(self, start=0, end=None, formatter=None, ignore=None, seps=None, predicate=None,
          index=None):  # pylint: disable=too-many-branches,too-many-locals
    """
    Retrieves Match objects for the parts of the given range that are not covered by any match.

    The range is walked index by index. A hole is opened at the first uncovered index and closed
    when a covered index is reached or, if ``seps`` is given, when a separator character is met.

    :param start: first index of the range
    :type start:
    :param end: upper bound of the range; defaults to ``max_end`` and is capped by it
    :type end:
    :param formatter: formatter given to the created hole matches
    :type formatter:
    :param ignore: optional callable; matches for which it returns a true value do not count as
        covering an index
    :type ignore:
    :param seps: separator characters that close a hole (requires ``input_string``)
    :type seps:
    :param predicate: filter forwarded to ``filter_index``
    :type predicate:
    :param index: index forwarded to ``filter_index``
    :type index:
    :return: result of ``filter_index`` on the hole matches
    :rtype:
    """
    assert self.input_string if seps else True, "input_string must be defined when using seps parameter"
    end = self.max_end if end is None else min(self.max_end, end)
    ret = _BaseMatches._base()
    in_hole = False
    # Keeps rindex defined for the final adjustment even when the loop below does not run.
    rindex = start

    for rindex in range(self._hole_start(start, ignore), end):
        covering = [found for found in self.at_index(rindex) if not ignore or not ignore(found)]

        on_separator = seps and in_hole and self.input_string and self.input_string[rindex] in seps
        if on_separator or (covering and in_hole):
            # Close current hole match
            in_hole = False
            ret[-1].end = rindex
        elif not covering and not in_hole:
            # Open a new hole match; its end is filled in when the hole gets closed
            in_hole = True
            ret.append(Match(max(rindex, start), None, input_string=self.input_string, formatter=formatter))

    if ret and in_hole:
        # The last hole is still open: extend it to the next starting match, capped by end
        ret[-1].end = min(self._hole_end(rindex, ignore), end)
    return filter_index(ret, predicate, index)
|
|
||||||
|
|
||||||
def conflicting(self, match, predicate=None, index=None):
    """
    Retrieves a list of ``Match`` objects that conflicts with given match.

    Every match covering at least one index of ``match.span`` is collected once, in order of first
    appearance, and ``match`` itself is excluded from the result.

    :param match: the match to find conflicts for
    :type match: Match
    :param predicate: filter forwarded to ``filter_index``
    :type predicate:
    :param index: index forwarded to ``filter_index``
    :type index:
    :return: result of ``filter_index`` on the conflicting matches
    :rtype:
    """
    ret = _BaseMatches._base()

    for i in range(*match.span):
        for at_match in self.at_index(i):
            if at_match not in ret:
                ret.append(at_match)

    # The match is absent from ret when its span is empty or when it is not stored in this
    # collection; an unconditional remove() would raise ValueError in those cases.
    if match in ret:
        ret.remove(match)

    return filter_index(ret, predicate, index)
|
|
||||||
|
|
||||||
def at_match(self, match, predicate=None, index=None):
    """
    Retrieves a list of matches from given match.

    Delegates to ``at_span`` using the span of ``match``.
    """
    span = match.span
    return self.at_span(span, predicate, index)
|
|
||||||
|
|
||||||
def at_span(self, span, predicate=None, index=None):
    """
    Retrieves a list of matches from given (start, end) tuple.

    Only the first and the last index of the span are inspected: the matches covering
    ``span[0]`` come first, followed by those covering ``span[1] - 1`` not already listed.
    """
    at_first = self._index_dict[span[0]]
    at_last = self._index_dict[span[1] - 1]

    merged = list(at_first)
    for candidate in at_last:
        if candidate in merged:
            continue
        merged.append(candidate)

    return filter_index(merged, predicate, index)
|
|
||||||
|
|
||||||
def at_index(self, pos, predicate=None, index=None):
    """
    Retrieves a list of matches from given position

    The bucket of the position index is handed to ``filter_index`` together with ``predicate``
    and ``index``.
    """
    covering = self._index_dict[pos]
    return filter_index(covering, predicate, index)
|
|
||||||
|
|
||||||
@property
def names(self):
    """
    Retrieve all names.

    :return: the keys of the name index
    """
    by_name = self._name_dict
    return by_name.keys()
|
|
||||||
|
|
||||||
@property
def tags(self):
    """
    Retrieve all tags.

    :return: the keys of the tag index
    """
    by_tag = self._tag_dict
    return by_tag.keys()
|
|
||||||
|
|
||||||
def to_dict(self, details=False, first_value=False, enforce_list=False):
    """
    Converts matches to a dict object.

    Matches are processed in sorted order. Each match is always recorded under ``ret.matches``;
    its value is recorded once under ``ret.values_list`` unless ``enforce_list`` is set.

    :param details: if True, values will be complete Match object, else it will be only string
        Match.value property
    :type details: bool
    :param first_value: if True, only the first value will be kept. Else, multiple distinct values
        will be set as a list in the dict.
    :type first_value: bool
    :param enforce_list: if True, value is wrapped in a list even when a single value is found.
        Else, list values are available under `values_list` property of the returned dict object.
    :type enforce_list: bool
    :return: the matches dict
    :rtype: dict
    """
    ret = MatchesDict()
    for match in sorted(self):
        key = match.name
        value = match if details else match.value
        ret.matches[key].append(match)
        if not enforce_list and value not in ret.values_list[key]:
            ret.values_list[key].append(value)

        if key not in ret.keys():
            # First value seen for this name
            ret[key] = [value] if enforce_list and not isinstance(value, list) else value
            continue
        if first_value:
            continue

        known = ret[key]
        if isinstance(known, list):
            if value in known:
                continue
        else:
            if known == value:
                continue
            # Second distinct value: promote the scalar to a list before appending
            ret[key] = [known]
        ret[key].append(value)
    return ret
|
|
||||||
|
|
||||||
if six.PY2:  # pragma: no cover
    def clear(self):
        """
        Python 3 backport

        Removes every item through slice deletion.
        """
        del self[:]
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return len(self._delegate)
|
|
||||||
|
|
||||||
def __getitem__(self, index):
    """
    Item access on the delegate list.

    A list result (slice access) is wrapped in a new ``Matches``; any other result is returned
    as is.
    """
    item = self._delegate[index]
    return Matches(item) if isinstance(item, list) else item
|
|
||||||
|
|
||||||
def __setitem__(self, index, match):
|
|
||||||
self._delegate[index] = match
|
|
||||||
if isinstance(index, slice):
|
|
||||||
for match_item in match:
|
|
||||||
self._add_match(match_item)
|
|
||||||
return
|
|
||||||
self._add_match(match)
|
|
||||||
|
|
||||||
def __delitem__(self, index):
|
|
||||||
match = self._delegate[index]
|
|
||||||
del self._delegate[index]
|
|
||||||
if isinstance(match, list):
|
|
||||||
# if index is a slice, we has a match list
|
|
||||||
for match_item in match:
|
|
||||||
self._remove_match(match_item)
|
|
||||||
else:
|
|
||||||
self._remove_match(match)
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return self._delegate.__repr__()
|
|
||||||
|
|
||||||
def insert(self, index, value):
    """
    Insert ``value`` before ``index`` in the delegate list, then register it with ``_add_match``.
    """
    delegate = self._delegate
    delegate.insert(index, value)
    self._add_match(value)
|
|
||||||
|
|
||||||
|
|
||||||
class Matches(_BaseMatches):
    """
    A custom list[Match] holding regular (non-marker) matches.

    Marker matches are kept apart, in the ``markers`` attribute.
    """

    def __init__(self, matches=None, input_string=None):
        # markers is created before the base constructor runs
        self.markers = Markers(input_string=input_string)
        super(Matches, self).__init__(matches=matches, input_string=input_string)

    def _add_match(self, match):
        """
        Register a match, refusing marker matches.
        """
        assert not match.marker, "A marker match should not be added to <Matches> object"
        super(Matches, self)._add_match(match)
|
|
||||||
|
|
||||||
|
|
||||||
class Markers(_BaseMatches):
    """
    A custom list[Match] containing markers list.
    """

    def __init__(self, matches=None, input_string=None):
        """
        :param matches: initial marker matches, forwarded to the base constructor
        :type matches: list[Match]
        :param input_string: the string the markers refer to
        :type input_string: str
        """
        # Forward ``matches`` instead of a hard-coded None, so markers given by the caller are
        # no longer silently dropped.
        super(Markers, self).__init__(matches=matches, input_string=input_string)

    def _add_match(self, match):
        """
        Register a match, refusing non-marker matches.
        """
        assert match.marker, "A non-marker match should not be added to <Markers> object"
        super(Markers, self)._add_match(match)
|
|
||||||
|
|
||||||
|
|
||||||
class Match(object):
    """
    Object storing values related to a single match
    """

    def __init__(self, start, end, value=None, name=None, tags=None, marker=None, parent=None, private=None,
                 pattern=None, input_string=None, formatter=None, conflict_solver=None, **kwargs):
        """
        :param start: start index of the match
        :param end: end index of the match (``holes`` creates matches with ``None`` and sets it later)
        :param value: hardcoded value; see the ``value`` property
        :param name: name of the match
        :param tags: tags of the match, normalized with ``ensure_list``
        :param marker: marker flag
        :param parent: parent match, if any
        :param private: private flag
        :param pattern: pattern that produced the match; also provides ``defined_at`` when set
        :param input_string: string the indices refer to
        :param formatter: callable applied to the raw value by the ``value`` property
        :param conflict_solver: stored as is
        :param kwargs: accepted and ignored
        """
        # pylint: disable=unused-argument
        self.start = start
        self.end = end
        self.name = name
        self._value = value
        self.tags = ensure_list(tags)
        self.marker = marker
        self.parent = parent
        self.input_string = input_string
        self.formatter = formatter
        self.pattern = pattern
        self.private = private
        self.conflict_solver = conflict_solver
        self._children = None
        self._raw_start = None
        self._raw_end = None
        self.defined_at = pattern.defined_at if pattern else defined_at()

    @property
    def span(self):
        """
        2-tuple with start and end indices of the match
        """
        return self.start, self.end

    @property
    def children(self):
        """
        Children matches.

        The collection is created on first access.
        """
        if self._children is None:
            self._children = Matches(None, self.input_string)
        return self._children

    @children.setter
    def children(self, value):
        self._children = value

    @property
    def value(self):
        """
        Get the value of the match, using formatter if defined.

        A truthy hardcoded value wins; otherwise the raw value is returned, passed through the
        formatter when one is set.

        :return: the match value
        :rtype:
        """
        if self._value:
            return self._value
        if self.formatter:
            return self.formatter(self.raw)
        return self.raw

    @value.setter
    def value(self, value):
        """
        Set the value (hardcode)

        :param value: the hardcoded value
        :type value:
        """
        self._value = value  # pylint: disable=attribute-defined-outside-init

    @property
    def names(self):
        """
        Get all names of children

        Without children, the set only contains the name of this match.

        :return: the collected names
        :rtype: set
        """
        if not self.children:
            return set([self.name])
        ret = set()
        for child in self.children:
            for name in child.names:
                ret.add(name)
        return ret

    @property
    def raw_start(self):
        """
        start index of raw value

        :return: the explicit raw start when set, else ``start``
        :rtype:
        """
        if self._raw_start is None:
            return self.start
        return self._raw_start

    @raw_start.setter
    def raw_start(self, value):
        """
        Set start index of raw value
        """
        self._raw_start = value

    @property
    def raw_end(self):
        """
        end index of raw value

        :return: the explicit raw end when set, else ``end``
        :rtype:
        """
        if self._raw_end is None:
            return self.end
        return self._raw_end

    @raw_end.setter
    def raw_end(self, value):
        """
        Set end index of raw value
        """
        self._raw_end = value

    @property
    def raw(self):
        """
        Get the raw value of the match, without using hardcoded value nor formatter.

        :return: the slice of ``input_string`` between ``raw_start`` and ``raw_end``, or None when
            no (or an empty) input string is set
        :rtype:
        """
        if self.input_string:
            return self.input_string[self.raw_start:self.raw_end]
        return None

    @property
    def initiator(self):
        """
        Retrieve the initiator parent of a match

        :return: the topmost ancestor reached by following ``parent``; the match itself when it
            has no parent
        :rtype: Match
        """
        match = self
        while match.parent:
            match = match.parent
        return match

    def crop(self, crops, predicate=None, index=None):
        """
        crop the match with given Match objects or spans tuples

        The match itself is left untouched: cropping is applied to deep copies.

        :param crops: a Match, a (start, end) tuple, or a list of those
        :type crops: list or object
        :param predicate: filter forwarded to ``filter_index``
        :param index: index forwarded to ``filter_index``
        :return: a list of Match objects
        :rtype: list[Match]
        """
        if not is_iterable(crops) or len(crops) == 2 and isinstance(crops[0], int):
            crops = [crops]
        initial = copy.deepcopy(self)
        ret = [initial]
        for crop in crops:
            if hasattr(crop, 'span'):
                start, end = crop.span
            else:
                start, end = crop
            # Iterate over a snapshot: ret is modified while cropping
            for current in list(ret):
                if start <= current.start and end >= current.end:
                    # self is included in crop, remove current ...
                    ret.remove(current)
                elif start >= current.start and end <= current.end:
                    # crop is included in self, split current ...
                    right = copy.deepcopy(current)
                    current.end = start
                    if not current:
                        ret.remove(current)
                    right.start = end
                    if right:
                        ret.append(right)
                elif current.end >= end > current.start:
                    current.start = end
                elif current.start <= start < current.end:
                    current.end = start
        return filter_index(ret, predicate, index)

    def split(self, seps, predicate=None, index=None):
        """
        Split this match in multiple matches using given separators.

        :param seps: separator characters
        :type seps: string containing separator characters
        :param predicate: filter forwarded to ``filter_index``
        :param index: index forwarded to ``filter_index``
        :return: list of new Match objects
        :rtype: list
        """
        split_match = copy.deepcopy(self)
        current_match = split_match
        ret = []

        for i in range(0, len(self.raw)):
            if self.raw[i] in seps:
                if not split_match:
                    # First separator after a part: end that part here and prepare the next one
                    split_match = copy.deepcopy(current_match)
                    current_match.end = self.start + i

            else:
                if split_match:
                    # First non-separator character: the pending part starts here
                    split_match.start = self.start + i
                    current_match = split_match
                    ret.append(split_match)
                    split_match = None

        return filter_index(ret, predicate, index)

    def tagged(self, *tags):
        """
        Check if this match has at least one of the provided tags

        :param tags: tags to look for
        :return: True if at least one tag is defined, False otherwise.
        """
        return any(tag in self.tags for tag in tags)

    def named(self, *names):
        """
        Check if one of the children match has one of the provided name

        :param names: names to look for
        :return: True if at least one child is named with a given name is defined, False otherwise.
        """
        return any(name in self.names for name in names)

    def __len__(self):
        return self.end - self.start

    def __hash__(self):
        return hash(Match) + hash(self.start) + hash(self.end) + hash(self.value)

    def __eq__(self, other):
        if isinstance(other, Match):
            return self.span == other.span and self.value == other.value and self.name == other.name and \
                self.parent == other.parent
        return NotImplemented

    def __ne__(self, other):
        if isinstance(other, Match):
            return self.span != other.span or self.value != other.value or self.name != other.name or \
                self.parent != other.parent
        return NotImplemented

    # Ordering only considers the span.
    def __lt__(self, other):
        if isinstance(other, Match):
            return self.span < other.span
        return NotImplemented

    def __gt__(self, other):
        if isinstance(other, Match):
            return self.span > other.span
        return NotImplemented

    def __le__(self, other):
        if isinstance(other, Match):
            return self.span <= other.span
        return NotImplemented

    def __ge__(self, other):
        if isinstance(other, Match):
            return self.span >= other.span
        return NotImplemented

    def __repr__(self):
        flags = ""
        name = ""
        tags = ""
        defined = ""
        initiator = ""
        if self.initiator.value != self.value:
            # %-formatting instead of "+": the initiator value is not necessarily a string, and
            # concatenating a non-string would raise TypeError.
            initiator = "+initiator=%s" % (self.initiator.value,)
        if self.private:
            flags += '+private'
        if self.name:
            name = "+name=%s" % (self.name,)
        if self.tags:
            tags = "+tags=%s" % (self.tags,)
        if self.defined_at:
            defined += "@%s" % (self.defined_at,)
        return "<%s:%s%s%s%s%s%s>" % (self.value, self.span, flags, name, tags, initiator, defined)
|
|
||||||
@@ -1,559 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Abstract pattern class definition along with various implementations (regexp, string, functional)
|
|
||||||
"""
|
|
||||||
# pylint: disable=super-init-not-called,wrong-import-position
|
|
||||||
|
|
||||||
from abc import ABCMeta, abstractmethod, abstractproperty
|
|
||||||
|
|
||||||
import six
|
|
||||||
|
|
||||||
from . import debug
|
|
||||||
from .formatters import default_formatter
|
|
||||||
from .loose import call, ensure_list, ensure_dict
|
|
||||||
from .match import Match
|
|
||||||
from .remodule import re, REGEX_AVAILABLE
|
|
||||||
from .utils import find_all, is_iterable, get_first_defined
|
|
||||||
from .validators import allways_true
|
|
||||||
|
|
||||||
|
|
||||||
@six.add_metaclass(ABCMeta)
class BasePattern(object):
    """
    Abstract root of all Pattern like objects.
    """

    @abstractmethod
    def matches(self, input_string, context=None, with_raw_matches=False):
        """
        Computes all matches for a given input

        Concrete classes must override this method; this declaration has no behaviour of its own.

        :param input_string: the string to parse
        :type input_string: str
        :param context: the context
        :type context: dict
        :param with_raw_matches: should return details
        :type with_raw_matches: dict
        :return: matches based on input_string for this pattern
        :rtype: iterator[Match]
        """
        return None
|
|
||||||
|
|
||||||
|
|
||||||
@six.add_metaclass(ABCMeta)
|
|
||||||
class Pattern(BasePattern):
|
|
||||||
"""
|
|
||||||
Definition of a particular pattern to search for.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, name=None, tags=None, formatter=None, value=None, validator=None, children=False, every=False,
             private_parent=False, private_children=False, private=False, private_names=None, ignore_names=None,
             marker=False, format_all=False, validate_all=False, disabled=lambda context: False, log_level=None,
             properties=None, post_processor=None, pre_match_processor=None, post_match_processor=None, **kwargs):
    """
    :param name: Name of this pattern
    :type name: str
    :param tags: List of tags related to this pattern
    :type tags: list[str]
    :param formatter: dict (name, func) of formatter to use with this pattern. name is the match name to support,
    and func a function(input_string) that returns the formatted string. A single formatter function can also be
    passed as a shortcut for {None: formatter}. The returned formatted string will be set in Match.value property.
    :type formatter: dict[str, func] || func
    :param value: dict (name, value) of value to use with this pattern. name is the match name to support,
    and value an object for the match value. A single object value can also be
    passed as a shortcut for {None: value}. The value will be set in Match.value property.
    :type value: dict[str, object] || object
    :param validator: dict (name, func) of validator to use with this pattern. name is the match name to support,
    and func a function(match) that returns a boolean. A single validator function can also be
    passed as a shortcut for {None: validator}. If return value is False, match will be ignored.
    :param children: generates children instead of parent
    :type children: bool
    :param every: generates both parent and children.
    :type every: bool
    :param private: flag this pattern as being private.
    :type private: bool
    :param private_parent: force return of parent and flag parent matches as private.
    :type private_parent: bool
    :param private_children: force return of children and flag children matches as private.
    :type private_children: bool
    :param private_names: force return of named matches as private.
    :type private_names: bool
    :param ignore_names: drop some named matches after validation.
    :type ignore_names: bool
    :param marker: flag this pattern as being a marker.
    :type marker: bool
    :param format_all: if True, pattern will format every match in the hierarchy (even match not yield).
    :type format_all: bool
    :param validate_all: if True, pattern will validate every match in the hierarchy (even match not yield).
    :type validate_all: bool
    :param disabled: if True, this pattern is disabled. Can also be a function(context).
    :type disabled: bool|function
    :param log_level: Log level associated to this pattern
    :type log_level: int
    :param post_processor: Post processing function
    :type post_processor: func
    :param pre_match_processor: Pre match processing function
    :type pre_match_processor: func
    :param post_match_processor: Post match processing function
    :type post_match_processor: func
    """
    # pylint:disable=too-many-locals,unused-argument
    self.name = name
    self.tags = ensure_list(tags)
    self.formatters, self._default_formatter = ensure_dict(formatter, default_formatter)
    self.values, self._default_value = ensure_dict(value, None)
    self.validators, self._default_validator = ensure_dict(validator, allways_true)
    self.every = every
    self.children = children
    self.private = private
    self.private_names = private_names if private_names else []
    self.ignore_names = ignore_names if ignore_names else []
    self.private_parent = private_parent
    self.private_children = private_children
    self.marker = marker
    self.format_all = format_all
    self.validate_all = validate_all
    # A non-callable flag is wrapped so that self.disabled can always be called with a context
    self.disabled = disabled if callable(disabled) else (lambda context: disabled)
    self._log_level = log_level
    self._properties = properties
    self.defined_at = debug.defined_at()
    # Processors that are not callable are discarded
    self.post_processor = post_processor if callable(post_processor) else None
    self.pre_match_processor = pre_match_processor if callable(pre_match_processor) else None
    self.post_match_processor = post_match_processor if callable(post_match_processor) else None
|
|
||||||
|
|
||||||
@property
def log_level(self):
    """
    Effective log level for this pattern.

    :return: the level stored on the pattern, or ``debug.LOG_LEVEL`` when none was stored
    :rtype: int
    """
    if self._log_level is None:
        return debug.LOG_LEVEL
    return self._log_level
|
|
||||||
|
|
||||||
def matches(self, input_string, context=None, with_raw_matches=False):
    """
    Compute all matches of this pattern for a given input.

    :param input_string: the string to parse
    :type input_string: str
    :param context: the context
    :type context: dict
    :param with_raw_matches: when truthy, the unprocessed matches are returned as well
    :type with_raw_matches: bool
    :return: the post-processed matches, or a ``(matches, raw_matches)`` tuple
        when ``with_raw_matches`` is truthy
    :rtype: list[Match] | tuple
    """
    # pylint: disable=too-many-branches
    processed = []
    raw_matches = []

    for pattern in self.patterns:
        # The match index restarts at 0 for every base pattern.
        for match_index, raw_match in enumerate(self._match(pattern, input_string, context)):
            raw_matches.append(raw_match)
            processed.extend(self._process_matches(raw_match, match_index))

    processed = self._post_process_matches(processed)

    if with_raw_matches:
        return processed, raw_matches
    return processed
|
|
||||||
|
|
||||||
@property
def _should_include_children(self):
    """
    Tell whether children matches of this pattern should be included in matches results.

    :return: ``children`` when it is truthy, ``every`` otherwise
    """
    if self.children:
        return self.children
    return self.every
|
|
||||||
|
|
||||||
@property
def _should_include_parent(self):
    """
    Tell whether a (parent) match of this pattern should be included in matches results.

    :return: ``True`` when ``children`` is falsy, ``every`` otherwise
    """
    if not self.children:
        return True
    return self.every
|
|
||||||
|
|
||||||
@staticmethod
def _match_config_property_keys(match, child=False):
    """
    Yield the keys used to look up per-match configuration, most specific first.

    The match name comes first when it is set, then ``'__children__'`` or
    ``'__parent__'`` depending on ``child``, and finally ``None``.

    :param match:
    :param child: truthy when ``match`` is a child match
    :return: generator of lookup keys
    """
    if match.name:
        yield match.name
    yield '__children__' if child else '__parent__'
    yield None
|
|
||||||
|
|
||||||
@staticmethod
def _process_match_index(match, match_index):
    """
    Store the given index on the match as ``match_index``.

    :param match: match to update in place
    :param match_index: index to record
    :return: None
    """
    match.match_index = match_index
|
|
||||||
|
|
||||||
def _process_match_private(self, match, child=False):
    """
    Flag the match as private according to this pattern configuration.

    A match becomes private when its name is listed in ``private_names``, or
    when the flag matching its role (``private_children`` for a child,
    ``private_parent`` otherwise) is set. The flag is never reset to False here.

    :param match: match to update in place
    :param child: truthy when ``match`` is a child match
    :return: None
    """
    private_by_name = match.name and match.name in self.private_names
    if private_by_name or (self.private_children if child else self.private_parent):
        match.private = True
|
|
||||||
|
|
||||||
def _process_match_value(self, match, child=False):
    """
    Apply the value configured on this pattern to the match.

    The match value is only overwritten when the configured value is truthy.

    :param match: match to update in place
    :param child: truthy when ``match`` is a child match
    :return: None
    """
    lookup_keys = self._match_config_property_keys(match, child=child)
    configured = get_first_defined(self.values, lookup_keys, self._default_value)
    if configured:
        match.value = configured
|
|
||||||
|
|
||||||
def _process_match_formatter(self, match, child=False):
    """
    Assign the formatter configured on this pattern to the match.

    Nothing is assigned when the match is not included in results and
    ``format_all`` is not set.

    :param match: match to update in place
    :param child: truthy when ``match`` is a child match
    :return: None
    """
    if child:
        included = self._should_include_children
    else:
        included = self._should_include_parent
    if not (included or self.format_all):
        return
    lookup_keys = self._match_config_property_keys(match, child=child)
    match.formatter = get_first_defined(self.formatters, lookup_keys, self._default_formatter)
|
|
||||||
|
|
||||||
def _process_match_validator(self, match, child=False):
    """
    Validate the match with the validator configured on this pattern.

    Validation is skipped (and the match accepted) when the match is not
    included in results and ``validate_all`` is not set.

    :param match: match to validate
    :param child: truthy when ``match`` is a child match
    :return: True if match is validated by the configured validator, False otherwise.
    :rtype: bool
    """
    if child:
        included = self._should_include_children
    else:
        included = self._should_include_parent
    if not (included or self.validate_all):
        return True
    lookup_keys = self._match_config_property_keys(match, child=child)
    validator = get_first_defined(self.validators, lookup_keys, self._default_validator)
    if validator and not validator(match):
        return False
    return True
|
|
||||||
|
|
||||||
def _process_match(self, match, match_index, child=False):
    """
    Configure a match from this pattern (index, private, value, formatter)
    and then validate it.

    :param match: match to update in place
    :param match_index: index to record on the match
    :param child: truthy when ``match`` is a child match
    :return: True if match is validated by the configured validator, False otherwise.
    """
    self._process_match_index(match, match_index)
    for configure in (self._process_match_private,
                      self._process_match_value,
                      self._process_match_formatter):
        configure(match, child)
    return self._process_match_validator(match, child)
|
|
||||||
|
|
||||||
@staticmethod
def _process_match_processor(match, processor):
    """
    Run an optional processor on a match.

    :param match: match to process
    :param processor: callable taking the match, or a falsy value to skip processing
    :return: the processor result when it is not ``None``, the given match otherwise
    """
    if not processor:
        return match
    result = processor(match)
    return match if result is None else result
|
|
||||||
|
|
||||||
def _process_matches(self, match, match_index):
    """
    Process an unprocessed match and generate every match to dispatch for it.

    Nothing is generated when a match processor returns a falsy match, or when
    the match or any of its children fails validation.

    :param match: unprocessed match
    :param match_index: index of the match for the current base pattern
    :return: Process and dispatched matches.
    """
    match = self._process_match_processor(match, self.pre_match_processor)
    if not match:
        return

    if not self._process_match(match, match_index):
        return

    # all() stops at the first child that fails, like an early return in a loop.
    if not all(self._process_match(child, match_index, child=True) for child in match.children):
        return

    match = self._process_match_processor(match, self.post_match_processor)
    if not match:
        return

    if (self._should_include_parent or self.private_parent) and match.name not in self.ignore_names:
        yield match
    if self._should_include_children or self.private_children:
        # Snapshot the kept children before yielding any of them.
        kept_children = [child for child in match.children if child.name not in self.ignore_names]
        for kept_child in kept_children:
            yield kept_child
|
|
||||||
|
|
||||||
def _post_process_matches(self, matches):
    """
    Post process matches with the user defined function, if any.

    :param matches: matches computed by this pattern
    :return: the result of ``post_processor(matches, self)`` when a post
        processor is set, the given matches otherwise
    """
    processor = self.post_processor
    if not processor:
        return matches
    return processor(matches, self)
|
|
||||||
|
|
||||||
@abstractproperty
def patterns(self):  # pragma: no cover
    """
    Base patterns defined for this pattern object.

    :return: A list of base patterns
    :rtype: list
    """
|
|
||||||
|
|
||||||
@property
def properties(self):
    """
    Properties names and values that can be retrieved by this pattern.

    :return: the stored properties when they are truthy, a new empty dict otherwise
    """
    return self._properties or {}
|
|
||||||
|
|
||||||
@abstractproperty
def match_options(self):  # pragma: no cover
    """
    Default options for the Match objects generated by this pattern.

    :return: **options to pass to Match constructor
    :rtype: dict
    """
|
|
||||||
|
|
||||||
@abstractmethod
def _match(self, pattern, input_string, context=None):  # pragma: no cover
    """
    Compute all unprocessed matches for a given base pattern and input.

    :param pattern: the pattern to use
    :param input_string: the string to parse
    :type input_string: str
    :param context: the context
    :type context: dict
    :return: matches based on input_string for this pattern
    :rtype: iterator[Match]
    """
|
|
||||||
|
|
||||||
def __repr__(self):
    """
    Build ``<ClassName[@defined_at]:patterns>`` for debugging output.
    """
    location = "@%s" % (self.defined_at,) if self.defined_at else ""
    return "<%s%s:%s>" % (self.__class__.__name__, location, self.__repr__patterns__)
|
|
||||||
|
|
||||||
@property
def __repr__patterns__(self):
    """
    Patterns as displayed by ``__repr__``; the base patterns themselves here.
    """
    base_patterns = self.patterns
    return base_patterns
|
|
||||||
|
|
||||||
|
|
||||||
class StringPattern(Pattern):
    """
    Pattern searching for one or many literal strings.
    """

    def __init__(self, *patterns, **kwargs):
        """
        :param patterns: the strings to search for
        :param kwargs: pattern options; they are also forwarded to ``find_all`` and,
            once filtered by ``filter_match_kwargs``, to the ``Match`` constructor
        """
        super(StringPattern, self).__init__(**kwargs)
        self._patterns = patterns
        self._kwargs = kwargs
        self._match_kwargs = filter_match_kwargs(kwargs)

    @property
    def patterns(self):
        """Strings given at construction."""
        return self._patterns

    @property
    def match_options(self):
        """Filtered keyword arguments passed to every generated ``Match``."""
        return self._match_kwargs

    def _match(self, pattern, input_string, context=None):
        """
        Yield one match per position reported by ``find_all`` for the string.
        """
        width = len(pattern)
        for position in find_all(input_string, pattern, **self._kwargs):
            found = Match(position, position + width, pattern=self, input_string=input_string, **self._match_kwargs)
            # Falsy matches are dropped.
            if found:
                yield found
|
|
||||||
|
|
||||||
|
|
||||||
class RePattern(Pattern):
    """
    Pattern searching for one or many regular expressions.
    """

    def __init__(self, *patterns, **kwargs):
        """
        :param patterns: regular expressions, each given as a string, as a dict of
            ``re.compile`` keyword arguments, as an iterable of ``re.compile``
            positional arguments, or as any other object which is kept as is
        :param kwargs: pattern options; ``repeated_captures`` and ``abbreviations``
            are read here
        :raise NotImplementedError: when repeated captures are requested while
            ``REGEX_AVAILABLE`` is false
        """
        super(RePattern, self).__init__(**kwargs)
        # Repeated captures default to whatever the regex backend supports.
        self.repeated_captures = kwargs.get('repeated_captures', REGEX_AVAILABLE)
        if self.repeated_captures and not REGEX_AVAILABLE:  # pragma: no cover
            raise NotImplementedError("repeated_capture is available only with regex module.")
        self.abbreviations = kwargs.get('abbreviations', [])
        self._kwargs = kwargs
        self._match_kwargs = filter_match_kwargs(kwargs)
        self._children_match_kwargs = filter_match_kwargs(kwargs, children=True)
        self._patterns = []
        for raw_pattern in patterns:
            compiled = raw_pattern
            if isinstance(raw_pattern, six.string_types):
                source = raw_pattern
                if self.abbreviations and source:
                    for abbreviation, expansion in self.abbreviations:
                        source = source.replace(abbreviation, expansion)
                compiled = call(re.compile, source, **self._kwargs)
            elif isinstance(raw_pattern, dict):
                if self.abbreviations and 'pattern' in raw_pattern:
                    # The given dict is updated in place.
                    for abbreviation, expansion in self.abbreviations:
                        raw_pattern['pattern'] = raw_pattern['pattern'].replace(abbreviation, expansion)
                compiled = re.compile(**raw_pattern)
            elif hasattr(raw_pattern, '__iter__'):
                compiled = re.compile(*raw_pattern)
            self._patterns.append(compiled)

    @property
    def patterns(self):
        """Patterns built at construction."""
        return self._patterns

    @property
    def __repr__patterns__(self):
        """Source text of every pattern, for ``__repr__``."""
        return [compiled.pattern for compiled in self.patterns]

    @property
    def match_options(self):
        """Filtered keyword arguments passed to every generated main ``Match``."""
        return self._match_kwargs

    def _match(self, pattern, input_string, context=None):
        """
        Yield a main match per regular expression hit, with one child match per
        captured group span attached to it.
        """
        group_names = dict((index, name) for name, index in pattern.groupindex.items())
        for hit in pattern.finditer(input_string):
            main_match = Match(hit.start(), hit.end(), pattern=self, input_string=input_string,
                               **self._match_kwargs)

            for group in range(1, pattern.groups + 1):
                # Unnamed groups inherit the name of the main match.
                name = group_names.get(group, main_match.name)
                if self.repeated_captures:
                    spans = hit.spans(group)
                else:
                    first, last = hit.span(group)
                    # Groups with a negative bound produce no child.
                    spans = [(first, last)] if first > -1 and last > -1 else []
                for start, end in spans:
                    child_match = Match(start, end, name=name, parent=main_match, pattern=self,
                                        input_string=input_string, **self._children_match_kwargs)
                    if child_match:
                        main_match.children.append(child_match)

            if main_match:
                yield main_match
|
|
||||||
|
|
||||||
|
|
||||||
class FunctionalPattern(Pattern):
    """
    Pattern searching with one or many functions.
    """

    def __init__(self, *patterns, **kwargs):
        """
        :param patterns: the functions to call
        :param kwargs: pattern options; they are also forwarded to the functions and,
            once filtered by ``filter_match_kwargs``, to the ``Match`` constructor
        """
        super(FunctionalPattern, self).__init__(**kwargs)
        self._patterns = patterns
        self._kwargs = kwargs
        self._match_kwargs = filter_match_kwargs(kwargs)

    @property
    def patterns(self):
        """Functions given at construction."""
        return self._patterns

    @property
    def match_options(self):
        """Filtered keyword arguments passed to every generated ``Match``."""
        return self._match_kwargs

    def _match(self, pattern, input_string, context=None):
        """
        Call the function and yield a match for each result it describes.

        A result is either a dict of ``Match`` keyword arguments or a sequence of
        ``Match`` positional arguments whose last item may be a dict of keyword
        arguments. A falsy return value yields nothing.
        """
        ret = call(pattern, input_string, context, **self._kwargs)
        if not ret:
            return

        # A lone result (non iterable, dict, or sequence starting with an int)
        # is handled as a one-item list of results.
        is_single_result = (not is_iterable(ret) or isinstance(ret, dict)
                            or (is_iterable(ret) and hasattr(ret, '__getitem__') and isinstance(ret[0], int)))
        results = [ret] if is_single_result else ret

        for args in results:
            if isinstance(args, dict):
                # These two are always supplied below; the returned dict is updated in place.
                args.pop('input_string', None)
                args.pop('pattern', None)
                if self._match_kwargs:
                    options = self._match_kwargs.copy()
                    options.update(args)
                else:
                    options = args
                match = Match(pattern=self, input_string=input_string, **options)
            else:
                options = self._match_kwargs
                if isinstance(args[-1], dict):
                    options = dict(options)
                    options.update(args[-1])
                    args = args[:-1]
                match = Match(*args, pattern=self, input_string=input_string, **options)
            if match:
                yield match
|
|
||||||
|
|
||||||
|
|
||||||
def filter_match_kwargs(kwargs, children=False):
    """
    Build a copy of kwargs without the keys reserved for Match construction.

    :param kwargs: keyword arguments to filter; left untouched
    :type kwargs: dict
    :param children: when truthy, the ``name`` key is removed as well
    :type children: bool
    :return: A filtered dict
    :rtype: dict
    """
    reserved = ('pattern', 'start', 'end', 'parent', 'formatter', 'value')
    if children:
        reserved += ('name',)
    filtered = kwargs.copy()
    for key in reserved:
        filtered.pop(key, None)
    return filtered
|
|
||||||
@@ -1,107 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Processor functions
|
|
||||||
"""
|
|
||||||
from logging import getLogger
|
|
||||||
|
|
||||||
from .utils import IdentitySet
|
|
||||||
|
|
||||||
from .rules import Rule, RemoveMatch
|
|
||||||
|
|
||||||
log = getLogger(__name__).log
|
|
||||||
|
|
||||||
DEFAULT = '__default__'
|
|
||||||
|
|
||||||
POST_PROCESS = -2048
|
|
||||||
PRE_PROCESS = 2048
|
|
||||||
|
|
||||||
|
|
||||||
def _default_conflict_solver(match, conflicting_match):
    """
    Default conflict solver for matches: designate the match whose initiator
    is the shorter of the two.

    :param match:
    :type match:
    :param conflicting_match:
    :type conflicting_match:
    :return: the match with the shorter initiator, or None when both initiators
        have the same length
    :rtype:
    """
    match_length = len(match.initiator)
    conflicting_length = len(conflicting_match.initiator)
    if conflicting_length < match_length:
        return conflicting_match
    if match_length < conflicting_length:
        return match
    return None
|
|
||||||
|
|
||||||
|
|
||||||
class ConflictSolver(Rule):
    """
    Rule removing conflicting matches.
    """
    priority = PRE_PROCESS

    consequence = RemoveMatch

    @property
    def default_conflict_solver(self):  # pylint:disable=no-self-use
        """
        Conflict solver used when matches define none, or when theirs return ``DEFAULT``.
        """
        return _default_conflict_solver

    def when(self, matches, context):
        """
        Collect the public matches that lose a conflict against another public match.

        :param matches:
        :param context:
        :return: the matches to remove
        :rtype: IdentitySet
        """
        # pylint:disable=too-many-nested-blocks
        to_remove_matches = IdentitySet()

        public_matches = sorted((match for match in matches if not match.private), key=len)

        for match in public_matches:
            conflicting_matches = matches.conflicting(match)
            if not conflicting_matches:
                continue

            # Only public matches take part in the conflict, shortest first.
            public_conflicts = sorted(
                (candidate for candidate in conflicting_matches if not candidate.private), key=len)

            for conflicting_match in public_conflicts:
                # Solvers by decreasing precedence: the conflicting match's own
                # (called with swapped arguments), then this match's, then the default.
                conflict_solvers = []
                if conflicting_match.conflict_solver:
                    conflict_solvers.append((conflicting_match.conflict_solver, True))
                if match.conflict_solver:
                    conflict_solvers.append((match.conflict_solver, False))
                conflict_solvers.append((self.default_conflict_solver, False))

                for conflict_solver, swapped in conflict_solvers:
                    if swapped:
                        to_remove = conflict_solver(conflicting_match, match)
                    else:
                        to_remove = conflict_solver(match, conflicting_match)
                    if to_remove == DEFAULT:
                        # This solver defers to the next one.
                        continue
                    if to_remove and to_remove not in to_remove_matches:
                        both_matches = [match, conflicting_match]
                        both_matches.remove(to_remove)
                        to_keep = both_matches[0]

                        # A match already scheduled for removal cannot evict another one.
                        if to_keep not in to_remove_matches:
                            log(self.log_level, "Conflicting match %s will be removed in favor of match %s",
                                to_remove, to_keep)

                            to_remove_matches.add(to_remove)
                    break
        return to_remove_matches
|
|
||||||
|
|
||||||
|
|
||||||
class PrivateRemover(Rule):
    """
    Rule removing private matches.
    """
    priority = POST_PROCESS

    consequence = RemoveMatch

    def when(self, matches, context):
        """
        :return: every match flagged as private
        :rtype: list
        """
        private_matches = [candidate for candidate in matches if candidate.private]
        return private_matches
|
|
||||||
@@ -1,190 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Entry point functions and classes for Rebulk
|
|
||||||
"""
|
|
||||||
from logging import getLogger
|
|
||||||
|
|
||||||
from .builder import Builder
|
|
||||||
from .match import Matches
|
|
||||||
from .processors import ConflictSolver, PrivateRemover
|
|
||||||
from .rules import Rules
|
|
||||||
from .utils import extend_safe
|
|
||||||
|
|
||||||
log = getLogger(__name__).log
|
|
||||||
|
|
||||||
|
|
||||||
class Rebulk(Builder):
    r"""
    Regular expression, string and function based patterns are declared in a ``Rebulk`` object. Its fluent API
    chains ``string``, ``regex``, and ``functional`` methods to define various pattern types.

    .. code-block:: python

        >>> from rebulk import Rebulk
        >>> bulk = Rebulk().string('brown').regex(r'qu\w+').functional(lambda s: (20, 25))

    Once the ``Rebulk`` object is fully configured, call the ``matches`` method with an input string to retrieve
    all ``Match`` objects found by the registered patterns.

    .. code-block:: python

        >>> bulk.matches("The quick brown fox jumps over the lazy dog")
        [<brown:(10, 15)>, <quick:(4, 9)>, <jumps:(20, 25)>]

    If multiple ``Match`` objects are found at the same position, only the longer one is kept.

    .. code-block:: python

        >>> bulk = Rebulk().string('lakers').string('la')
        >>> bulk.matches("the lakers are from la")
        [<lakers:(4, 10)>, <la:(20, 22)>]
    """

    # pylint:disable=protected-access

    def __init__(self, disabled=lambda context: False, default_rules=True):
        """
        Creates a new Rebulk object.

        :param disabled: if True, this rebulk is disabled. Can also be a function(context).
        :type disabled: bool|function
        :param default_rules: when truthy, ``ConflictSolver`` and ``PrivateRemover`` rules are registered
        :type default_rules: bool
        """
        super(Rebulk, self).__init__()
        # A plain value is wrapped so that self.disabled can always be called.
        self.disabled = disabled if callable(disabled) else (lambda context: disabled)
        self._patterns = []
        self._rules = Rules()
        if default_rules:
            self.rules(ConflictSolver, PrivateRemover)
        self._rebulks = []

    def pattern(self, *pattern):
        """
        Add patterns objects

        :param pattern:
        :type pattern: rebulk.pattern.Pattern
        :return: self
        :rtype: Rebulk
        """
        self._patterns.extend(pattern)
        return self

    def rules(self, *rules):
        """
        Add rules as a module, class or instance.

        :param rules:
        :type rules: list[Rule]
        :return: self
        :rtype: Rebulk
        """
        self._rules.load(*rules)
        return self

    def rebulk(self, *rebulks):
        """
        Add children rebulk objects

        :param rebulks:
        :type rebulks: Rebulk
        :return: self
        :rtype: Rebulk
        """
        self._rebulks.extend(rebulks)
        return self

    def matches(self, string, context=None):
        """
        Search for all matches with current configuration against input_string

        :param string: string to search into
        :type string: str
        :param context: context to use; a new empty dict is used when None
        :type context: dict
        :return: A custom list of matches
        :rtype: Matches
        """
        found = Matches(input_string=string)
        if context is None:
            context = {}

        self._matches_patterns(found, context)

        self._execute_rules(found, context)

        return found

    def effective_rules(self, context=None):
        """
        Get effective rules for this rebulk object and its enabled children.

        :param context:
        :type context:
        :return: a new ``Rules`` object
        :rtype: Rules
        """
        effective = Rules()
        effective.extend(self._rules)
        for child in self._rebulks:
            if child.disabled(context):
                continue
            extend_safe(effective, child._rules)
        return effective

    def _execute_rules(self, matches, context):
        """
        Execute rules for this rebulk and children, unless this rebulk is disabled.

        :param matches:
        :type matches:
        :param context:
        :type context:
        :return: None
        """
        if self.disabled(context):
            return
        self.effective_rules(context).execute_all_rules(matches, context)

    def effective_patterns(self, context=None):
        """
        Get effective patterns for this rebulk object and its enabled children.

        :param context:
        :type context:
        :return: a new list of patterns
        :rtype: list
        """
        effective = list(self._patterns)
        for child in self._rebulks:
            if child.disabled(context):
                continue
            extend_safe(effective, child._patterns)
        return effective

    def _matches_patterns(self, matches, context):
        """
        Search for all matches with current patterns against input_string

        Markers are appended to ``matches.markers``, other matches to ``matches``.

        :param matches: matches list
        :type matches: Matches
        :param context: context to use
        :type context: dict
        :return: None
        """
        if self.disabled(context):
            return
        for pattern in self.effective_patterns(context):
            if pattern.disabled(context):
                log(pattern.log_level, "Pattern is disabled. (%s)", pattern)
                continue
            pattern_matches = pattern.matches(matches.input_string, context)
            if pattern_matches:
                log(pattern.log_level, "Pattern has %s match(es). (%s)", len(pattern_matches), pattern)
            for match in pattern_matches:
                if match.marker:
                    log(pattern.log_level, "Marker found. (%s)", match)
                    matches.markers.append(match)
                else:
                    log(pattern.log_level, "Match found. (%s)", match)
                    matches.append(match)
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Uniform re module
|
|
||||||
"""
|
|
||||||
# pylint: disable-all
|
|
||||||
import os
|
|
||||||
|
|
||||||
# Standard library ``re`` is the fallback; it is replaced below when the
# ``regex`` module is both allowed and importable.
import re

REGEX_AVAILABLE = False
if os.environ.get('REGEX_DISABLED') not in ["1", "true", "True", "Y"]:
    try:
        import regex as re
        REGEX_AVAILABLE = True
    except ImportError:
        pass
|
|
||||||
@@ -1,373 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Abstract rule class definition and rule engine implementation
|
|
||||||
"""
|
|
||||||
from abc import ABCMeta, abstractmethod
|
|
||||||
import inspect
|
|
||||||
from itertools import groupby
|
|
||||||
from logging import getLogger
|
|
||||||
|
|
||||||
import six
|
|
||||||
from .utils import is_iterable
|
|
||||||
|
|
||||||
from .toposort import toposort
|
|
||||||
|
|
||||||
from . import debug
|
|
||||||
|
|
||||||
log = getLogger(__name__).log
|
|
||||||
|
|
||||||
|
|
||||||
@six.add_metaclass(ABCMeta)
class Consequence(object):
    """
    Definition of a consequence to apply.
    """
    @abstractmethod
    def then(self, matches, when_response, context):  # pragma: no cover
        """
        Action implementation.

        :param matches:
        :type matches: rebulk.match.Matches
        :param when_response: return object from when call.
        :type when_response: object
        :param context:
        :type context:
        :return: True if the action was run, False if it wasn't.
        :rtype: bool
        """
|
|
||||||
|
|
||||||
|
|
||||||
@six.add_metaclass(ABCMeta)
class Condition(object):
    """
    Definition of a condition to check.
    """
    @abstractmethod
    def when(self, matches, context):  # pragma: no cover
        """
        Condition implementation.

        :param matches:
        :type matches: rebulk.match.Matches
        :param context:
        :type context:
        :return: truthy if the rule should be triggered and its then action executed, falsy otherwise.
        :rtype: object
        """
|
|
||||||
|
|
||||||
|
|
||||||
@six.add_metaclass(ABCMeta)
class CustomRule(Condition, Consequence):
    """
    Definition of a rule to apply
    """
    # pylint: disable=no-self-use, unused-argument, abstract-method
    priority = 0
    name = None
    dependency = None
    # Class-level dict: shared by every rule that does not override it.
    properties = {}

    def __init__(self, log_level=None):
        """
        :param log_level: log level for this rule. When None, a ``log_level``
            attribute already available on the rule (e.g. defined on its class)
            is kept, and ``debug.LOG_LEVEL`` is used otherwise.
        :type log_level: int
        """
        self.defined_at = debug.defined_at()
        if log_level is not None:
            # An explicit level wins; it used to be silently discarded, which
            # could leave the rule without any log_level attribute.
            self.log_level = log_level
        elif not hasattr(self, 'log_level'):
            self.log_level = debug.LOG_LEVEL

    def enabled(self, context):
        """
        Tell whether the rule is enabled. This implementation always enables it.

        :param context:
        :type context:
        :return: True if rule is enabled, False if disabled
        :rtype: bool
        """
        return True

    def __lt__(self, other):
        # Reversed on purpose: the rule with the greater priority compares as "less".
        return self.priority > other.priority

    def __repr__(self):
        defined = ""
        if self.defined_at:
            defined = "@%s" % (self.defined_at,)
        return "<%s%s>" % (self.name if self.name else self.__class__.__name__, defined)

    def __eq__(self, other):
        # Rules are compared by class only.
        return self.__class__ == other.__class__

    def __ne__(self, other):
        # Python 2 does not derive != from ==; keep both operators consistent.
        return not self == other

    def __hash__(self):
        return hash(self.__class__)
|
|
||||||
|
|
||||||
|
|
||||||
class Rule(CustomRule):
    """
    Definition of a rule to apply
    """
    # pylint:disable=abstract-method
    consequence = None

    def then(self, matches, when_response, context):
        """
        Run the configured consequence(s) with the response of ``when``.

        With an iterable of consequences, each one receives the next item of
        ``when_response`` (wrapped in a list first when it is not iterable).
        Consequences given as classes are instantiated without arguments.

        :param matches:
        :param when_response: return object from when call
        :param context:
        :return: None
        """
        assert self.consequence
        if not is_iterable(self.consequence):
            consequence = self.consequence
            if inspect.isclass(consequence):
                consequence = consequence()  # pylint:disable=not-callable
            consequence.then(matches, when_response, context)
            return

        if not is_iterable(when_response):
            when_response = [when_response]
        responses = iter(when_response)
        for consequence in self.consequence:  # pylint: disable=not-an-iterable
            if inspect.isclass(consequence):
                consequence = consequence()
            consequence.then(matches, next(responses), context)
|
|
||||||
|
|
||||||
|
|
||||||
class RemoveMatch(Consequence):  # pylint: disable=abstract-method
    """
    Remove matches returned by when
    """
    def then(self, matches, when_response, context):
        """
        :return: for an iterable response, the list of matches actually removed;
            otherwise the response itself, whether or not it was removed
        """
        if not is_iterable(when_response):
            if when_response in matches:
                matches.remove(when_response)
            return when_response

        removed = []
        # Iterate over a snapshot of the response.
        for candidate in list(when_response):
            if candidate in matches:
                matches.remove(candidate)
                removed.append(candidate)
        return removed
|
|
||||||
|
|
||||||
|
|
||||||
class AppendMatch(Consequence):  # pylint: disable=abstract-method
    """
    Append matches returned by when
    """
    def __init__(self, match_name=None):
        """
        :param match_name: when truthy, name given to the appended matches
        """
        self.match_name = match_name

    def then(self, matches, when_response, context):
        """
        :return: for an iterable response, the list of matches actually appended;
            otherwise the response itself, whether or not it was appended
        """
        if not is_iterable(when_response):
            # A single response is renamed even when it is already in matches.
            if self.match_name:
                when_response.name = self.match_name
            if when_response not in matches:
                matches.append(when_response)
            return when_response

        appended = []
        # Iterate over a snapshot of the response.
        for candidate in list(when_response):
            if candidate not in matches:
                if self.match_name:
                    candidate.name = self.match_name
                matches.append(candidate)
                appended.append(candidate)
        return appended
|
|
||||||
|
|
||||||
|
|
||||||
class RenameMatch(Consequence):  # pylint: disable=abstract-method
    """
    Consequence renaming the matches given by the when response.
    """
    def __init__(self, match_name):
        """
        :param match_name: name assigned to the matches
        """
        self.match_name = match_name
        self.remove = RemoveMatch()
        self.append = AppendMatch()

    def then(self, matches, when_response, context):
        """
        Remove the matches, rename the removed ones, then append them back.

        :param matches: container holding the matches
        :param when_response: a single match, or an iterable of matches
        :param context: forwarded to the remove and append consequences
        """
        removed = self.remove.then(matches, when_response, context)

        if is_iterable(removed):
            removed = list(removed)
            to_rename = removed
        elif removed:
            to_rename = [removed]
        else:
            to_rename = []

        for match in to_rename:
            match.name = self.match_name

        # Only matches that were really removed are put back.
        if removed:
            self.append.then(matches, removed, context)
|
|
||||||
|
|
||||||
|
|
||||||
class AppendTags(Consequence):  # pylint: disable=abstract-method
    """
    Consequence adding tags to the matches given by the when response.
    """
    def __init__(self, tags):
        """
        :param tags: tags added to each match
        """
        self.tags = tags
        self.remove = RemoveMatch()
        self.append = AppendMatch()

    def then(self, matches, when_response, context):
        """
        Remove the matches, extend their tags, then append them back.

        :param matches: container holding the matches
        :param when_response: a single match, or an iterable of matches
        :param context: forwarded to the remove and append consequences
        """
        removed = self.remove.then(matches, when_response, context)

        if is_iterable(removed):
            removed = list(removed)
            to_tag = removed
        elif removed:
            to_tag = [removed]
        else:
            to_tag = []

        for match in to_tag:
            match.tags.extend(self.tags)

        # Only matches that were really removed are put back.
        if removed:
            self.append.then(matches, removed, context)
|
|
||||||
|
|
||||||
|
|
||||||
class RemoveTags(Consequence):  # pylint: disable=abstract-method
    """
    Consequence removing tags from the matches given by the when response.
    """
    def __init__(self, tags):
        """
        :param tags: tags removed from each match
        """
        self.tags = tags
        self.remove = RemoveMatch()
        self.append = AppendMatch()

    def then(self, matches, when_response, context):
        """
        Remove the matches, strip the configured tags, then append them back.

        :param matches: container holding the matches
        :param when_response: a single match, or an iterable of matches
        :param context: forwarded to the remove and append consequences
        """
        removed = self.remove.then(matches, when_response, context)

        if is_iterable(removed):
            removed = list(removed)
            to_strip = removed
        elif removed:
            to_strip = [removed]
        else:
            to_strip = []

        for match in to_strip:
            for tag in self.tags:
                # One occurrence of each configured tag is dropped, if any.
                if tag in match.tags:
                    match.tags.remove(tag)

        # Only matches that were really removed are put back.
        if removed:
            self.append.then(matches, removed, context)
|
|
||||||
|
|
||||||
|
|
||||||
class Rules(list):
    """
    list of rules ready to execute.
    """

    def __init__(self, *rules):
        """
        :param rules: initial rules, handled as in :meth:`load`
        """
        super(Rules, self).__init__()
        self.load(*rules)

    def load(self, *rules):
        """
        Load rules from a Rule module, class or instance

        :param rules: modules are passed to :meth:`load_module`, classes to
            :meth:`load_class`; any other object is appended as is
        :return: None
        """
        for rule in rules:
            if inspect.ismodule(rule):
                self.load_module(rule)
            elif inspect.isclass(rule):
                self.load_class(rule)
            else:
                self.append(rule)

    def load_module(self, module):
        """
        Load a rules module

        Every class whose ``__module__`` equals the module name is
        instantiated and appended; other members (functions, imported names,
        constants) are ignored.

        :param module: module to inspect
        :return: None
        """
        def defined_class(member):
            # inspect.isclass must be *called*: the bare function object is
            # always truthy and would let functions through as rule classes.
            return (inspect.isclass(member)
                    and hasattr(member, '__module__')
                    and member.__module__ == module.__name__)

        for _name, obj in inspect.getmembers(module, defined_class):
            self.load_class(obj)

    def load_class(self, class_):
        """
        Load a Rule class.

        :param class_: class instantiated without arguments and appended
        :return: None
        """
        self.append(class_())

    def execute_all_rules(self, matches, context):
        """
        Execute all rules from this rules list.

        Rules are grouped by ``priority`` (in sorted order), each priority
        group is split into dependency levels with ``toposort_rules``, and
        the rules of one level run in the order they were added to this list.

        :param matches: matches passed to each rule
        :param context: context passed to each rule
        :return: list of ``(rule, when_response)`` tuples, one for every rule
            for which ``execute_rule`` returned something other than None
        :rtype: list
        """
        ret = []
        for priority, priority_rules in groupby(sorted(self), lambda rule: rule.priority):
            sorted_rules = toposort_rules(list(priority_rules))  # Group by dependency graph toposort
            for rules_group in sorted_rules:
                rules_group = sorted(rules_group, key=self.index)  # Sort rules group based on initial ordering.
                # Log the group summary at the highest level among its rules.
                group_log_level = None
                for rule in rules_group:
                    if group_log_level is None or group_log_level < rule.log_level:
                        group_log_level = rule.log_level
                log(group_log_level, "%s independent rule(s) at priority %s.", len(rules_group), priority)
                for rule in rules_group:
                    when_response = execute_rule(rule, matches, context)
                    if when_response is not None:
                        ret.append((rule, when_response))

        return ret
|
|
||||||
|
|
||||||
|
|
||||||
def execute_rule(rule, matches, context):
    """
    Execute the given rule.

    A disabled rule is only logged. An enabled rule has its ``when``
    condition evaluated and, when the response is truthy, its ``then``
    consequence run with that response.

    :param rule: rule to execute
    :param matches: matches passed to ``when`` and ``then``
    :param context: context passed to ``enabled``, ``when`` and ``then``
    :return: the truthy ``when`` response once ``then`` has run, else None
    """
    level = rule.log_level

    if not rule.enabled(context):
        log(level, "Rule is disabled: %s", rule)
        return None

    log(level, "Checking rule condition: %s", rule)
    when_response = rule.when(matches, context)
    if not when_response:
        return None

    log(rule.log_level, "Rule was triggered: %s", when_response)
    log(rule.log_level, "Running rule consequence: %s %s", rule, when_response)
    rule.then(matches, when_response, context)
    return when_response
|
|
||||||
|
|
||||||
def toposort_rules(rules):
    """
    Sort given rules using toposort with dependency parameter.

    :param rules: rules to sort; at most one rule per class is allowed
    :return: the result of ``toposort`` on the rule dependency graph
    :raises ValueError: when two rules share the same class
    """
    rule_by_class = {}
    for rule in rules:
        rule_class = rule.__class__
        if rule_class in rule_by_class:
            raise ValueError("Duplicate class rules are not allowed: %s" % rule.__class__)
        rule_by_class[rule_class] = rule

    graph = {}
    for rule in rules:
        declared = rule.dependency
        # A single truthy, non-iterable dependency is treated as a list of one.
        if not is_iterable(declared) and declared:
            declared = [declared]

        resolved = set()
        if declared:
            for dependency in declared:
                # Dependencies given as classes are mapped to the rule
                # instance of that class; unknown classes resolve to None.
                if inspect.isclass(dependency):
                    dependency = rule_by_class.get(dependency)
                if dependency:
                    resolved.add(dependency)
        graph[rule] = resolved

    return toposort(graph)
|
|
||||||
@@ -1,84 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
# Copyright 2014 True Blade Systems, Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Original:
|
|
||||||
# - https://bitbucket.org/ericvsmith/toposort (1.4)
|
|
||||||
# Modifications:
|
|
||||||
# - merged Pull request #2 for CyclicDependency error
|
|
||||||
# - import reduce as original name
|
|
||||||
# - support python 2.6 dict comprehension
|
|
||||||
|
|
||||||
# pylint: skip-file
|
|
||||||
from functools import reduce
|
|
||||||
|
|
||||||
|
|
||||||
class CyclicDependency(ValueError):
    """
    Error raised when items cannot be ordered because of a dependency cycle.
    """
    def __init__(self, cyclic):
        """
        :param cyclic: mapping of the items involved; kept as ``self.cyclic``
            and rendered item by item in the error message
        """
        listing = ', '.join(repr(entry) for entry in cyclic.items())
        message = 'Cyclic dependencies exist among these items: {0}'.format(listing)
        super(CyclicDependency, self).__init__(message)
        self.cyclic = cyclic
|
|
||||||
|
|
||||||
|
|
||||||
def toposort(data):
    """
    Dependencies are expressed as a dictionary whose keys are items
    and whose values are a set of dependent items. Output is a list of
    sets in topological order. The first set consists of items with no
    dependences, each subsequent set consists of items that depend upon
    items in the preceding sets.

    The input mapping and its dependency sets are left unmodified.

    :param data: mapping of item to the set of items it depends on
    :type data: dict
    :return: generator yielding one set of items per dependency level
    :raises CyclicDependency: when the remaining items depend on each other
    """

    # Special case empty input.
    if len(data) == 0:
        return

    # Copy every dependency set, not just the mapping: a shallow copy would
    # let the self-dependency removal below alter the caller's own sets.
    working = {}
    for item, dep in data.items():
        own_deps = set(dep)
        # Ignore self dependencies.
        own_deps.discard(item)
        working[item] = own_deps
    data = working

    # Find all items that don't depend on anything.
    extra_items_in_deps = reduce(set.union, data.values()) - set(data.keys())
    # Add empty dependences where needed.
    data.update(dict((item, set()) for item in extra_items_in_deps))
    while True:
        ordered = set(item for item, dep in data.items() if len(dep) == 0)
        if not ordered:
            break
        yield ordered
        data = dict((item, (dep - ordered))
                    for item, dep in data.items()
                    if item not in ordered)
    # Anything left over could never be freed of its dependencies.
    if len(data) != 0:
        raise CyclicDependency(data)
|
|
||||||
|
|
||||||
|
|
||||||
def toposort_flatten(data, sort=True):
    """
    Returns a single list of dependencies. For any set returned by
    toposort(), those items are appended to the result, sorted when
    ``sort`` is true (just to make the results deterministic).

    :param data: dependency mapping passed to toposort()
    :param sort: sort the items of each set before appending them
    :type sort: bool
    :return: Single list of dependencies.
    :rtype: list
    """
    arrange = sorted if sort else list
    flattened = []
    for level in toposort(data):
        flattened.extend(arrange(level))
    return flattened
|
|
||||||
@@ -1,156 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Various utility functions
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
from collections.abc import MutableSet
|
|
||||||
except ImportError:
|
|
||||||
from collections import MutableSet
|
|
||||||
|
|
||||||
from types import GeneratorType
|
|
||||||
|
|
||||||
|
|
||||||
def find_all(string, sub, start=None, end=None, ignore_case=False, **kwargs):
    """
    Return all indices in string s where substring sub is
    found, such that sub is contained in the slice s[start:end].

    Occurrences are searched left to right without overlap. An empty
    substring is reported once at every position of the searched slice.

    >>> list(find_all('The quick brown fox jumps over the lazy dog', 'fox'))
    [16]

    >>> list(find_all('The quick brown fox jumps over the lazy dog', 'mountain'))
    []

    >>> list(find_all('The quick brown fox jumps over the lazy dog', 'The'))
    [0]

    >>> list(find_all(
    ... 'Carved symbols in a mountain hollow on the bank of an inlet irritated an eccentric person',
    ... 'an'))
    [44, 51, 70]

    >>> list(find_all(
    ... 'Carved symbols in a mountain hollow on the bank of an inlet irritated an eccentric person',
    ... 'an',
    ... 50,
    ... 60))
    [51]

    >>> list(find_all('abc', ''))
    [0, 1, 2, 3]

    :param string: the input string
    :type string: str
    :param sub: the substring
    :type sub: str
    :param start: index where the search starts (None for the beginning)
    :param end: index where the search stops (None for the end)
    :param ignore_case: compare lowercased versions of both strings
    :param kwargs: accepted and ignored
    :return: all indices in the input string
    :rtype: __generator[int]
    """
    #pylint: disable=unused-argument
    if ignore_case:
        sub = sub.lower()
        string = string.lower()
    # Always move forward by at least one character: with an empty substring
    # a zero-length step would find the same index again and never terminate.
    step = len(sub) or 1
    while True:
        start = string.find(sub, start, end)
        if start == -1:
            return
        yield start
        start += step
|
|
||||||
|
|
||||||
|
|
||||||
def get_first_defined(data, keys, default_value=None):
    """
    Get the value of the first key of ``keys`` that is present in ``data``.

    :param data: container supporting ``in`` and item access
    :param keys: candidate keys, tried in order
    :param default_value: value returned when none of the keys is present
    :return: ``data[key]`` for the first present key, else ``default_value``
    """
    present_values = (data[key] for key in keys if key in data)
    return next(present_values, default_value)
|
|
||||||
|
|
||||||
|
|
||||||
def is_iterable(obj):
    """
    Tell whether ``obj`` stands for several things rather than a single one.

    The test relies on the `__iter__` attribute so that types unknown to this
    module, such as NumPy arrays, are covered too. Generators always count as
    iterable.

    Strings are treated as atomic values, not as iterables.

    The Python 2 `unicode` type needs no dedicated check because it has no
    `__iter__` attribute anyway.
    """
    if isinstance(obj, GeneratorType):
        return True
    if isinstance(obj, str):
        return False
    return hasattr(obj, '__iter__')
|
|
||||||
|
|
||||||
|
|
||||||
def extend_safe(target, source):
    """
    Append to ``target`` the elements of ``source`` it does not contain yet.

    :param target: list extended in place
    :type target: list
    :param source: elements to add, in order
    :type source: list
    """
    for candidate in source:
        if candidate in target:
            continue
        target.append(candidate)
|
|
||||||
|
|
||||||
|
|
||||||
class _Ref(object):
    """
    Wrapper comparing and hashing a value by identity, for IdentitySet
    """
    def __init__(self, value):
        self.value = value

    def __hash__(self):
        return id(self.value)

    def __eq__(self, other):
        mine, theirs = self.value, other.value
        return mine is theirs


class IdentitySet(MutableSet):  # pragma: no cover
    """
    Set whose membership is based on object identity rather than equality
    """
    def __init__(self, items=None):  # pylint: disable=super-init-not-called
        """
        :param items: optional iterable of initial elements
        """
        if items is None:
            items = []
        self.refs = set(_Ref(item) for item in items)

    def __len__(self):
        return len(self.refs)

    def __iter__(self):
        return (ref.value for ref in self.refs)

    def __contains__(self, elem):
        return _Ref(elem) in self.refs

    def add(self, value):
        self.refs.add(_Ref(value))

    def discard(self, value):
        self.refs.discard(_Ref(value))

    def update(self, iterable):
        """
        Add every element of ``iterable`` to the set

        :param iterable: elements to add
        :return: None
        """
        for elem in iterable:
            self.add(elem)

    def __repr__(self):  # pragma: no cover
        return "%s(%s)" % (type(self).__name__, list(self))
|
|
||||||
@@ -1,81 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
Validator functions to use in patterns.
|
|
||||||
|
|
||||||
All these functions take the match as their last argument, so functools.partial can be used to bind the preceding arguments.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
def chars_before(chars, match):
    """
    Validate the match if the character on its left is in a given sequence.

    A match starting at the beginning of the input string is always valid.

    :param chars: accepted characters
    :param match: object exposing ``start`` and ``input_string``
    :return: True when the match is valid
    :rtype: bool
    """
    begin = match.start
    if begin <= 0:
        return True
    previous_char = match.input_string[begin - 1]
    return previous_char in chars
|
|
||||||
|
|
||||||
|
|
||||||
def chars_after(chars, match):
    """
    Validate the match if the character on its right is in a given sequence.

    A match ending at the end of the input string is always valid.

    :param chars: accepted characters
    :param match: object exposing ``end`` and ``input_string``
    :return: True when the match is valid
    :rtype: bool
    """
    finish = match.end
    if finish >= len(match.input_string):
        return True
    next_char = match.input_string[finish]
    return next_char in chars
|
|
||||||
|
|
||||||
|
|
||||||
def chars_surround(chars, match):
    """
    Validate the match if both surrounding characters are in a given sequence.

    :param chars: accepted characters
    :param match: match passed to chars_before and chars_after
    :return: True when both chars_before and chars_after validate the match
    """
    if not chars_before(chars, match):
        return False
    return chars_after(chars, match)
|
|
||||||
|
|
||||||
|
|
||||||
def validators(*chained_validators):
    """
    Creates a validator chain from several validator functions.

    The chain calls the validators in order and stops at the first one
    returning a falsy value.

    :param chained_validators: validator callables taking a match
    :return: callable taking a match, returning True when every validator
        accepts it and False otherwise
    """

    def validator_chain(match):  # pylint:disable=missing-docstring
        return all(check(match) for check in chained_validators)

    return validator_chain
|
|
||||||
|
|
||||||
|
|
||||||
def allways_true(match):  # pylint:disable=unused-argument
    """
    A validator which always accepts the match.

    :param match: ignored
    :return: True
    """
    return True
|
|
||||||
Reference in New Issue
Block a user