ilcorsaronero titoli con caratteri speciali e aggiornamento libreria babelfish

This commit is contained in:
marco
2021-11-06 12:17:51 +01:00
parent d21ba2dcb7
commit aafb008d5a
7 changed files with 41 additions and 46 deletions

View File

@@ -53,10 +53,11 @@ def peliculas(item):
patron = r'>(?P<quality>[^"<]+)'
patron += '<TD[^>]+><A class="tab" HREF="(?P<url>[^"]+)"\s*>[^<]+<[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<size>[^<]+)<[^>]+>[^>]+>[^>]+><form action="[^"]+/\d+/(?P<title>[^"]+)[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<seed>[^<]+)'
if not sceneTitle:
def itemHook(item):
def itemHook(item):
if not sceneTitle:
item.title = item.title.replace('_', ' ')
return item
item.title = support.urlparse.unquote(item.title)
return item
if 'search' not in item.args:
item.url += str(item.args[0])

View File

@@ -4,12 +4,6 @@
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
__title__ = 'babelfish'
__version__ = '0.5.5-dev'
__author__ = 'Antoine Bertin'
__license__ = 'BSD'
__copyright__ = 'Copyright 2015 the BabelFish authors'
import sys
if sys.version_info[0] >= 3:

View File

@@ -2,20 +2,22 @@
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
import collections
import functools
from importlib import import_module
# from pkg_resources import iter_entry_points, EntryPoint
from pkg_resources import iter_entry_points, EntryPoint
from ..exceptions import LanguageConvertError, LanguageReverseError
try:
# Python 3.3+
from collections.abc import Mapping, MutableMapping
except ImportError:
from collections import Mapping, MutableMapping
# from https://github.com/kennethreitz/requests/blob/master/requests/structures.py
class CaseInsensitiveDict(collections.MutableMapping):
class CaseInsensitiveDict(MutableMapping):
"""A case-insensitive ``dict``-like object.
Implements all methods and operations of
``collections.MutableMapping`` as well as dict's ``copy``. Also
``collections.abc.MutableMapping`` as well as dict's ``copy``. Also
provides ``lower_items``.
All keys are expected to be strings. The structure remembers the
@@ -66,7 +68,7 @@ class CaseInsensitiveDict(collections.MutableMapping):
)
def __eq__(self, other):
if isinstance(other, collections.Mapping):
if isinstance(other, Mapping):
other = CaseInsensitiveDict(other)
else:
return NotImplemented
@@ -238,19 +240,21 @@ class ConverterManager(object):
"""Get a converter, lazy loading it if necessary"""
if name in self.converters:
return self.converters[name]
# for ep in iter_entry_points(self.entry_point):
# if ep.name == name:
# self.converters[ep.name] = ep.load()()
# return self.converters[ep.name]
def parse(str):
import re
match = re.match('(?P<name>\w+) = (?P<module>[a-z0-9.]+):(?P<class>\w+)', str)
return match.groupdict()
for ep in (parse(c) for c in self.registered_converters + self.internal_converters):
if ep.get('name') == name:
cl = getattr(import_module(ep.get('module')), ep.get('class'))
self.converters[ep.get('name')] = cl()
return self.converters[ep.get('name')]
for ep in iter_entry_points(self.entry_point):
if ep.name == name:
self.converters[ep.name] = ep.load()()
return self.converters[ep.name]
for ep in (EntryPoint.parse(c) for c in self.registered_converters + self.internal_converters):
if ep.name == name:
# `require` argument of ep.load() is deprecated in newer versions of setuptools
if hasattr(ep, 'resolve'):
plugin = ep.resolve()
elif hasattr(ep, '_load'):
plugin = ep._load()
else:
plugin = ep.load(require=False)
self.converters[ep.name] = plugin()
return self.converters[ep.name]
raise KeyError(name)
def __setitem__(self, name, converter):

View File

@@ -14,10 +14,10 @@ class OpenSubtitlesConverter(LanguageReverseConverter):
def __init__(self):
self.alpha3b_converter = language_converters['alpha3b']
self.alpha2_converter = language_converters['alpha2']
self.to_opensubtitles = {('por', 'BR'): 'pob', ('gre', None): 'ell', ('srp', None): 'scc', ('srp', 'ME'): 'mne'}
self.to_opensubtitles = {('por', 'BR'): 'pob', ('gre', None): 'ell', ('srp', None): 'scc', ('srp', 'ME'): 'mne', ('chi', 'TW'): 'zht'}
self.from_opensubtitles = CaseInsensitiveDict({'pob': ('por', 'BR'), 'pb': ('por', 'BR'), 'ell': ('ell', None),
'scc': ('srp', None), 'mne': ('srp', 'ME')})
self.codes = (self.alpha2_converter.codes | self.alpha3b_converter.codes | set(['pob', 'pb', 'scc', 'mne']))
'scc': ('srp', None), 'mne': ('srp', 'ME'), 'zht': ('zho', 'TW')})
self.codes = (self.alpha2_converter.codes | self.alpha3b_converter.codes | set(self.from_opensubtitles.keys()))
def convert(self, alpha3, country=None, script=None):
alpha3b = self.alpha3b_converter.convert(alpha3, country, script)

View File

@@ -7,8 +7,7 @@
from __future__ import unicode_literals
from collections import namedtuple
from functools import partial
# from pkg_resources import resource_stream # @UnresolvedImport
import os, io
from pkg_resources import resource_stream # @UnresolvedImport
from .converters import ConverterManager
from . import basestr
@@ -19,10 +18,10 @@ COUNTRY_MATRIX = []
#: The namedtuple used in the :data:`COUNTRY_MATRIX`
IsoCountry = namedtuple('IsoCountry', ['name', 'alpha2'])
f = io.open(os.path.join(os.path.dirname(__file__), 'data/iso-3166-1.txt'), encoding='utf-8')
f = resource_stream('babelfish', 'data/iso-3166-1.txt')
f.readline()
for l in f:
iso_country = IsoCountry(*l.strip().split(';'))
iso_country = IsoCountry(*l.decode('utf-8').strip().split(';'))
COUNTRIES[iso_country.alpha2] = iso_country.name
COUNTRY_MATRIX.append(iso_country)
f.close()

View File

@@ -7,8 +7,7 @@
from __future__ import unicode_literals
from collections import namedtuple
from functools import partial
import os, io
# from pkg_resources import resource_stream # @UnresolvedImport
from pkg_resources import resource_stream # @UnresolvedImport
from .converters import ConverterManager
from .country import Country
from .exceptions import LanguageConvertError
@@ -22,10 +21,10 @@ LANGUAGE_MATRIX = []
#: The namedtuple used in the :data:`LANGUAGE_MATRIX`
IsoLanguage = namedtuple('IsoLanguage', ['alpha3', 'alpha3b', 'alpha3t', 'alpha2', 'scope', 'type', 'name', 'comment'])
f = io.open(os.path.join(os.path.dirname(__file__), 'data/iso-639-3.tab'), encoding='utf-8')
f = resource_stream('babelfish', 'data/iso-639-3.tab')
f.readline()
for l in f:
iso_language = IsoLanguage(*l.split('\t'))
iso_language = IsoLanguage(*l.decode('utf-8').split('\t'))
LANGUAGES.add(iso_language.alpha3)
LANGUAGE_MATRIX.append(iso_language)
f.close()

View File

@@ -5,10 +5,8 @@
# that can be found in the LICENSE file.
#
from __future__ import unicode_literals
import os, io
from collections import namedtuple
# from pkg_resources import resource_stream # @UnresolvedImport
from pkg_resources import resource_stream # @UnresolvedImport
from . import basestr
#: Script code to script name mapping
@@ -20,10 +18,10 @@ SCRIPT_MATRIX = []
#: The namedtuple used in the :data:`SCRIPT_MATRIX`
IsoScript = namedtuple('IsoScript', ['code', 'number', 'name', 'french_name', 'pva', 'date'])
f = io.open(os.path.join(os.path.dirname(__file__), 'data/iso15924-utf8-20131012.txt'), encoding='utf-8')
f = resource_stream('babelfish', 'data/iso15924-utf8-20131012.txt')
f.readline()
for l in f:
l = l.strip()
l = l.decode('utf-8').strip()
if not l or l.startswith('#'):
continue
script = IsoScript._make(l.split(';'))