TMDB: miglioria riconoscimento

This commit is contained in:
mac12m99
2021-04-17 14:15:41 +02:00
parent 7fa8212fed
commit 1e55f25b05
4 changed files with 38 additions and 66 deletions
+24
View File
@@ -478,3 +478,27 @@ def get_md5(cadena):
devuelve = binascii.hexlify(md5.new(cadena).digest())
return devuelve
def title_unify(title):
import unicodedata
u_title = ''
if type(title) == str: title = u'' + title
for c in unicodedata.normalize('NFD', title):
cat = unicodedata.category(c)
if cat != 'Mn':
if cat == 'Pd':
c_new = '-'
elif cat in ['Ll', 'Lu'] or c == ':':
c_new = c
else:
c_new = ' '
u_title += c_new
if (u_title.count(':') + u_title.count('-')) == 1:
# subtitle, split but only if there's one, it might be part of title
spl = u_title.replace(':', '-').split('-')
u_title = spl[0] if len(spl[0]) > 5 else spl[1]
return u_title.strip()
+11 -39
View File
@@ -411,51 +411,24 @@ def set_infoLabels_item(item, seekTmdb=True, idioma_busqueda=def_lang, lock=None
__leer_datos(otmdb)
return len(item.infoLabels)
# title might contain - or : --> try to search only second title
def splitTitle():
if '-' in item.fulltitle:
item.infoLabels['tvshowtitle'] = item.fulltitle.split('-')[1]
item.infoLabels['title'] = item.infoLabels['tvshowtitle']
elif ':' in item.fulltitle:
item.infoLabels['tvshowtitle'] = item.fulltitle.split(':')[1]
item.infoLabels['title'] = item.infoLabels['tvshowtitle']
else:
return False
return True
def unify():
new_title = scrapertools.title_unify(item.fulltitle)
if new_title != item.fulltitle:
item.infoLabels['tvshowtitle'] = scrapertools.title_unify(item.infoLabels['tvshowtitle'])
item.infoLabels['title'] = scrapertools.title_unify(item.infoLabels['title'])
item.fulltitle = new_title
return True
# We check what type of content it is...
if item.contentType == 'movie':
tipo_busqueda = 'movie'
elif item.contentType == 'undefined': # don't know
def detect():
# try movie first
results = search(otmdb_global, 'movie')
if results:
item.contentType = 'movie'
infoMovie = item.infoLabels
if infoMovie['title'] == item.fulltitle: # exact match -> it's probably correct
return results
# try tvshow then
item.infoLabels = {'tvshowtitle': item.infoLabels['tvshowtitle']} # reset infolabels
results = search(otmdb_global, 'tv')
if results:
item.contentType = 'tvshow'
else:
item.infoLabels = infoMovie
return results
results = detect()
if not results:
if splitTitle():
results = detect()
return results
tipo_busqueda = 'multi'
else:
tipo_busqueda = 'tv'
ret = search(otmdb_global, tipo_busqueda)
if not ret:
if splitTitle():
if not ret: # try with unified title
if unify():
ret = search(otmdb_global, tipo_busqueda)
return ret
# Search in tmdb is deactivated or has not given result
@@ -1028,7 +1001,7 @@ class Tmdb(object):
# We sort result based on fuzzy match to detect most similar
if len(results) > 1:
from lib.fuzzy_match import algorithims
results.sort(key=lambda r: algorithims.trigram(text_simple, r['title'] if self.busqueda_tipo == 'movie' else r['name']), reverse=True)
results.sort(key=lambda r: algorithims.trigram(text_simple, r.get('name', '') if self.busqueda_tipo == 'tv' else r.get('title', '')), reverse=True)
# We return the number of results of this page
self.results = results
@@ -1043,7 +1016,6 @@ class Tmdb(object):
logger.error(msg)
return 0
def __discover(self, index_results=0):
self.result = ResultDictDefault()
results = []