TMDB: miglioria riconoscimento

2021-04-17 14:15:41 +02:00
parent 7fa8212fed
commit 1e55f25b05
4 changed files with 38 additions and 66 deletions
@@ -478,3 +478,27 @@ def get_md5(cadena):
        devuelve = binascii.hexlify(md5.new(cadena).digest())

    return devuelve
+
+
+def title_unify(title):
+    import unicodedata
+
+    u_title = ''
+    if type(title) == str: title = u'' + title
+    for c in unicodedata.normalize('NFD', title):
+        cat = unicodedata.category(c)
+        if cat != 'Mn':
+            if cat == 'Pd':
+                c_new = '-'
+            elif cat in ['Ll', 'Lu'] or c == ':':
+                c_new = c
+            else:
+                c_new = ' '
+            u_title += c_new
+
+    if (u_title.count(':') + u_title.count('-')) == 1:
+        # subtitle, split but only if there's one, it might be part of title
+        spl = u_title.replace(':', '-').split('-')
+        u_title = spl[0] if len(spl[0]) > 5 else spl[1]
+
+    return u_title.strip()
@@ -411,51 +411,24 @@ def set_infoLabels_item(item, seekTmdb=True, idioma_busqueda=def_lang, lock=None
                    __leer_datos(otmdb)
                    return len(item.infoLabels)

-        # title might contain - or : --> try to search only second title
-        def splitTitle():
-            if '-' in item.fulltitle:
-                item.infoLabels['tvshowtitle'] = item.fulltitle.split('-')[1]
-                item.infoLabels['title'] = item.infoLabels['tvshowtitle']
-            elif ':' in item.fulltitle:
-                item.infoLabels['tvshowtitle'] = item.fulltitle.split(':')[1]
-                item.infoLabels['title'] = item.infoLabels['tvshowtitle']
-            else:
-                return False
-            return True
+        def unify():
+            new_title = scrapertools.title_unify(item.fulltitle)
+            if new_title != item.fulltitle:
+                item.infoLabels['tvshowtitle'] = scrapertools.title_unify(item.infoLabels['tvshowtitle'])
+                item.infoLabels['title'] = scrapertools.title_unify(item.infoLabels['title'])
+                item.fulltitle = new_title
+                return True
        # We check what type of content it is...
        if item.contentType == 'movie':
            tipo_busqueda = 'movie'
        elif item.contentType == 'undefined':  # don't know
-            def detect():
-                # try movie first
-                results = search(otmdb_global, 'movie')
-                if results:
-                    item.contentType = 'movie'
-                infoMovie = item.infoLabels
-                if infoMovie['title'] == item.fulltitle:  # exact match -> it's probably correct
-                    return results
-
-                # try tvshow then
-                item.infoLabels = {'tvshowtitle': item.infoLabels['tvshowtitle']}  # reset infolabels
-                results = search(otmdb_global, 'tv')
-                if results:
-                    item.contentType = 'tvshow'
-                else:
-                    item.infoLabels = infoMovie
-
-                return results
-
-            results = detect()
-            if not results:
-                if splitTitle():
-                    results = detect()
-            return results
+            tipo_busqueda = 'multi'
        else:
            tipo_busqueda = 'tv'

        ret = search(otmdb_global, tipo_busqueda)
-        if not ret:
-            if splitTitle():
+        if not ret:  # try with unified title
+            if unify():
                ret = search(otmdb_global, tipo_busqueda)
        return ret
    # Search in tmdb is deactivated or has not given result
@@ -1028,7 +1001,7 @@ class Tmdb(object):
            # We sort result based on fuzzy match to detect most similar
            if len(results) > 1:
                from lib.fuzzy_match import algorithims
-                results.sort(key=lambda r: algorithims.trigram(text_simple, r['title'] if self.busqueda_tipo == 'movie' else r['name']), reverse=True)
+                results.sort(key=lambda r: algorithims.trigram(text_simple, r.get('name', '') if self.busqueda_tipo == 'tv' else r.get('title', '')), reverse=True)

            # We return the number of results of this page
            self.results = results
@@ -1043,7 +1016,6 @@ class Tmdb(object):
            logger.error(msg)
            return 0

-
    def __discover(self, index_results=0):
        self.result = ResultDictDefault()
        results = []