From d4f83a97bb4eef0cb9893fad543f41b30dd52662 Mon Sep 17 00:00:00 2001
From: marco <m.toma99@gmail.com>
Date: Mon, 20 Jan 2020 19:30:55 +0100
Subject: [PATCH] fix cb01 film section: blacklist the "OSCAR 2020" post; decode HTML entities via HTMLParser

---
 channels/cineblog01.py |  2 +-
 core/scrapertools.py   | 27 +++------------------------
 2 files changed, 4 insertions(+), 25 deletions(-)

diff --git a/channels/cineblog01.py b/channels/cineblog01.py
index 64a82ac5..82889be8 100644
--- a/channels/cineblog01.py
+++ b/channels/cineblog01.py
@@ -131,7 +131,7 @@ def peliculas(item):
     # esclusione degli articoli 'di servizio'
     blacklist = ['BENVENUTI', 'Richieste Serie TV', 'CB01.UNO &#x25b6; TROVA L&#8217;INDIRIZZO UFFICIALE ',
                  'Aggiornamento Quotidiano Serie TV', 'OSCAR 2019 ▶ CB01.UNO: Vota il tuo film preferito! 🎬',
-                 'Openload: la situazione. Benvenuto Verystream', 'Openload: lo volete ancora?']
+                 'Openload: la situazione. Benvenuto Verystream', 'Openload: lo volete ancora?', 'OSCAR 2020 &#x25b6; VOTA IL TUO FILM PREFERITO! &#x1f3ac;']
     # debug = True
     if 'newest' in item.args:
         if '/serietv/' not in item.url:
diff --git a/core/scrapertools.py b/core/scrapertools.py
index 5bb50a7b..5b6d4bac 100644
--- a/core/scrapertools.py
+++ b/core/scrapertools.py
@@ -39,30 +39,9 @@ def find_multiple_matches_groups(text, pattern):
 
 # Convierte los codigos html "&ntilde;" y lo reemplaza por "ñ" caracter unicode utf-8
 def decodeHtmlentities(data):
-    entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8})(;?)")
-
-    def substitute_entity(match):
-        ent = match.group(2) + match.group(3)
-        res = ""
-        while not ent in html5 and not ent.endswith(";") and match.group(1) != "#":
-            # Excepción para cuando '&' se usa como argumento en la urls contenidas en los datos
-            try:
-                res = ent[-1] + res
-                ent = ent[:-1]
-            except:
-                break
-
-        if match.group(1) == "#":
-            ent = unichr(int(ent.replace(";", "")))
-            return ent.encode('utf-8')
-        else:
-            cp = html5.get(ent)
-            if cp:
-                return cp.decode("unicode-escape").encode('utf-8') + res
-            else:
-                return match.group()
-
-    return entity_re.subn(substitute_entity, data)[0]
+    import HTMLParser
+    parser = HTMLParser.HTMLParser()
+    return parser.unescape(data)
 
 
 def unescape(text):