Fix the cineblog01 channel and extend the support module (scrape, menu) to back it

2019-03-30 15:33:25 +01:00
parent 30c4bab694
commit f04f584b39
2 changed files with 77 additions and 282 deletions
@@ -6,7 +6,7 @@
 import re
 import urlparse

-from channels import autoplay, filtertools
+from channels import autoplay, filtertools, support
 from core import scrapertoolsV2, httptools, servertools, tmdb
 from core.item import Item
 from lib import unshortenit
@@ -24,13 +24,13 @@ headers = [['Referer', host]]
 IDIOMAS = {'Italiano': 'IT'}
 list_language = IDIOMAS.values()
 list_servers = ['openload', 'streamango', 'wstream']
-list_quality = ['HD', 'SD']
+list_quality = ['HD', 'default']

 __comprueba_enlaces__ = config.get_setting('comprueba_enlaces', 'cineblog01')
 __comprueba_enlaces_num__ = config.get_setting('comprueba_enlaces_num', 'cineblog01')

 #esclusione degli articoli 'di servizio'
-blacklist = ['BENVENUTI ', 'Richieste Serie TV', 'CB01.UNO &#x25b6; TROVA L&#8217;INDIRIZZO UFFICIALE ', 'Aggiornamento Quotidiano Serie TV', 'OSCAR 2019 ▶ CB01.UNO: Vota il tuo film preferito! 🎬']
+blacklist = ['BENVENUTI', 'Richieste Serie TV', 'CB01.UNO &#x25b6; TROVA L&#8217;INDIRIZZO UFFICIALE ', 'Aggiornamento Quotidiano Serie TV', 'OSCAR 2019 ▶ CB01.UNO: Vota il tuo film preferito! 🎬']


 def mainlist(item):
@@ -39,79 +39,34 @@ def mainlist(item):
    autoplay.init(item.channel, list_servers, list_quality)

    # Main options
-    itemlist = [Item(channel=item.channel,
-                     action="video",
-                     title="[B]Film[/B]",
-                     url=host,
-                     contentType="movie"),
-                Item(channel=item.channel,
-                     action="menu",
-                     title="[B] > Film HD[/B]",
-                     extra='Film HD Streaming',
-                     url=host,
-                     contentType="movie"),
-                Item(channel=item.channel,
-                     action="menu",
-                     title="[B] > Film per Genere[/B]",
-                     extra='Film per Genere',
-                     url=host,
-                     contentType="movie"),
-                Item(channel=item.channel,
-                     action="menu",
-                     title="[B] > Film per Anno[/B]",
-                     extra='Film per Anno',
-                     url=host,
-                     contentType="movie"),
-                Item(channel=item.channel,
-                     action="search",
-                     title="[COLOR blue] > Cerca Film[/COLOR]",
-                     contentType="movie",
-                     url=host,),
-               
-                Item(channel=item.channel,
-                     action="video",
-                     title="[B]Serie TV[/B]",
-                     url=host + '/serietv/',
-                     contentType="episode"),
-                Item(channel=item.channel,
-                     action="menu",
-                     title="[B] > Serie-Tv per Lettera[/B]",
-                     extra='Serie-Tv per Lettera',
-                     url=host + '/serietv/',
-                     contentType="episode"),
-                Item(channel=item.channel,
-                     action="menu",
-                     title="[B] > Serie-Tv per Genere[/B]",
-                     extra='Serie-Tv per Genere',
-                     url=host + '/serietv/',
-                     contentType="episode"),
-                Item(channel=item.channel,
-                     action="menu",
-                     title="[B] > Serie-Tv per Anno[/B]",
-                     extra='Serie-Tv per Anno',
-                     url=host + '/serietv/',
-                     contentType="episode"),
-                Item(channel=item.channel,
-                     action="search",
-                     title="[COLOR blue] > Cerca Serie TV[/COLOR]",
-                     contentType="episode",
-                     url=host + '/serietv/'),                
-                ]
+    itemlist = []
+    support.menu(itemlist, '[B]Film[/B]', 'peliculas', host)
+    support.menu(itemlist, '[B] > HD [/B]', 'menu', host, args="Film HD Streaming")
+    support.menu(itemlist, '[B] > Per genere [/B]', 'menu', host, args="Film per Genere")
+    support.menu(itemlist, '[B] > Per anno [/B]', 'menu', host, args="Film per Anno")
+    support.menu(itemlist, '[COLOR blue] > Cerca [/COLOR]', 'search', host)
+
+    support.menu(itemlist, '[B]Serie TV[/B]', 'peliculas', host + '/serietv/', contentType='episode')
+    support.menu(itemlist, '[B] > Per lettera[/B]', 'menu', host + '/serietv/', contentType='episode', args="Serie-Tv per Lettera")
+    support.menu(itemlist, '[B] > Per genere[/B]', 'menu', host + '/serietv/', contentType='episode', args="Serie-Tv per Genere")
+    support.menu(itemlist, '[B] > Per Anno[/B]', 'menu', host + '/serietv/', contentType='episode', args="Serie-Tv per Anno")
+    support.menu(itemlist, '[COLOR blue] > Cerca [/COLOR]', 'search', host + '/serietv/', contentType='episode')
    
    autoplay.show_option(item.channel, itemlist)

    # auto thumb
-    itemlist=thumb(itemlist) 
+    itemlist = thumb(itemlist)

    return itemlist

+
 def menu(item):
    itemlist= []
    data = httptools.downloadpage(item.url, headers=headers).data
-    data = re.sub('\n|\t','',data)
-    block =  scrapertoolsV2.get_match(data, item.extra + r'<span.*?><\/span>.*?<ul.*?>(.*?)<\/ul>')
+    data = re.sub('\n|\t', '', data)
+    block = scrapertoolsV2.get_match(data, item.args + r'<span.*?><\/span>.*?<ul.*?>(.*?)<\/ul>')
    logger.info('MENU BLOCK= '+block)
-    patron = r'href=([^>]+)>(.*?)<\/a>'
+    patron = r'href="?([^">]+)"?>(.*?)<\/a>'
    matches = re.compile(patron, re.DOTALL).findall(block)
    for scrapedurl, scrapedtitle in matches:
        itemlist.append(
@@ -119,18 +74,19 @@ def menu(item):
                channel=item.channel,
                title=scrapedtitle,
                contentType=item.contentType,
-                action='video',
+                action='peliculas',
                url=host + scrapedurl
            )
        )
    return itemlist

+
 def search(item, text):
    logger.info("[cineblog01.py] " + item.url + " search " + text)

    try:
        item.url = item.url + "/?s=" + text
-        return video(item)
+        return peliculas(item)

    # Continua la ricerca in caso di errore 
    except:
@@ -139,179 +95,47 @@ def search(item, text):
            logger.error("%s" % line)
        return []

+
 def newest(categoria):
    logger.info("[cineblog01.py] newest")
    itemlist = []
    item = Item()
-    if categoria == "peliculas":
-        item.url = host + '/lista-film-ultimi-100-film-aggiunti/'
-        item.extra = "movie"
-        try:
-            # Carica la pagina 
-            data = httptools.downloadpage(item.url).data
-            logger.info("[cineblog01.py] DATA: "+data)
-            blocco = scrapertoolsV2.get_match(data, r'Ultimi 100 film aggiunti:.*?<\/td>')
-            patron = r'<a href=([^>]+)>([^<]+)<\/a>'
-            matches = re.compile(patron, re.DOTALL).findall(blocco)
-
-            for scrapedurl, scrapedtitle in matches:
-                itemlist.append(
-                    Item(channel=item.channel,
-                         action="findvideos",
-                         contentType="movie",
-                         fulltitle=scrapedtitle,
-                         show=scrapedtitle,
-                         title=scrapedtitle,
-                         text_color="azure",
-                         url=scrapedurl,
-                         extra=item.extra,
-                         viewmode="movie_with_plot"))
-        except:
-            import sys
-            for line in sys.exc_info():
-                logger.error("{0}".format(line))
-            return []
-    return itemlist
+    item.url = host + '/lista-film-ultimi-100-film-aggiunti/'
+    return support.scrape(item, r'<a href=([^>]+)>([^<([]+)(?:\[([A-Z]+)\])?\s\(([0-9]{4})\)<\/a>',
+                   ['url', 'title', 'quality', 'year'],
+                   patron_block=r'Ultimi 100 film aggiunti:.*?<\/td>')


-def video(item):
-    logger.info("[cineblog01.py] video")
-    itemlist = []
-
-    data = httptools.downloadpage(item.url, headers=headers).data
-    data = re.sub('\n|\t','',data)
-    # block = scrapertoolsV2.get_match(data, r'<div class="sequex-page-left">(.*?)<aside class="sequex-page-right">')
-    block = scrapertoolsV2.get_match(data, r'<div class=sequex-page-left>(.*?)<aside class=sequex-page-right>')
-    logger.info('DATA= '+data)
+def peliculas(item):
+    logger.info("[cineblog01.py] peliculas")
    if item.contentType == 'movie' or '/serietv/' not in item.url:
-        action = 'findvideos'     
-        logger.info("### FILM ###")
-        # patron = r'type-post.*?>.*?<img src="([^"]+)".*?<h3.*?<a href="([^"]+)">([^<]+)<\/a>.*?<strong>([^<]+)<.*?br \/>\s+(.*?)   '
-        patron = r'<div class=card-image>.*?<img src=(.*?)alt.*?<h3.*?<a href=(.*?)\/>([^<]+)<\/a><\/h3>(.*?)<br \/>(.*?)<\/a>'
-        matches = re.compile(patron, re.DOTALL).findall(block)
-
-        logger.info("### MATCHES ###" + str(matches))
-        for scrapedthumb, scrapedurl, scrapedtitle, scrapedinfo, scrapedplot in matches:
-            title = re.sub(r'(?:\[HD/?3?D?\]|\[Sub-ITA\])', '', scrapedtitle)
-            year = scrapertoolsV2.find_single_match(scrapedtitle, r'\((\d{4})\)')
-            quality = scrapertoolsV2.find_single_match(scrapedtitle, r'\[(.*?)\]')
-            genre = scrapertoolsV2.remove_htmltags(scrapertoolsV2.find_single_match(scrapedinfo, '([A-Z]+) &'))
-            duration = scrapertoolsV2.find_single_match(scrapedinfo,'DURATA ([0-9]+)&')
-
-            infolabels = {}
-            if year:
-                title = title.replace("(%s)" % year, '').strip()
-                infolabels['Year'] = year
-            if duration:
-                infolabels['Duration'] = int(duration)*60
-            if genre:
-                infolabels['Genre'] = genre
-            if quality:
-                longtitle = '[B]' + title + '[/B] [COLOR blue][' + quality + '][/COLOR]'
-            else:
-                longtitle = '[B]' + title + '[/B]'
-
-            infolabels['Plot'] = scrapertoolsV2.decodeHtmlentities(scrapedplot) + '...'
-            
-            if not scrapedtitle in blacklist:
-                itemlist.append(
-                    Item(channel=item.channel,
-                        action="findvideos",
-                        contentType=item.contentType,
-                        title=longtitle,
-                        fulltitle=title,
-                        show=title,
-                        url=scrapedurl,
-                        infoLabels=infolabels,
-                        thumbnail=scrapedthumb
-                        )
-                )
+        patron = r'<div class=card-image>.*?<img src=([^ ]+) alt.*?<a href=([^ >]+)\/>([^<[(]+)(?:\[([A-Za-z0-9/-]+)])? (?:\(([0-9]{4})\))?.*?<strong>([^<>]+)DURATA ([0-9]+).*?<br>([^<>]+)'
+        listGroups = ['thumb', 'url', 'title', 'quality', 'year', 'genre', 'duration', 'plot']
+        action = 'findvideos'
    else:
+        patron = r'div class="card-image">.*?<img src="([^ ]+)" alt.*?<a href="([^ >]+)">([^<[(]+)</a>.*?<strong><span style="[^"]+">([^<>0-9(]+)\(([0-9]{4}).*?</p>([^<>]+)'
+        listGroups = ['thumb', 'url', 'title', 'genre', 'year', 'plot']
        action = 'episodios'
-        patron = 'type-post.*?>(.*?)<div class=card-action>'
-        matches = re.compile(patron, re.DOTALL).findall(block)

-        for match in matches:
-            # patron = r'<img src="([^"]+)".*?<h3.*?<a href="([^"]+)">([^<]+)<\/a>.*?<p>(.*?)\(([0-9]+).*?\).*?<\/p>([^<>]*)(?:<\/p>)?'
-            patron = r'<img src=(.*?)alt.*?.*?<h3.*?<a href=(.*?)\/>([^<]+)<\/a>.*?<span.*?>(.*?)\(([0-9]+).*?\).*?<\/p>([^<>]*)(?:<\/p>)?'
-            matches = re.compile(patron, re.DOTALL).findall(match)
-            for scrapedthumb, scrapedurl, scrapedtitle, scrapedgenre, scrapedyear, scrapedplot in matches:
-                longtitle = '[B]' + scrapedtitle + '[/B]'
-                title = scrapedtitle
-                infolabels = {}
-                infolabels['Year'] = scrapedyear
-                infolabels['Genre'] = scrapertoolsV2.remove_htmltags(scrapedgenre)
-                infolabels['Plot'] = scrapertoolsV2.decodeHtmlentities(scrapedplot)
-                if not scrapedtitle in blacklist:
-                    itemlist.append(
-                        Item(channel=item.channel,
-                            action=action,
-                            contentType=item.contentType,
-                            title=longtitle,
-                            fulltitle=title,
-                            show=title,
-                            url=scrapedurl,
-                            infoLabels=infolabels,
-                            thumbnail=scrapedthumb
-                            )
-                    )
-
-    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
-
-    patron =  '<a class=page-link href=([^>]+)><i class="fa fa-angle-right">'
-    next_page = scrapertoolsV2.find_single_match(data, patron)
-    logger.info('NEXT '+next_page) 
-
-    if next_page != "":
-        itemlist.append(
-            Item(channel=item.channel,
-                action="video",
-                contentType=item.contentType,
-                title="[COLOR blue]" + config.get_localized_string(30992) + "[/COLOR]",
-                url=next_page,
-                thumbnail=thumb()))
-      
-    return itemlist
+    return support.scrape(item, patron_block=[r'<div class="?sequex-page-left"?>(.*?)<aside class="?sequex-page-right"?>',
+                                              '<div class="?card-image"?>.*?(?=<div class="?card-image"?>|<div class="?rating"?>)'],
+                          patron=patron, listGroups=listGroups,
+                          patronNext='<a class="?page-link"? href="?([^>]+)"?><i class="fa fa-angle-right">', blacklist=blacklist, action=action)


 def episodios(item):
    logger.info("[cineblog01.py] episodios")
-    itemlist = []
-    data = httptools.downloadpage(item.url, headers=headers).data
-    data = re.sub('\n|\t','',data)
-    block = scrapertoolsV2.get_match(data, r'<article class="sequex-post-content">(.*?)<\/article>').replace('&#215;','x').replace(' &#8211; ','')
-    logger.info(block)
-    blockSeason = scrapertoolsV2.find_multiple_matches(block, r'<div class="sp-head[a-z ]*?" title="Espandi">([^<>]*?)</div>(.*?)<div class="spdiv">\[riduci\]</div>')
-    for season, block in blockSeason:
-        patron = r'(?:<p>)?([0-9]+x[0-9]+)(.*?)(?:</p>|<br)'
-        matches = re.compile(patron, re.DOTALL).findall(block)
-        for scrapedtitle, scrapedurl in matches:
-            title = '[B]' + scrapedtitle + '[/B] - ' + item.title + (' (SUB ITA)' if 'SUB ITA' in season else ' (ITA)')
-            itemlist.append(
-                    Item(channel=item.channel,
-                        action="findvideos",
-                        contentType=item.contentType,
-                        title=title,
-                        fulltitle=item.fulltitle,
-                        show=item.fulltitle,
-                        url=scrapedurl,
-                        )
-                    )
+    return support.scrape(item, patron_block=[r'<article class="sequex-post-content">(.*?)<\/article>',
+                                              r'<div class="sp-head[a-z ]*?" title="Espandi">[^<>]*?</div>(.*?)<div class="spdiv">\[riduci\]</div>'],
+                          patron='(?:<p>)?([0-9]+&#215;[0-9]+)(.*?)(?:</p>|<br)', listGroups=['title', 'url'])

-    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
-
-    if config.get_videolibrary_support() and len(itemlist) > 0 and item.extra != 'findvideos':
-        itemlist.append(
-            Item(channel=item.channel, title='[COLOR yellow][B]'+config.get_localized_string(30161)+'[/B][/COLOR]', url=item.url,
-                 action="add_serie_to_library", extra="episodios", show=item.show))
-
-    return itemlist

 def findvideos(item):
    if item.contentType == "episode":
        return findvid_serie(item)
+
    def load_links(itemlist, re_txt, color, desc_txt, quality=""):
-        streaming = scrapertoolsV2.find_single_match(data, re_txt).replace('"','')
+        streaming = scrapertoolsV2.find_single_match(data, re_txt).replace('"', '')
        logger.info('STREAMING='+streaming)
        patron = '<td><a[^h]href=(.*?) target[^>]+>([^<]+)<'
        matches = re.compile(patron, re.DOTALL).findall(streaming)
@@ -335,7 +159,7 @@ def findvideos(item):

    itemlist = []

-    # Carica la pagina 
+    # Carica la pagina
    data = httptools.downloadpage(item.url).data
    logger.info("DATA= "+data)

@@ -380,32 +204,26 @@ def findvideos(item):

    autoplay.start(itemlist, item)

-    if item.contentType != 'episode':
-        if config.get_videolibrary_support() and len(itemlist) > 0 and item.extra != 'findvideos':
-            itemlist.append(
-                Item(channel=item.channel, title='[COLOR yellow][B]'+config.get_localized_string(30161)+'[/B][/COLOR]', url=item.url,
-                     action="add_pelicula_to_library", extra="findvideos", contentTitle=item.fulltitle))
+    support.videolibrary(itemlist, item)

    return itemlist

+
 def findvid_serie(item):
    def load_vid_series(html, item, itemlist, blktxt):
-        if len(blktxt) > 2:
-            vtype = blktxt.strip()[:-1] + " - "
-        else:
-            vtype = ''
        patron = '<a href="([^"]+)"[^=]+="_blank"[^>]+>(.*?)</a>'
        # Estrae i contenuti 
        matches = re.compile(patron, re.DOTALL).finditer(html)
        for match in matches:
            scrapedurl = match.group(1)
            scrapedtitle = match.group(2)
-            title = item.title + " [COLOR blue][" + vtype + scrapedtitle + "][/COLOR]"
+            title = item.title + " [COLOR blue][" + scrapedtitle + "][/COLOR]"
            itemlist.append(
                Item(channel=item.channel,
                     action="play",
                     title=title,
                     url=scrapedurl,
+                     server=scrapedtitle,
                     fulltitle=item.fulltitle,
                     show=item.show,
                     contentType=item.contentType,
@@ -447,6 +265,8 @@ def findvid_serie(item):
        else:
            load_vid_series(data[lnkblkp[i]:lnkblkp[i + 1]], item, itemlist, lnkblk[i])

+    autoplay.start(itemlist, item)
+
    return itemlist

 def play(item):
@@ -477,42 +297,7 @@ def play(item):
            data = scrapertoolsV2.get_match(data, r'<a href="([^"]+)".*?class="btn-wrapper">.*?licca.*?</a>')
        
        logger.debug("##### play go.php data ##\n%s\n##" % data)
-    elif "/link/" in item.url:
-        data = httptools.downloadpage(item.url).data
-        if "link =" in data:
-            data = scrapertoolsV2.get_match(data, 'link = "([^"]+)"')
-        else:
-            from lib import jsunpack
-
-            try:
-                data = scrapertoolsV2.get_match(data, r"(eval\(function\(p,a,c,k,e,d.*?)</script>")
-                data = jsunpack.unpack(data)
-                logger.debug("##### play /link/ unpack ##\n%s\n##" % data)
-            except IndexError:
-                logger.debug("##### The content is yet unpacked ##\n%s\n##" % data)
-
-            data = scrapertoolsV2.find_single_match(data, r'var link(?:\s)?=(?:\s)?"([^"]+)";')
-            data, c = unshortenit.unwrap_30x_only(data)
-        if data.startswith('/'):
-            data = urlparse.urljoin("http://swzz.xyz", data)
-            data = httptools.downloadpage(data).data
-        logger.debug("##### play /link/ data ##\n%s\n##" % data)
    else:
-        data = item.url
-        logger.debug("##### play else data ##\n%s\n##" % data)
-    logger.debug("##############################################################")
+        data = support.swzz_get_url(item)

-    try:
-        itemlist = servertools.find_video_items(data=data)
-
-        for videoitem in itemlist:
-            videoitem.title = item.show
-            videoitem.fulltitle = item.fulltitle
-            videoitem.show = item.show
-            videoitem.thumbnail = item.thumbnail
-            videoitem.contentType = item.contentType
-            videoitem.channel = item.channel
-    except AttributeError:
-        logger.error("vcrypt data doesn't contain expected URL")
-
-    return itemlist
+    return support.server(item, data, headers)
@@ -128,7 +128,7 @@ def scrape(item, patron = '', listGroups = [], headers="", blacklist="", data=""
                block = ""
                for b in blocks:
                    block += "\n" + b
-                logger.info('BLOCK '+str(n)+'=' + data)
+                logger.info('BLOCK '+str(n)+'=' + block)
    else:
        block = data
    if patron and listGroups:
@@ -157,18 +157,22 @@ def scrape(item, patron = '', listGroups = [], headers="", blacklist="", data=""
            else:
                longtitle = '[B]' + title + '[/B]'

-            infolabels = {}
-            if scrapedyear:
-                infolabels['year'] = scrapedyear
-            if scrapedplot:
-                infolabels['plot'] = plot
-            if scrapedduration:
-                infolabels['duration'] = scrapedduration
-            if scrapedgenre:
-                genres = scrapertoolsV2.find_multiple_matches(scrapedgenre, '[A-Za-z]+')
-                infolabels['genre'] = ", ".join(genres)
-            if scrapedrating:
-                infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scrapedrating)
+            if item.infoLabels["title"] or item.fulltitle:  # if title is set, probably this is a list of episodes or video sources
+                infolabels = item.infoLabels
+            else:
+                infolabels = {}
+                if scrapedyear:
+                    infolabels['year'] = scrapedyear
+                if scrapedplot:
+                    infolabels['plot'] = plot
+                if scrapedduration:
+                    infolabels['duration'] = scrapedduration
+                if scrapedgenre:
+                    genres = scrapertoolsV2.find_multiple_matches(scrapedgenre, '[A-Za-z]+')
+                    infolabels['genre'] = ", ".join(genres)
+                if scrapedrating:
+                    infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scrapedrating)
+
            if not scrapedtitle in blacklist:
                itemlist.append(
                    Item(channel=item.channel,
@@ -189,6 +193,10 @@ def scrape(item, patron = '', listGroups = [], headers="", blacklist="", data=""
        if patronNext:
            nextPage(itemlist, item, data, patronNext)

+        if item.infoLabels["title"] or item.fulltitle:
+            item.fulltitle = item.infoLabels["title"]
+            videolibrary(itemlist, item)
+
    return itemlist


@@ -266,7 +274,7 @@ def swzz_get_url(item):
    return data


-def menu(itemlist, title='', action='', url='', contentType='movie'):    
+def menu(itemlist, title='', action='', url='', contentType='movie', args=[]):
    frame = inspect.stack()[1]
    filename = frame[0].f_code.co_filename
    filename = os.path.basename(filename).replace('.py','')
@@ -284,6 +292,7 @@ def menu(itemlist, title='', action='', url='', contentType='movie'):
        action = action,
        url = url,
        extra = extra,
+        args = args,
        contentType = contentType
    ))
    from channelselector import thumb
@@ -360,6 +369,7 @@ def nextPage(itemlist, item, data, patron):

    return itemlist

+
 def server(item, data='', headers=''):
    
    if not data: