regex debugger e altre modifiche varie

2019-07-30 21:14:12 +02:00
parent 4dc1d6b91d
commit cbb45094a5
4 changed files with 31 additions and 42 deletions
--- a/channels.json
+++ b/channels.json
@@ -17,13 +17,10 @@
    "casacinemainfo": "https://www.casacinema.info",
    "cb01anime": "https://www.cineblog01.ink",
    "cinemalibero": "https://www.cinemalibero.best",
-    "cinemastreaming": "https://cinemastreaming.icu",
    "documentaristreamingda": "https://documentari-streaming-da.com",
    "dreamsub": "https://www.dreamsub.stream",
    "eurostreaming": "https://eurostreaming.pink",
-    "eurostreaming_video": "https://www.eurostreaming.best",
    "fastsubita": "http://fastsubita.com",
-    "ffilms":"https://ffilms.org",
    "filmigratis": "https://filmigratis.net",
    "filmgratis": "https://www.filmaltadefinizione.net",
    "filmontv": "https://www.comingsoon.it",
--- a/channels/cineblog01.py
+++ b/channels/cineblog01.py
@@ -9,9 +9,8 @@ from core import scrapertoolsV2, httptools, servertools, tmdb, support
 from core.item import Item
 from lib import unshortenit
 from platformcode import logger, config
-from specials import autoplay

-#impostati dinamicamente da getUrl()
+#impostati dinamicamente da findhost()
 host = ""
 headers = ""

@@ -56,31 +55,15 @@ def mainlist(item):
    return locals()


+@support.scrape
 def menu(item):
    findhost()
-    itemlist= []
-    data = httptools.downloadpage(item.url, headers=headers).data
-    data = re.sub('\n|\t', '', data)
-    block = scrapertoolsV2.find_single_match(data, item.args + r'<span.*?><\/span>.*?<ul.*?>(.*?)<\/ul>')
-    support.log('MENU BLOCK= ',block)
-    patron = r'href="?([^">]+)"?>(.*?)<\/a>'
-    matches = re.compile(patron, re.DOTALL).findall(block)
-    for scrapedurl, scrapedtitle in matches:
-        itemlist.append(
-            Item(
-                channel=item.channel,
-                title=scrapedtitle,
-                contentType=item.contentType,
-                action='peliculas',
-                url=host + scrapedurl
-            )
-        )
-    
-    return support.thumb(itemlist)
-
-
-
+    patronBlock = item.args + r'<span.*?><\/span>.*?<ul.*?>(.*?)<\/ul>'
+    patron = r'href="?(?P<url>[^">]+)"?>(?P<title>.*?)<\/a>'
+    thumb = ''
+    action = 'peliculas'

+    return locals()


 def newest(categoria):
--- a/core/support.py
+++ b/core/support.py
@@ -112,21 +112,15 @@ def dbg():
    web_pdb.set_trace()


-def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="", patronBlock="",
-           patronNext="", action="findvideos", addVideolibrary = True, typeContentDict={}, typeActionDict={}):
+def regexDbg(item, patron, headers, data=''):
    import json, urllib2, webbrowser
    url = 'https://regex101.com'

-    html = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"')
-    html = re.sub('\n|\t', ' ', html)
-
-    m = re.search(r'\((?!\?)', patron)
-    n = 0
-    dbg()
-    while m:
-        patron = patron[:m.end()] + '?P<' + listGroups[n] + '>' + patron[m.end():]
-        m = re.search(r'\((?!\?)', patron)
-        n += 1
+    if not data:
+        html = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"')
+        html = re.sub('\n|\t', ' ', html)
+    else:
+        html = data

    headers = {'content-type': 'application/json'}
    data = {
@@ -141,6 +135,17 @@ def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="
    permaLink = json.loads(r)['permalinkFragment']
    webbrowser.open(url + "/r/" + permaLink)

+
+def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="", patronBlock="",
+           patronNext="", action="findvideos", addVideolibrary = True, typeContentDict={}, typeActionDict={}):
+    m = re.search(r'\((?!\?)', patron)
+    n = 0
+    while m:
+        patron = patron[:m.end()] + '?P<' + listGroups[n] + '>' + patron[m.end():]
+        m = re.search(r'\((?!\?)', patron)
+        n += 1
+    regexDbg(item, patron, headers)
+
    return

 def scrape(func):
@@ -182,7 +187,7 @@ def scrape(func):
        addVideolibrary = args['addVideolibrary'] if 'addVideolibrary' in args else True
        blacklist = args['blacklist'] if 'blacklist' in args else ''
        data = args['data'] if 'data' in args else ''
-        headers = args['headers'] if 'headers' in args else ''
+        headers = args['headers'] if 'headers' in args else func.__globals__['headers']
        patron = args['patron'] if 'patron' in args else ''
        patronNext = args['patronNext'] if 'patronNext' in args else ''
        patronBlock = args['patronBlock'] if 'patronBlock' in args else ''
@@ -190,6 +195,7 @@ def scrape(func):
        typeContentDict = args['type_content_dict'] if 'type_content_dict' in args else {}
        if 'pagination' in args: pagination = args['pagination'] if args['pagination'] else 20
        else: pagination = ''
+
        log('PATRON= ', patron)
        if not data:
            data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"')
@@ -215,11 +221,14 @@ def scrape(func):
            matches = scrapertoolsV2.find_multiple_matches_groups(block, patron)
            log('MATCHES =', matches)

+            if 'debug' in args:
+                regexDbg(item, patron, headers, block)
+
            known_keys = ['url', 'title', 'title2', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere',
                          'rating', 'type', 'lang']  # by greko aggiunto episode
            lang = '' # aggiunto per gestire i siti con pagine di serietv dove si hanno i video in ita e in subita
            
-            pag  = item.page if item.page else 1  # pagination
+            pag = item.page if item.page else 1  # pagination

            for i, match in enumerate(matches):
                if pagination and (pag - 1) * pagination > i: continue  # pagination
--- a/specials/autoplay.py
+++ b/specials/autoplay.py
@@ -645,7 +645,7 @@ def get_languages(channel):
    :return: list
    '''
    logger.info()
-    list_language = ['No filtrar']
+    list_language = ['Non filtrare']
    list_controls, dict_settings = channeltools.get_channel_controls_settings(channel)
    for control in list_controls:
        try: