From cbb45094a5a7d677e8bf90b0900cd838be07b0f6 Mon Sep 17 00:00:00 2001 From: marco Date: Tue, 30 Jul 2019 21:14:12 +0200 Subject: [PATCH] regex debugger e altre modifiche varie --- channels.json | 3 --- channels/cineblog01.py | 31 +++++++------------------------ core/support.py | 37 +++++++++++++++++++++++-------------- specials/autoplay.py | 2 +- 4 files changed, 31 insertions(+), 42 deletions(-) diff --git a/channels.json b/channels.json index f1e8d571..d0096f4d 100644 --- a/channels.json +++ b/channels.json @@ -17,13 +17,10 @@ "casacinemainfo": "https://www.casacinema.info", "cb01anime": "https://www.cineblog01.ink", "cinemalibero": "https://www.cinemalibero.best", - "cinemastreaming": "https://cinemastreaming.icu", "documentaristreamingda": "https://documentari-streaming-da.com", "dreamsub": "https://www.dreamsub.stream", "eurostreaming": "https://eurostreaming.pink", - "eurostreaming_video": "https://www.eurostreaming.best", "fastsubita": "http://fastsubita.com", - "ffilms":"https://ffilms.org", "filmigratis": "https://filmigratis.net", "filmgratis": "https://www.filmaltadefinizione.net", "filmontv": "https://www.comingsoon.it", diff --git a/channels/cineblog01.py b/channels/cineblog01.py index df4d39c8..099fe09a 100644 --- a/channels/cineblog01.py +++ b/channels/cineblog01.py @@ -9,9 +9,8 @@ from core import scrapertoolsV2, httptools, servertools, tmdb, support from core.item import Item from lib import unshortenit from platformcode import logger, config -from specials import autoplay -#impostati dinamicamente da getUrl() +#impostati dinamicamente da findhost() host = "" headers = "" @@ -56,31 +55,15 @@ def mainlist(item): return locals() +@support.scrape def menu(item): findhost() - itemlist= [] - data = httptools.downloadpage(item.url, headers=headers).data - data = re.sub('\n|\t', '', data) - block = scrapertoolsV2.find_single_match(data, item.args + r'<\/span>.*?(.*?)<\/ul>') - support.log('MENU BLOCK= ',block) - patron = r'href="?([^">]+)"?>(.*?)<\/a>' - matches = re.compile(patron, re.DOTALL).findall(block) - for scrapedurl, scrapedtitle in matches: - itemlist.append( - Item( - channel=item.channel, - title=scrapedtitle, - contentType=item.contentType, - action='peliculas', - url=host + scrapedurl - ) - ) - - return support.thumb(itemlist) - - - + patronBlock = item.args + r'<\/span>.*?(.*?)<\/ul>' + patron = r'href="?(?P[^">]+)"?>(?P.*?)<\/a>' + thumb = '' + action = 'peliculas' + return locals() def newest(categoria): diff --git a/core/support.py b/core/support.py index fe65e275..2bb7584e 100644 --- a/core/support.py +++ b/core/support.py @@ -112,21 +112,15 @@ def dbg(): web_pdb.set_trace() -def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="", patronBlock="", - patronNext="", action="findvideos", addVideolibrary = True, typeContentDict={}, typeActionDict={}): +def regexDbg(item, patron, headers, data=''): import json, urllib2, webbrowser url = 'https://regex101.com' - html = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"') - html = re.sub('\n|\t', ' ', html) - - m = re.search(r'\((?!\?)', patron) - n = 0 - dbg() - while m: - patron = patron[:m.end()] + '?P<' + listGroups[n] + '>' + patron[m.end():] - m = re.search(r'\((?!\?)', patron) - n += 1 + if not data: + html = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"') + html = re.sub('\n|\t', ' ', html) + else: + html = data headers = {'content-type': 'application/json'} data = { @@ -141,6 +135,17 @@ def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data=" permaLink = json.loads(r)['permalinkFragment'] webbrowser.open(url + "/r/" + permaLink) + +def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="", patronBlock="", + patronNext="", action="findvideos", addVideolibrary = True, typeContentDict={}, typeActionDict={}): + m = re.search(r'\((?!\?)', patron) + n = 0 + while m: + patron = patron[:m.end()] + '?P<' + listGroups[n] + '>' + patron[m.end():] + m = re.search(r'\((?!\?)', patron) + n += 1 + regexDbg(item, patron, headers) + return def scrape(func): @@ -182,7 +187,7 @@ def scrape(func): addVideolibrary = args['addVideolibrary'] if 'addVideolibrary' in args else True blacklist = args['blacklist'] if 'blacklist' in args else '' data = args['data'] if 'data' in args else '' - headers = args['headers'] if 'headers' in args else '' + headers = args['headers'] if 'headers' in args else func.__globals__['headers'] patron = args['patron'] if 'patron' in args else '' patronNext = args['patronNext'] if 'patronNext' in args else '' patronBlock = args['patronBlock'] if 'patronBlock' in args else '' @@ -190,6 +195,7 @@ def scrape(func): typeContentDict = args['type_content_dict'] if 'type_content_dict' in args else {} if 'pagination' in args: pagination = args['pagination'] if args['pagination'] else 20 else: pagination = '' + log('PATRON= ', patron) if not data: data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"') @@ -215,11 +221,14 @@ def scrape(func): matches = scrapertoolsV2.find_multiple_matches_groups(block, patron) log('MATCHES =', matches) + if 'debug' in args: + regexDbg(item, patron, headers, block) + known_keys = ['url', 'title', 'title2', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating', 'type', 'lang'] # by greko aggiunto episode lang = '' # aggiunto per gestire i siti con pagine di serietv dove si hanno i video in ita e in subita - pag = item.page if item.page else 1 # pagination + pag = item.page if item.page else 1 # pagination for i, match in enumerate(matches): if pagination and (pag - 1) * pagination > i: continue # pagination diff --git a/specials/autoplay.py b/specials/autoplay.py index 5333fb4e..0a9098ac 100644 --- a/specials/autoplay.py +++ b/specials/autoplay.py @@ -645,7 +645,7 @@ def get_languages(channel): :return: list ''' logger.info() - list_language = ['No filtrar'] + list_language = ['Non filtrare'] list_controls, dict_settings = channeltools.get_channel_controls_settings(channel) for control in list_controls: try: