regex debugger e altre modifiche varie

This commit is contained in:
marco
2019-07-30 21:14:12 +02:00
committed by mac12m99
parent 4dc1d6b91d
commit cbb45094a5
4 changed files with 31 additions and 42 deletions
-3
View File
@@ -17,13 +17,10 @@
"casacinemainfo": "https://www.casacinema.info", "casacinemainfo": "https://www.casacinema.info",
"cb01anime": "https://www.cineblog01.ink", "cb01anime": "https://www.cineblog01.ink",
"cinemalibero": "https://www.cinemalibero.best", "cinemalibero": "https://www.cinemalibero.best",
"cinemastreaming": "https://cinemastreaming.icu",
"documentaristreamingda": "https://documentari-streaming-da.com", "documentaristreamingda": "https://documentari-streaming-da.com",
"dreamsub": "https://www.dreamsub.stream", "dreamsub": "https://www.dreamsub.stream",
"eurostreaming": "https://eurostreaming.pink", "eurostreaming": "https://eurostreaming.pink",
"eurostreaming_video": "https://www.eurostreaming.best",
"fastsubita": "http://fastsubita.com", "fastsubita": "http://fastsubita.com",
"ffilms":"https://ffilms.org",
"filmigratis": "https://filmigratis.net", "filmigratis": "https://filmigratis.net",
"filmgratis": "https://www.filmaltadefinizione.net", "filmgratis": "https://www.filmaltadefinizione.net",
"filmontv": "https://www.comingsoon.it", "filmontv": "https://www.comingsoon.it",
+7 -24
View File
@@ -9,9 +9,8 @@ from core import scrapertoolsV2, httptools, servertools, tmdb, support
from core.item import Item from core.item import Item
from lib import unshortenit from lib import unshortenit
from platformcode import logger, config from platformcode import logger, config
from specials import autoplay
#impostati dinamicamente da getUrl() #impostati dinamicamente da findhost()
host = "" host = ""
headers = "" headers = ""
@@ -56,31 +55,15 @@ def mainlist(item):
return locals() return locals()
@support.scrape
def menu(item): def menu(item):
findhost() findhost()
itemlist= [] patronBlock = item.args + r'<span.*?><\/span>.*?<ul.*?>(.*?)<\/ul>'
data = httptools.downloadpage(item.url, headers=headers).data patron = r'href="?(?P<url>[^">]+)"?>(?P<title>.*?)<\/a>'
data = re.sub('\n|\t', '', data) thumb = ''
block = scrapertoolsV2.find_single_match(data, item.args + r'<span.*?><\/span>.*?<ul.*?>(.*?)<\/ul>') action = 'peliculas'
support.log('MENU BLOCK= ',block)
patron = r'href="?([^">]+)"?>(.*?)<\/a>'
matches = re.compile(patron, re.DOTALL).findall(block)
for scrapedurl, scrapedtitle in matches:
itemlist.append(
Item(
channel=item.channel,
title=scrapedtitle,
contentType=item.contentType,
action='peliculas',
url=host + scrapedurl
)
)
return support.thumb(itemlist)
return locals()
def newest(categoria): def newest(categoria):
+23 -14
View File
@@ -112,21 +112,15 @@ def dbg():
web_pdb.set_trace() web_pdb.set_trace()
def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="", patronBlock="", def regexDbg(item, patron, headers, data=''):
patronNext="", action="findvideos", addVideolibrary = True, typeContentDict={}, typeActionDict={}):
import json, urllib2, webbrowser import json, urllib2, webbrowser
url = 'https://regex101.com' url = 'https://regex101.com'
html = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"') if not data:
html = re.sub('\n|\t', ' ', html) html = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"')
html = re.sub('\n|\t', ' ', html)
m = re.search(r'\((?!\?)', patron) else:
n = 0 html = data
dbg()
while m:
patron = patron[:m.end()] + '?P<' + listGroups[n] + '>' + patron[m.end():]
m = re.search(r'\((?!\?)', patron)
n += 1
headers = {'content-type': 'application/json'} headers = {'content-type': 'application/json'}
data = { data = {
@@ -141,6 +135,17 @@ def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="
permaLink = json.loads(r)['permalinkFragment'] permaLink = json.loads(r)['permalinkFragment']
webbrowser.open(url + "/r/" + permaLink) webbrowser.open(url + "/r/" + permaLink)
def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="", patronBlock="",
patronNext="", action="findvideos", addVideolibrary = True, typeContentDict={}, typeActionDict={}):
m = re.search(r'\((?!\?)', patron)
n = 0
while m:
patron = patron[:m.end()] + '?P<' + listGroups[n] + '>' + patron[m.end():]
m = re.search(r'\((?!\?)', patron)
n += 1
regexDbg(item, patron, headers)
return return
def scrape(func): def scrape(func):
@@ -182,7 +187,7 @@ def scrape(func):
addVideolibrary = args['addVideolibrary'] if 'addVideolibrary' in args else True addVideolibrary = args['addVideolibrary'] if 'addVideolibrary' in args else True
blacklist = args['blacklist'] if 'blacklist' in args else '' blacklist = args['blacklist'] if 'blacklist' in args else ''
data = args['data'] if 'data' in args else '' data = args['data'] if 'data' in args else ''
headers = args['headers'] if 'headers' in args else '' headers = args['headers'] if 'headers' in args else func.__globals__['headers']
patron = args['patron'] if 'patron' in args else '' patron = args['patron'] if 'patron' in args else ''
patronNext = args['patronNext'] if 'patronNext' in args else '' patronNext = args['patronNext'] if 'patronNext' in args else ''
patronBlock = args['patronBlock'] if 'patronBlock' in args else '' patronBlock = args['patronBlock'] if 'patronBlock' in args else ''
@@ -190,6 +195,7 @@ def scrape(func):
typeContentDict = args['type_content_dict'] if 'type_content_dict' in args else {} typeContentDict = args['type_content_dict'] if 'type_content_dict' in args else {}
if 'pagination' in args: pagination = args['pagination'] if args['pagination'] else 20 if 'pagination' in args: pagination = args['pagination'] if args['pagination'] else 20
else: pagination = '' else: pagination = ''
log('PATRON= ', patron) log('PATRON= ', patron)
if not data: if not data:
data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"') data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"')
@@ -215,11 +221,14 @@ def scrape(func):
matches = scrapertoolsV2.find_multiple_matches_groups(block, patron) matches = scrapertoolsV2.find_multiple_matches_groups(block, patron)
log('MATCHES =', matches) log('MATCHES =', matches)
if 'debug' in args:
regexDbg(item, patron, headers, block)
known_keys = ['url', 'title', 'title2', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', known_keys = ['url', 'title', 'title2', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere',
'rating', 'type', 'lang'] # by greko aggiunto episode 'rating', 'type', 'lang'] # by greko aggiunto episode
lang = '' # aggiunto per gestire i siti con pagine di serietv dove si hanno i video in ita e in subita lang = '' # aggiunto per gestire i siti con pagine di serietv dove si hanno i video in ita e in subita
pag = item.page if item.page else 1 # pagination pag = item.page if item.page else 1 # pagination
for i, match in enumerate(matches): for i, match in enumerate(matches):
if pagination and (pag - 1) * pagination > i: continue # pagination if pagination and (pag - 1) * pagination > i: continue # pagination
+1 -1
View File
@@ -645,7 +645,7 @@ def get_languages(channel):
:return: list :return: list
''' '''
logger.info() logger.info()
list_language = ['No filtrar'] list_language = ['Non filtrare']
list_controls, dict_settings = channeltools.get_channel_controls_settings(channel) list_controls, dict_settings = channeltools.get_channel_controls_settings(channel)
for control in list_controls: for control in list_controls:
try: try: