regex debugger e altre modifiche varie
This commit is contained in:
@@ -17,13 +17,10 @@
|
|||||||
"casacinemainfo": "https://www.casacinema.info",
|
"casacinemainfo": "https://www.casacinema.info",
|
||||||
"cb01anime": "https://www.cineblog01.ink",
|
"cb01anime": "https://www.cineblog01.ink",
|
||||||
"cinemalibero": "https://www.cinemalibero.best",
|
"cinemalibero": "https://www.cinemalibero.best",
|
||||||
"cinemastreaming": "https://cinemastreaming.icu",
|
|
||||||
"documentaristreamingda": "https://documentari-streaming-da.com",
|
"documentaristreamingda": "https://documentari-streaming-da.com",
|
||||||
"dreamsub": "https://www.dreamsub.stream",
|
"dreamsub": "https://www.dreamsub.stream",
|
||||||
"eurostreaming": "https://eurostreaming.pink",
|
"eurostreaming": "https://eurostreaming.pink",
|
||||||
"eurostreaming_video": "https://www.eurostreaming.best",
|
|
||||||
"fastsubita": "http://fastsubita.com",
|
"fastsubita": "http://fastsubita.com",
|
||||||
"ffilms":"https://ffilms.org",
|
|
||||||
"filmigratis": "https://filmigratis.net",
|
"filmigratis": "https://filmigratis.net",
|
||||||
"filmgratis": "https://www.filmaltadefinizione.net",
|
"filmgratis": "https://www.filmaltadefinizione.net",
|
||||||
"filmontv": "https://www.comingsoon.it",
|
"filmontv": "https://www.comingsoon.it",
|
||||||
|
|||||||
+7
-24
@@ -9,9 +9,8 @@ from core import scrapertoolsV2, httptools, servertools, tmdb, support
|
|||||||
from core.item import Item
|
from core.item import Item
|
||||||
from lib import unshortenit
|
from lib import unshortenit
|
||||||
from platformcode import logger, config
|
from platformcode import logger, config
|
||||||
from specials import autoplay
|
|
||||||
|
|
||||||
#impostati dinamicamente da getUrl()
|
#impostati dinamicamente da findhost()
|
||||||
host = ""
|
host = ""
|
||||||
headers = ""
|
headers = ""
|
||||||
|
|
||||||
@@ -56,31 +55,15 @@ def mainlist(item):
|
|||||||
return locals()
|
return locals()
|
||||||
|
|
||||||
|
|
||||||
|
@support.scrape
|
||||||
def menu(item):
|
def menu(item):
|
||||||
findhost()
|
findhost()
|
||||||
itemlist= []
|
patronBlock = item.args + r'<span.*?><\/span>.*?<ul.*?>(.*?)<\/ul>'
|
||||||
data = httptools.downloadpage(item.url, headers=headers).data
|
patron = r'href="?(?P<url>[^">]+)"?>(?P<title>.*?)<\/a>'
|
||||||
data = re.sub('\n|\t', '', data)
|
thumb = ''
|
||||||
block = scrapertoolsV2.find_single_match(data, item.args + r'<span.*?><\/span>.*?<ul.*?>(.*?)<\/ul>')
|
action = 'peliculas'
|
||||||
support.log('MENU BLOCK= ',block)
|
|
||||||
patron = r'href="?([^">]+)"?>(.*?)<\/a>'
|
|
||||||
matches = re.compile(patron, re.DOTALL).findall(block)
|
|
||||||
for scrapedurl, scrapedtitle in matches:
|
|
||||||
itemlist.append(
|
|
||||||
Item(
|
|
||||||
channel=item.channel,
|
|
||||||
title=scrapedtitle,
|
|
||||||
contentType=item.contentType,
|
|
||||||
action='peliculas',
|
|
||||||
url=host + scrapedurl
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return support.thumb(itemlist)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
return locals()
|
||||||
|
|
||||||
|
|
||||||
def newest(categoria):
|
def newest(categoria):
|
||||||
|
|||||||
+23
-14
@@ -112,21 +112,15 @@ def dbg():
|
|||||||
web_pdb.set_trace()
|
web_pdb.set_trace()
|
||||||
|
|
||||||
|
|
||||||
def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="", patronBlock="",
|
def regexDbg(item, patron, headers, data=''):
|
||||||
patronNext="", action="findvideos", addVideolibrary = True, typeContentDict={}, typeActionDict={}):
|
|
||||||
import json, urllib2, webbrowser
|
import json, urllib2, webbrowser
|
||||||
url = 'https://regex101.com'
|
url = 'https://regex101.com'
|
||||||
|
|
||||||
html = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"')
|
if not data:
|
||||||
html = re.sub('\n|\t', ' ', html)
|
html = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"')
|
||||||
|
html = re.sub('\n|\t', ' ', html)
|
||||||
m = re.search(r'\((?!\?)', patron)
|
else:
|
||||||
n = 0
|
html = data
|
||||||
dbg()
|
|
||||||
while m:
|
|
||||||
patron = patron[:m.end()] + '?P<' + listGroups[n] + '>' + patron[m.end():]
|
|
||||||
m = re.search(r'\((?!\?)', patron)
|
|
||||||
n += 1
|
|
||||||
|
|
||||||
headers = {'content-type': 'application/json'}
|
headers = {'content-type': 'application/json'}
|
||||||
data = {
|
data = {
|
||||||
@@ -141,6 +135,17 @@ def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="
|
|||||||
permaLink = json.loads(r)['permalinkFragment']
|
permaLink = json.loads(r)['permalinkFragment']
|
||||||
webbrowser.open(url + "/r/" + permaLink)
|
webbrowser.open(url + "/r/" + permaLink)
|
||||||
|
|
||||||
|
|
||||||
|
def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="", patronBlock="",
|
||||||
|
patronNext="", action="findvideos", addVideolibrary = True, typeContentDict={}, typeActionDict={}):
|
||||||
|
m = re.search(r'\((?!\?)', patron)
|
||||||
|
n = 0
|
||||||
|
while m:
|
||||||
|
patron = patron[:m.end()] + '?P<' + listGroups[n] + '>' + patron[m.end():]
|
||||||
|
m = re.search(r'\((?!\?)', patron)
|
||||||
|
n += 1
|
||||||
|
regexDbg(item, patron, headers)
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
def scrape(func):
|
def scrape(func):
|
||||||
@@ -182,7 +187,7 @@ def scrape(func):
|
|||||||
addVideolibrary = args['addVideolibrary'] if 'addVideolibrary' in args else True
|
addVideolibrary = args['addVideolibrary'] if 'addVideolibrary' in args else True
|
||||||
blacklist = args['blacklist'] if 'blacklist' in args else ''
|
blacklist = args['blacklist'] if 'blacklist' in args else ''
|
||||||
data = args['data'] if 'data' in args else ''
|
data = args['data'] if 'data' in args else ''
|
||||||
headers = args['headers'] if 'headers' in args else ''
|
headers = args['headers'] if 'headers' in args else func.__globals__['headers']
|
||||||
patron = args['patron'] if 'patron' in args else ''
|
patron = args['patron'] if 'patron' in args else ''
|
||||||
patronNext = args['patronNext'] if 'patronNext' in args else ''
|
patronNext = args['patronNext'] if 'patronNext' in args else ''
|
||||||
patronBlock = args['patronBlock'] if 'patronBlock' in args else ''
|
patronBlock = args['patronBlock'] if 'patronBlock' in args else ''
|
||||||
@@ -190,6 +195,7 @@ def scrape(func):
|
|||||||
typeContentDict = args['type_content_dict'] if 'type_content_dict' in args else {}
|
typeContentDict = args['type_content_dict'] if 'type_content_dict' in args else {}
|
||||||
if 'pagination' in args: pagination = args['pagination'] if args['pagination'] else 20
|
if 'pagination' in args: pagination = args['pagination'] if args['pagination'] else 20
|
||||||
else: pagination = ''
|
else: pagination = ''
|
||||||
|
|
||||||
log('PATRON= ', patron)
|
log('PATRON= ', patron)
|
||||||
if not data:
|
if not data:
|
||||||
data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"')
|
data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"')
|
||||||
@@ -215,11 +221,14 @@ def scrape(func):
|
|||||||
matches = scrapertoolsV2.find_multiple_matches_groups(block, patron)
|
matches = scrapertoolsV2.find_multiple_matches_groups(block, patron)
|
||||||
log('MATCHES =', matches)
|
log('MATCHES =', matches)
|
||||||
|
|
||||||
|
if 'debug' in args:
|
||||||
|
regexDbg(item, patron, headers, block)
|
||||||
|
|
||||||
known_keys = ['url', 'title', 'title2', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere',
|
known_keys = ['url', 'title', 'title2', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere',
|
||||||
'rating', 'type', 'lang'] # by greko aggiunto episode
|
'rating', 'type', 'lang'] # by greko aggiunto episode
|
||||||
lang = '' # aggiunto per gestire i siti con pagine di serietv dove si hanno i video in ita e in subita
|
lang = '' # aggiunto per gestire i siti con pagine di serietv dove si hanno i video in ita e in subita
|
||||||
|
|
||||||
pag = item.page if item.page else 1 # pagination
|
pag = item.page if item.page else 1 # pagination
|
||||||
|
|
||||||
for i, match in enumerate(matches):
|
for i, match in enumerate(matches):
|
||||||
if pagination and (pag - 1) * pagination > i: continue # pagination
|
if pagination and (pag - 1) * pagination > i: continue # pagination
|
||||||
|
|||||||
@@ -645,7 +645,7 @@ def get_languages(channel):
|
|||||||
:return: list
|
:return: list
|
||||||
'''
|
'''
|
||||||
logger.info()
|
logger.info()
|
||||||
list_language = ['No filtrar']
|
list_language = ['Non filtrare']
|
||||||
list_controls, dict_settings = channeltools.get_channel_controls_settings(channel)
|
list_controls, dict_settings = channeltools.get_channel_controls_settings(channel)
|
||||||
for control in list_controls:
|
for control in list_controls:
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user