Fixes to fit scrapertools changes

This commit is contained in:
thepasto
2019-04-13 13:37:54 +02:00
parent a9090a1ef3
commit ceffb74c73
38 changed files with 74 additions and 75 deletions

View File

@@ -83,7 +83,7 @@ def menu(item):
logger.info("[altadefinizionehd.py] DATA"+data)
patron = r'<li id="menu.*?><a href="#">FILM PER ' + item.extra + r'<\/a><ul class="sub-menu">(.*?)<\/ul>'
logger.info("[altadefinizionehd.py] BLOCK"+patron)
block = scrapertools.get_match(data, patron)
block = scrapertools.find_single_match(data, patron)
logger.info("[altadefinizionehd.py] BLOCK"+block)
patron = r'<li id=[^>]+><a href="(.*?)">(.*?)<\/a><\/li>'
matches = re.compile(patron, re.DOTALL).findall(block)

View File

@@ -321,7 +321,7 @@ def findvideos(item):
if item.extra:
data = httptools.downloadpage(item.url, headers=headers).data
blocco = scrapertools.get_match(data, r'%s(.*?)</tr>' % item.extra)
blocco = scrapertools.find_single_match(data, r'%s(.*?)</tr>' % item.extra)
url = scrapertools.find_single_match(blocco, r'<a href="([^"]+)"[^>]*>')
if 'vvvvid' in url.lower():
itemlist = [Item(title='I Video VVVVID Non sono supportati', text_color="red")]

View File

@@ -311,7 +311,7 @@ def findvideos(item):
if item.extra:
data = httptools.downloadpage(item.url, headers=headers).data
blocco = scrapertools.get_match(data, r'%s(.*?)</tr>' % item.extra)
blocco = scrapertools.find_single_match(data, r'%s(.*?)</tr>' % item.extra)
item.url = scrapertools.find_single_match(blocco, r'<a href="([^"]+)"[^>]+>')
patron = r'http:\/\/link[^a]+animesubita[^o]+org\/[^\/]+\/.*?(episodio\d*)[^p]+php(\?.*)'

View File

@@ -128,7 +128,7 @@ def lista_genere(item):
data = httptools.downloadpage(item.url).data
bloque = scrapertools.get_match(data,
bloque = scrapertools.find_single_match(data,
'<div class="hentry page post-1 odd author-admin clear-block">(.*?)<div id="disqus_thread">')
patron = '<li class="cat-item cat-item.*?"><a href="(.*?)" >(.*?)</a>'

View File

@@ -60,7 +60,7 @@ def build_menu(item):
data = re.sub(r'\n|\t','',data)
data = re.sub(r'>\s*<','><',data)
block = scrapertoolsV2.get_match(data, r'<form class="filters.*?>(.*?)<\/form>')
block = scrapertoolsV2.find_single_match(data, r'<form class="filters.*?>(.*?)<\/form>')
matches = re.compile(r'<button class="btn btn-sm btn-default dropdown-toggle" data-toggle="dropdown"> (.*?) <span.*?>(.*?)<\/ul>', re.DOTALL).findall(block)
@@ -152,7 +152,7 @@ def alfabetico(item):
data = re.sub(r'\n|\t','',data)
data = re.sub(r'>\s*<','><',data)
block = scrapertoolsV2.get_match(data, r'<span>.*?A alla Z.<\/span>.*?<ul>(.*?)<\/ul>')
block = scrapertoolsV2.find_single_match(data, r'<span>.*?A alla Z.<\/span>.*?<ul>(.*?)<\/ul>')
matches = re.compile('<a href="([^"]+)" title="([^"]+)">', re.DOTALL).findall(block)
scrapertoolsV2.printMatches(matches)

View File

@@ -237,7 +237,7 @@ def categorias(item):
data = httptools.downloadpage(item.url, headers=headers).data
# Narrow search by selecting only the combo
bloque = scrapertools.get_match(data, 'Categorie(.*?)</ul>')
bloque = scrapertools.find_single_match(data, 'Categorie(.*?)</ul>')
# The categories are the options for the combo
patron = '<a href="(.*?)">(.*?)</a></li>'
@@ -283,7 +283,7 @@ def episodios(item):
# Carica la pagina
data = httptools.downloadpage(item.url).data
data = scrapertools.decodeHtmlentities(data)
data = scrapertools.get_match(data, '<p>(?:<strong>|)(.*?)<div id="disqus_thread">')
data = scrapertools.find_single_match(data, '<p>(?:<strong>|)(.*?)<div id="disqus_thread">')
lang_titles = []
starts = []

View File

@@ -82,7 +82,7 @@ def build_itemlist(item, re_bloque, re_patron, iaction):
data = httptools.downloadpage(item.url).data
# Narrow search by selecting only the combo
bloque = scrapertools.get_match(data, re_bloque)
bloque = scrapertools.find_single_match(data, re_bloque)
# The categories are the options for the combo
matches = re.compile(re_patron, re.DOTALL).findall(bloque)
@@ -160,7 +160,7 @@ def list_titles(item):
# Put the next page mark
try:
next_page = scrapertools.get_match(data, "<link rel='next' href='([^']+)'")
next_page = scrapertools.find_single_match(data, "<link rel='next' href='([^']+)'")
itemlist.append(
Item(channel=item.channel,
action="list_titles",

View File

@@ -62,7 +62,7 @@ def menu(item):
itemlist= []
data = httptools.downloadpage(item.url, headers=headers).data
data = re.sub('\n|\t', '', data)
block = scrapertoolsV2.get_match(data, item.args + r'<span.*?><\/span>.*?<ul.*?>(.*?)<\/ul>')
block = scrapertoolsV2.find_single_match(data, item.args + r'<span.*?><\/span>.*?<ul.*?>(.*?)<\/ul>')
support.log('MENU BLOCK= ',block)
patron = r'href="?([^">]+)"?>(.*?)<\/a>'
matches = re.compile(patron, re.DOTALL).findall(block)
@@ -290,12 +290,12 @@ def play(item):
data = httptools.downloadpage(item.url).data
if "window.location.href" in data:
try:
data = scrapertoolsV2.get_match(data, 'window.location.href = "([^"]+)";')
data = scrapertoolsV2.find_single_match(data, 'window.location.href = "([^"]+)";')
except IndexError:
data = httptools.downloadpage(item.url, only_headers=True, follow_redirects=False).headers.get("location", "")
data, c = unshortenit.unwrap_30x_only(data)
else:
data = scrapertoolsV2.get_match(data, r'<a href="([^"]+)".*?class="btn-wrapper">.*?licca.*?</a>')
data = scrapertoolsV2.find_single_match(data, r'<a href="([^"]+)".*?class="btn-wrapper">.*?licca.*?</a>')
logger.debug("##### play go.php data ##\n%s\n##" % data)
else:

View File

@@ -90,7 +90,7 @@ def categorie(item):
itemlist = []
data = httptools.downloadpage(item.url).data
blocco = scrapertools.get_match(data, r'<ul>\s*<li class="drop">(.*?)</ul>')
blocco = scrapertools.find_single_match(data, r'<ul>\s*<li class="drop">(.*?)</ul>')
patron = r'<li><a href="([^"]+)">([^"]+)</a></li>'
matches = re.compile(patron, re.DOTALL).findall(blocco)
@@ -113,7 +113,7 @@ def filmperanno(item):
itemlist = []
data = httptools.downloadpage(item.url).data
blocco = scrapertools.get_match(data, r'<li class="drop"><a.*?class="link1"><b>Film per anno</b></a>(.*?)</ul>')
blocco = scrapertools.find_single_match(data, r'<li class="drop"><a.*?class="link1"><b>Film per anno</b></a>(.*?)</ul>')
patron = r'<li><a href="([^"]+)">([^"]+)</a></li>'
matches = re.compile(patron, re.DOTALL).findall(blocco)

View File

@@ -39,7 +39,7 @@ def video(item):
# Carica la pagina
data = httptools.downloadpage(item.url, headers=headers).data
block = scrapertools.get_match(data, r'<main>(.*?)<\/main>')
block = scrapertools.find_single_match(data, r'<main>(.*?)<\/main>')
block = re.sub('\t|\n', '', block)
patron = r'<article.*?class="TPost C">.*?<a href="([^"]+)">.*?src="([^"]+)".*?>.*?<h3 class="Title">([^<]+)<\/h3>(.*?)<\/article>'

View File

@@ -97,7 +97,7 @@ def categorias(item):
# Carica la pagina
data = httptools.downloadpage(item.url, headers=headers).data
bloque = scrapertools.get_match(data, '<h4>Genere</h4>(.*?)<li class="genre">')
bloque = scrapertools.find_single_match(data, '<h4>Genere</h4>(.*?)<li class="genre">')
# Estrae i contenuti
patron = r'<a href="([^"]+)" title="([^"]+)">'

View File

@@ -90,7 +90,7 @@ def epoche(item):
data = httptools.downloadpage(item.url, headers=headers).data
# Narrow search by selecting only the combo
bloque = scrapertools.get_match(data, '<h1 class="pagetitle">Epoche</h1>(.*?)</ul>')
bloque = scrapertools.find_single_match(data, '<h1 class="pagetitle">Epoche</h1>(.*?)</ul>')
# The categories are the options for the combo
patron = '<a href="([^"]+)">(.*?)<'

View File

@@ -62,7 +62,7 @@ def categorias(item):
# Carica la pagina
data = httptools.downloadpage(item.url).data
bloque = scrapertools.get_match(data, 'Categorie</a></li>(.*?)</ul>')
bloque = scrapertools.find_single_match(data, 'Categorie</a></li>(.*?)</ul>')
# Estrae i contenuti
patron = '<a href="([^"]+)">([^<]+)</a></li>'

View File

@@ -67,7 +67,7 @@ def serietv(item):
# Carica la pagina
data = httptools.downloadpage(item.url).data
bloque = scrapertools.get_match(data,
bloque = scrapertools.find_single_match(data,
'<input type="submit" value="Vai!" class="blueButton">(.*?)<div class="footer">')
# Estrae i contenuti
@@ -117,7 +117,7 @@ def ultimiep(item):
# Carica la pagina
data = httptools.downloadpage(item.url).data
bloque = scrapertools.get_match(data, '<ul class="last" id="recentAddedEpisodesAnimeDDM">(.*?)</ul>')
bloque = scrapertools.find_single_match(data, '<ul class="last" id="recentAddedEpisodesAnimeDDM">(.*?)</ul>')
# Estrae i contenuti
patron = '<li><a href="([^"]+)"[^>]+>([^<]+)<br>'
@@ -193,7 +193,7 @@ def episodios(item):
itemlist = []
data = httptools.downloadpage(item.url).data
bloque = scrapertools.get_match(data, '<div class="seasonEp">(.*?)<div class="footer">')
bloque = scrapertools.find_single_match(data, '<div class="seasonEp">(.*?)<div class="footer">')
patron = '<li><a href="([^"]+)"[^<]+<b>(.*?)<\/b>[^>]+>([^<]+)<\/i>(.*?)<'
matches = re.compile(patron, re.DOTALL).findall(bloque)

View File

@@ -155,11 +155,11 @@ def episodios(item):
data = scrapertools.decodeHtmlentities(data)
link = False
if scrapertoolsV2.get_match(data, '<div class="nano_cp_container"><span.*?CLICCA QUI'):
if scrapertoolsV2.find_single_match(data, '<div class="nano_cp_container"><span.*?CLICCA QUI'):
item.url = scrapertoolsV2.find_single_match(data, '<script type="text\/javascript">.*?var nano_ajax_object =.*?"go_to":"(.*?)"').replace('\\', '')
link = True
else:
match = scrapertoolsV2.get_match(data, '<h3 style="text-align: center;">.*?<a href="(.*?)">.{0,5}<span.*?CLICCA QUI.*?</a></h3>')
match = scrapertoolsV2.find_single_match(data, '<h3 style="text-align: center;">.*?<a href="(.*?)">.{0,5}<span.*?CLICCA QUI.*?</a></h3>')
if match != '':
item.url = match
link = True
@@ -167,7 +167,7 @@ def episodios(item):
data = httptools.downloadpage(item.url).data
data = scrapertools.decodeHtmlentities(data)
data = scrapertoolsV2.get_match(data, '<div class="su-accordion">(.+?)<div class="clear">')
data = scrapertoolsV2.find_single_match(data, '<div class="su-accordion">(.+?)<div class="clear">')
lang_titles = []
starts = []

View File

@@ -207,7 +207,7 @@ def findvideos(item):
itemlist = []
data = httptools.downloadpage(item.url, headers=headers).data
bloque = scrapertools.get_match(data, '<div class="entry-content">(.*?)<footer class="entry-footer">')
bloque = scrapertools.find_single_match(data, '<div class="entry-content">(.*?)<footer class="entry-footer">')
patron = r'<a href="([^"]+)">'
matches = re.compile(patron, re.DOTALL).findall(bloque)

View File

@@ -113,7 +113,7 @@ def annoattuale(item):
itemlist = []
data = httptools.downloadpage(item.url).data
blocco = scrapertools.get_match(data, r'<div class="left-menu-main">(.*?)</div>')
blocco = scrapertools.find_single_match(data, r'<div class="left-menu-main">(.*?)</div>')
patron = r'<a href="([^"]+)">Film\s*\d{4}</a>'
item.url = urlparse.urljoin(host, scrapertools.find_single_match(blocco, patron))
@@ -127,7 +127,7 @@ def categorie(item):
itemlist = []
data = httptools.downloadpage(item.url).data
blocco = scrapertools.get_match(data, r'<div class="menu-janr-content">(.*?)</div>')
blocco = scrapertools.find_single_match(data, r'<div class="menu-janr-content">(.*?)</div>')
patron = r'<a href="([^"]+)">([^<]+)</a>'
matches = re.compile(patron, re.DOTALL).findall(blocco)
@@ -152,7 +152,7 @@ def peranno(item):
itemlist = []
data = httptools.downloadpage(item.url).data
blocco = scrapertools.get_match(data, r'<div class="sort-menu-title">\s*Anno di pubblicazione:\s*</div>(.*?)</div>')
blocco = scrapertools.find_single_match(data, r'<div class="sort-menu-title">\s*Anno di pubblicazione:\s*</div>(.*?)</div>')
patron = r'<a href="([^"]+)">([^<]+)</a>'
matches = re.compile(patron, re.DOTALL).findall(blocco)
@@ -176,7 +176,7 @@ def perpaese(item):
itemlist = []
data = httptools.downloadpage(item.url).data
blocco = scrapertools.get_match(data, r'<div class="sort-menu-title">\s*Paesi di produzione:\s*</div>(.*?)</div>')
blocco = scrapertools.find_single_match(data, r'<div class="sort-menu-title">\s*Paesi di produzione:\s*</div>(.*?)</div>')
patron = r'<a href="([^"]+)">([^<]+)</a>'
matches = re.compile(patron, re.DOTALL).findall(blocco)

View File

@@ -97,7 +97,7 @@ def categorie(item):
# Carica la pagina
data = httptools.downloadpage(item.url, headers=headers).data
bloque = scrapertools.get_match(data,
bloque = scrapertools.find_single_match(data,
'GENERI<span class="mega-indicator">(.*?)<\/ul>')
# Estrae i contenuti

View File

@@ -117,7 +117,7 @@ def peliculas(item):
itemlist = []
data = httptools.downloadpage(item.url, headers=headers).data
block = scrapertoolsV2.get_match(data, r'<ul class="posts">(.*)<\/ul>')
block = scrapertoolsV2.find_single_match(data, r'<ul class="posts">(.*)<\/ul>')
patron = r'<li><a href="([^"]+)" data-thumbnail="([^"]+)">.*?<div class="title">([^<]+)<\/div>'
matches = scrapertoolsV2.find_multiple_matches(block, patron)
@@ -170,7 +170,7 @@ def episodios(item):
if 'accordion-item' in data:
block = scrapertoolsV2.get_match(data, 'accordion-item.*?>(.*?)<div id="disqus_thread">')
block = scrapertoolsV2.find_single_match(data, 'accordion-item.*?>(.*?)<div id="disqus_thread">')
patron = r'<img src="([^"]+)">.*?<li class="season-no">(.*?)<\/li>(.*?)<\/table>'
matches = scrapertoolsV2.find_multiple_matches(block, patron)
@@ -193,7 +193,7 @@ def episodios(item):
else:
block = scrapertoolsV2.get_match(data, '<div id="info" class="pad">(.*?)<div id="disqus_thread">').replace('</p>','<br />').replace('×','x')
block = scrapertoolsV2.find_single_match(data, '<div id="info" class="pad">(.*?)<div id="disqus_thread">').replace('</p>','<br />').replace('×','x')
matches = scrapertoolsV2.find_multiple_matches(block, r'<strong>(.*?)<\/strong>.*?<p>(.*?)<span')
for lang, seasons in matches:
lang = re.sub('.*?Stagione[^a-zA-Z]+', '', lang)

View File

@@ -283,7 +283,7 @@ def categorias(item):
data = httptools.downloadpage(item.url, headers=headers).data
# Narrow search by selecting only the combo
bloque = scrapertools.get_match(data, '<ul class="sub-menu">(.*?)</ul>')
bloque = scrapertools.find_single_match(data, '<ul class="sub-menu">(.*?)</ul>')
# The categories are the options for the combo
patron = '<a href="([^"]+)">(.*?)</a></li>'

View File

@@ -94,7 +94,7 @@ def categorias(item):
data = httptools.downloadpage(item.url, headers=headers).data
# Narrow search by selecting only the combo
bloque = scrapertools.get_match(data, '<ul class="reset dropmenu">(.*?)</ul>')
bloque = scrapertools.find_single_match(data, '<ul class="reset dropmenu">(.*?)</ul>')
# The categories are the options for the combo
patron = '<li><a href="([^"]+)">(.*?)</a></li>'

View File

@@ -100,7 +100,7 @@ def nuoveserie(item):
itemlist = []
data = httptools.downloadpage(item.url, headers=headers).data
blocco = scrapertools.get_match(data, '<div\s*class="container container-title-serie-new container-scheda" meta-slug="new">(.*?)</div></div><div')
blocco = scrapertools.find_single_match(data, '<div\s*class="container container-title-serie-new container-scheda" meta-slug="new">(.*?)</div></div><div')
patron = r'<a\s*href="([^"]+)".*?>\s*<img\s*.*?src="([^"]+)" />[^>]+>[^>]+>[^>]+>[^>]+>'
patron += r'[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>([^<]+)</p>'
@@ -132,7 +132,7 @@ def serietvaggiornate(item):
itemlist = []
data = httptools.downloadpage(item.url, headers=headers).data
blocco = scrapertools.get_match(data,
blocco = scrapertools.find_single_match(data,
r'<div\s*class="container container-title-serie-lastep container-scheda" meta-slug="lastep">(.*?)</div></div><div')
patron = r'<a\s*rel="nofollow" href="([^"]+)"[^>]+> <img\s*.*?src="([^"]+)"[^>]+>[^>]+>'
@@ -168,7 +168,7 @@ def categorie(item):
itemlist = []
data = httptools.downloadpage(item.url, headers=headers).data
blocco = scrapertools.get_match(data, r'<ul\s*class="dropdown-menu category">(.*?)</ul>')
blocco = scrapertools.find_single_match(data, r'<ul\s*class="dropdown-menu category">(.*?)</ul>')
patron = r'<li>\s*<a\s*href="([^"]+)"[^>]+>([^<]+)</a></li>'
matches = re.compile(patron, re.DOTALL).findall(blocco)
@@ -196,7 +196,7 @@ def lista_serie(item):
data = httptools.downloadpage(item.url, headers=headers).data
patron = r'<a\s*href="([^"]+)".*?>\s*<img\s*.*?src="([^"]+)" />[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>([^<]+)</p></div>'
blocco = scrapertools.get_match(data,
blocco = scrapertools.find_single_match(data,
r'<div\s*class="col-xs-\d+ col-sm-\d+-\d+">(.*?)<div\s*class="container-fluid whitebg" style="">')
matches = re.compile(patron, re.DOTALL).findall(blocco)

View File

@@ -97,7 +97,7 @@ def categorias(item):
# Carica la pagina
data = httptools.downloadpage(item.url).data
bloque = scrapertools.get_match(data, '<ul class="genres scrolling">(.*?)</ul>')
bloque = scrapertools.find_single_match(data, '<ul class="genres scrolling">(.*?)</ul>')
# Estrae i contenuti
patron = '<li[^>]+><a href="(.*?)"[^>]+>(.*?)</a>'
@@ -226,7 +226,7 @@ def nuoviep(item):
# Carica la pagina
data = httptools.downloadpage(item.url).data
#blocco = scrapertools.get_match(data,
#blocco = scrapertools.find_single_match(data,
# r'<div class="items" style="margin-bottom:0px!important">(.*?)<div class="items" style="margin-bottom:0px!important">')
# Estrae i contenuti

View File

@@ -52,7 +52,7 @@ def categorie(item):
itemlist = []
data = httptools.downloadpage(item.url, headers=headers).data
bloque = scrapertools.get_match(data, '<section class="container directory">(.*?)<footer class="main">')
bloque = scrapertools.find_single_match(data, '<section class="container directory">(.*?)<footer class="main">')
patron = '<a class="label label-white" href="(.*?)">\s*(.*?)</a>'
matches = re.compile(patron, re.DOTALL).findall(bloque)

View File

@@ -233,7 +233,7 @@ def peliculas(item):
# Pagina successiva
try:
pagina_siguiente = scrapertools.get_match(data, '<a class="next page-numbers" href="([^"]+)"')
pagina_siguiente = scrapertools.find_single_match(data, '<a class="next page-numbers" href="([^"]+)"')
itemlist.append(
Item(channel=item.channel,
action="peliculas",
@@ -309,7 +309,7 @@ def peliculas_tv(item):
# Successivo
try:
pagina_siguiente = scrapertools.get_match(data, '<a class="next page-numbers" href="([^"]+)"')
pagina_siguiente = scrapertools.find_single_match(data, '<a class="next page-numbers" href="([^"]+)"')
itemlist.append(
Item(channel=item.channel,
action="peliculas_tv",
@@ -354,7 +354,7 @@ def pel_tv(item):
# Siguiente
try:
pagina_siguiente = scrapertools.get_match(data, '<a class="next page-numbers" href="([^"]+)"')
pagina_siguiente = scrapertools.find_single_match(data, '<a class="next page-numbers" href="([^"]+)"')
itemlist.append(
Item(channel=item.channel,
action="pel_tv",

View File

@@ -206,7 +206,7 @@ def episodios(item):
item.url = re.sub('\-\d+[^\d]+$', '-links', item.url)
data = httptools.downloadpage(item.url).data
data = scrapertools.decodeHtmlentities(data)
data = scrapertools.get_match(data, '<div class="entry-content">(.*?)<div class="clear"></div>')
data = scrapertools.find_single_match(data, '<div class="entry-content">(.*?)<div class="clear"></div>')
lang_titles = []
starts = []

View File

@@ -98,7 +98,7 @@ def categorie(item):
item.url = host
data = httptools.downloadpage(item.url, headers=headers).data
bloque = scrapertools.get_match(data, 'Genere(.*?)</select>')
bloque = scrapertools.find_single_match(data, 'Genere(.*?)</select>')
patron = '<option value="([^"]+)">(.*?)</option>'
matches = re.compile(patron, re.DOTALL).findall(bloque)
@@ -128,7 +128,7 @@ def peliculas_tv(item):
p = int(p)
data = httptools.downloadpage(item.url, headers=headers).data
bloque = scrapertools.get_match(data, 'Lista Serie Tv</h2>(.*?)</section>')
bloque = scrapertools.find_single_match(data, 'Lista Serie Tv</h2>(.*?)</section>')
patron = '<a href=\'(/serie/[^\']+)\'>([^<]+)</a>'
matches = re.compile(patron, re.DOTALL).findall(bloque)

View File

@@ -67,7 +67,7 @@ def categorias(item):
data = httptools.downloadpage(item.url).data
# Narrow search by selecting only the combo
bloque = scrapertools.get_match(data, '<option class="level-0" value="7">(.*?)<option class="level-0" value="8">')
bloque = scrapertools.find_single_match(data, '<option class="level-0" value="7">(.*?)<option class="level-0" value="8">')
# The categories are the options for the combo
patron = '<option class=[^=]+="([^"]+)">(.*?)<'

View File

@@ -42,7 +42,7 @@ def categorias(item):
# Carica la pagina
data = httptools.downloadpage(item.url).data
bloque = scrapertools.get_match(data, '<h2>Film Per Genere</h2><ul class="genres scrolling">(.*?)</ul>')
bloque = scrapertools.find_single_match(data, '<h2>Film Per Genere</h2><ul class="genres scrolling">(.*?)</ul>')
# Estrae i contenuti
patron = '<li[^>]+><a href="([^"]+)"[^>]+>([^<]+)<\/a>'

View File

@@ -240,7 +240,7 @@ def episodios(item):
itemlist = []
data = httptools.downloadpage(item.url, headers=headers).data
blocco = scrapertools.get_match(data, '<table>(.*?)</table>')
blocco = scrapertools.find_single_match(data, '<table>(.*?)</table>')
patron = "<tr><td><b>(.*?)(\d+)((?:x\d+| ))(.*?)<\/b>(.*?<tr>)"
matches = scrapertoolsV2.find_multiple_matches(blocco, patron)

View File

@@ -40,7 +40,7 @@ def categorie(item):
data = httptools.downloadpage(item.url).data
blocco = scrapertools.get_match(data, '<div class="menu-container">(.*?)</div>')
blocco = scrapertools.find_single_match(data, '<div class="menu-container">(.*?)</div>')
patron = r'<li[^>]+><a title="[^"]+" href="([^"]+)">([^<]+)</a></li>'
matches = re.compile(patron, re.DOTALL).findall(blocco)

View File

@@ -87,7 +87,7 @@ def play(item):
data = httptools.downloadpage(url).data
if "spankwire" in url :
data = httptools.downloadpage(item.url).data
data = scrapertools.get_match(data,'Copy Embed Code(.*?)For Desktop')
data = scrapertools.find_single_match(data,'Copy Embed Code(.*?)For Desktop')
patron = '<div class="shareDownload_container__item__dropdown">.*?<a href="([^"]+)"'
matches = scrapertools.find_multiple_matches(data, patron)
for scrapedurl in matches:

View File

@@ -103,7 +103,7 @@ def categorie(item):
itemlist = []
data = httptools.downloadpage(item.url, headers=headers).data
blocco = scrapertools.get_match(data, r'<h2>Sfoglia</h2>\s*<ul>(.*?)</ul>\s*</section>')
blocco = scrapertools.find_single_match(data, r'<h2>Sfoglia</h2>\s*<ul>(.*?)</ul>\s*</section>')
patron = r'<li><a href="([^"]+)">([^<]+)</a></li>'
matches = re.compile(patron, re.DOTALL).findall(blocco)

View File

@@ -221,7 +221,7 @@ def dooplay_get_links(item, host):
"type": type
})
dataAdmin = httptools.downloadpage(host + 'wp-admin/admin-ajax.php', post=postData,headers={'Referer': item.url}).data
link = scrapertoolsV2.get_match(dataAdmin, "<iframe.*src='([^']+)'")
link = scrapertoolsV2.find_single_match(dataAdmin, "<iframe.*src='([^']+)'")
ret.append({
'url': link,
'title': title,
@@ -249,18 +249,18 @@ def swzz_get_url(item):
if "/link/" in item.url:
data = httptools.downloadpage(item.url, headers=headers).data
if "link =" in data:
data = scrapertoolsV2.get_match(data, 'link = "([^"]+)"')
data = scrapertoolsV2.find_single_match(data, 'link = "([^"]+)"')
if 'http' not in data:
data = 'https:' + data
else:
match = scrapertoolsV2.get_match(data, r'<meta name="og:url" content="([^"]+)"')
match = scrapertoolsV2.get_match(data, r'URL=([^"]+)">') if not match else match
match = scrapertoolsV2.find_single_match(data, r'<meta name="og:url" content="([^"]+)"')
match = scrapertoolsV2.find_single_match(data, r'URL=([^"]+)">') if not match else match
if not match:
from lib import jsunpack
try:
data = scrapertoolsV2.get_match(data.replace('\n', ''), r"(eval\s?\(function\(p,a,c,k,e,d.*?)</script>")
data = scrapertoolsV2.find_single_match(data.replace('\n', ''), r"(eval\s?\(function\(p,a,c,k,e,d.*?)</script>")
data = jsunpack.unpack(data)
logger.debug("##### play /link/ unpack ##\n%s\n##" % data)

View File

@@ -97,7 +97,7 @@ def categorias(item):
# Carica la pagina
data = httptools.downloadpage(item.url, headers=headers).data
bloque = scrapertools.get_match(data,
bloque = scrapertools.find_single_match(data,
'<ul class="table-list">(.*?)</ul>')
# Estrae i contenuti

View File

@@ -48,7 +48,7 @@ def progetti(item):
itemlist = []
data = httptools.downloadpage(item.url, headers=headers).data
blocco = scrapertools.get_match(data, '<div id="pf_imageMenu1" class="imageMenu">(.*?)</div>')
blocco = scrapertools.find_single_match(data, '<div id="pf_imageMenu1" class="imageMenu">(.*?)</div>')
patron = '<a href="[^=]+=([\w]+)">([^<]+)</a>'
matches = re.compile(patron, re.DOTALL).findall(blocco)
@@ -142,7 +142,7 @@ def findvideos(item):
data = httptools.downloadpage(item.url, headers=headers).data
patronvideo = 'flashvars="file=([^&]+)&'
urlvideo = scrapertools.get_match(data, patronvideo)
urlvideo = scrapertools.find_single_match(data, patronvideo)
estensionevideo = urlvideo.split(".")[-1]

View File

@@ -22,7 +22,7 @@ def findhost():
data = httptools.downloadpage(cb01Url).data
global host, headers
host = scrapertoolsV2.get_match(data, r'<a class="?mega-menu-link"? href=(https://vedohd[^/"]+)')+'/'
host = scrapertoolsV2.find_single_match(data, r'<a class="?mega-menu-link"? href=(https://vedohd[^/"]+)')+'/'
if 'https' not in host: # in caso cb01 cambi, si spera di riuscire ad accedere da questo URL
host = "https://vedohd.pw/"
@@ -77,7 +77,7 @@ def findvideos(item):
for link in support.dooplay_get_links(item, host):
if link['title'] != 'Trailer':
logger.info(link['title'])
server, quality = scrapertoolsV2.get_match(link['title'], '([^ ]+) ?(HD|3D)?')
server, quality = scrapertoolsV2.find_single_match(link['title'], '([^ ]+) ?(HD|3D)?')
if quality:
title = server + " [COLOR blue][" + quality + "][/COLOR]"
else:

View File

@@ -17,32 +17,31 @@ def get_video_url(page_url, premium=False, user="", password="", video_password=
logger.info("[wstream.py] url=" + page_url)
video_urls = []
data = scrapertools.downloadpage(page_url, headers=headers).replace('https','http')
data = httptools.downloadpage(page_url, headers=headers).data.replace('https', 'http')
# logger.info("[wstream.py] data=" + data)
vid = scrapertools.find_multiple_matches(data,'download_video.*?>.*?<.*?<td>([^\,,\s]+)')
vid = scrapertools.find_multiple_matches(data, 'download_video.*?>.*?<.*?<td>([^\,,\s]+)')
headers.append(['Referer', page_url])
post_data = scrapertools.find_single_match(data, "</div>\s*<script type='text/javascript'>(eval.function.p,a,c,k,e,.*?)\s*</script>")
post_data = scrapertools.find_single_match(data,
"</div>\s*<script type='text/javascript'>(eval.function.p,a,c,k,e,.*?)\s*</script>")
if post_data != "":
from lib import jsunpack
data = jsunpack.unpack(post_data)
from lib import jsunpack
data = jsunpack.unpack(post_data)
media_url = scrapertools.find_multiple_matches(data, '(http.*?\.mp4)')
_headers = urllib.urlencode(dict(headers))
i=0
i = 0
for media_url in media_url:
video_urls.append([vid[i] + " mp4 [wstream] ", media_url + '|' + _headers])
i=i+1
i = i + 1
for video_url in video_urls:
logger.info("[wstream.py] %s - %s" % (video_url[0], video_url[1]))
return video_urls
return video_urls
def find_videos(data):
encontrados = set()
devuelve = []