Correcciones
This commit is contained in:
@@ -77,7 +77,7 @@
|
|||||||
"id": "intervenidos_channels_list",
|
"id": "intervenidos_channels_list",
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"label": "Lista de canales y clones de NewPct1 intervenidos y orden de sustitución de URLs",
|
"label": "Lista de canales y clones de NewPct1 intervenidos y orden de sustitución de URLs",
|
||||||
"default": "('0', 'canal_org', 'canal_des', 'url_org', 'url_des', 'patron1', 'patron2', 'patron3', 'patron4', 'patron5', 'content_inc', 'content_exc', 'ow_force'), ('1', 'mejortorrent', 'mejortorrent1', 'http://www.mejortorrent.com/', 'https://mejortorrent1.com/', '(http.?:\/\/.*?\/)', 'http.?:\/\/.*?\/.*?-torrent.?-[^-]+-(?:[^-]+-)([^0-9]+-)', 'http.?:\/\/.*?\/.*?-torrent.?-[^-]+-(?:[^-]+-)[^0-9]+-\\d+-(Temporada-).html', 'http.?:\/\/.*?\/.*?-torrent.?-[^-]+-(?:[^-]+-)[^0-9]+-(\\d+)-', '', 'tvshow, season', '', 'force'), ('1', 'mejortorrent', 'mejortorrent1', 'http://www.mejortorrent.com/', 'https://mejortorrent1.com/', '(http.?:\/\/.*?\/)', 'http.?:\/\/.*?\/.*?-torrent.?-[^-]+-([^.]+).html', '', '', '', 'movie', '', 'force')",
|
"default": "('0', 'canal_org', 'canal_des', 'url_org', 'url_des', 'patron1', 'patron2', 'patron3', 'patron4', 'patron5', 'content_inc', 'content_exc', 'ow_force'), ('0', 'mejortorrent', 'mejortorrent1', 'http://www.mejortorrent.com/', 'https://mejortorrent1.com/', '(http.?:\/\/.*?\/)', 'http.?:\/\/.*?\/.*?-torrent.?-[^-]+-(?:[^-]+-)([^0-9]+-)', 'http.?:\/\/.*?\/.*?-torrent.?-[^-]+-(?:[^-]+-)[^0-9]+-\\d+-(Temporada-).html', 'http.?:\/\/.*?\/.*?-torrent.?-[^-]+-(?:[^-]+-)[^0-9]+-(\\d+)-', '', 'tvshow, season', '', 'force'), ('0', 'mejortorrent', 'mejortorrent1', 'http://www.mejortorrent.com/', 'https://mejortorrent1.com/', '(http.?:\/\/.*?\/)', 'http.?:\/\/.*?\/.*?-torrent.?-[^-]+-([^.]+).html', '', '', '', 'movie', '', 'force'), ('1', 'mejortorrent', 'mejortorrent', 'http://www.mejortorrent.com/', 'http://www.mejortorrent.org/', '', '', '', '', '', '*', '', 'force'), ('1', 'plusdede', 'megadede', 'https://www.plusdede.com', 'https://www.megadede.com', '', '', '', '', '', '*', '', 'auto')",
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"visible": false
|
"visible": false
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -147,7 +147,7 @@ def get_source(url):
|
|||||||
|
|
||||||
logger.info()
|
logger.info()
|
||||||
data = httptools.downloadpage(url).data
|
data = httptools.downloadpage(url).data
|
||||||
data = re.sub(r'"|\n|\r|\t| |<br>|\s{2,}', "", data)
|
data = re.sub(r'\n|\r|\t| |<br>|\s{2,}', "", data)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def list_all (item):
|
def list_all (item):
|
||||||
@@ -159,17 +159,20 @@ def list_all (item):
|
|||||||
else:
|
else:
|
||||||
contentType = 'pelicula'
|
contentType = 'pelicula'
|
||||||
action = 'findvideos'
|
action = 'findvideos'
|
||||||
if item.type not in ['normal', 'seccion', 'serie']:
|
if 'pagination' in item.url:
|
||||||
post = {'page':item.page, 'type':item.type,'slug':item.slug,'id':item.id}
|
post = {'page':item.page, 'type':item.type,'slug':item.slug,'id':item.id}
|
||||||
post = urllib.urlencode(post)
|
post = urllib.urlencode(post)
|
||||||
data =httptools.downloadpage(item.url, post=post, headers=CHANNEL_HEADERS).data
|
data =httptools.downloadpage(item.url, post=post, headers=CHANNEL_HEADERS).data
|
||||||
data = re.sub(r'"|\n|\r|\t| |<br>|\s{2,}', "", data)
|
data = re.sub(r'\n|\r|\t| |<br>|\s{2,}', "", data)
|
||||||
patron ='<a href=(.*?)><figure><img.*?src=(.*?) alt=.*?<p>(.*?)<\/p><span>(\d{4})<\/span>'
|
patron = '<a href="([^"]+)">.*?<figure><img.*?src="([^"]+)".*?'
|
||||||
|
patron +='<span class="year text-center">(\d{4})</span>.*?<p>([^<]+)</p>'
|
||||||
else:
|
else:
|
||||||
data = get_source(item.url)
|
data = get_source(item.url)
|
||||||
patron = 'item-%s><a href=(.*?)><figure><img.*?data-src=(.*?) alt=.*?<p>(.*?)<\/p><span>(\d{4})</span>'%contentType
|
patron = '<div class="item-pelicula pull-left"><a href="([^"]+)">.*?data-src="([^"]+)".*?'
|
||||||
|
patron += '<span class="year text-center">([^<]+)</span>.*?<p>([^<]+)</p>'
|
||||||
|
|
||||||
matches = scrapertools.find_multiple_matches(data, patron)
|
matches = scrapertools.find_multiple_matches(data, patron)
|
||||||
for scrapedurl, scrapedthumbnail, scrapedtitle, scrapedyear in matches:
|
for scrapedurl, scrapedthumbnail, scrapedyear, scrapedtitle in matches:
|
||||||
url = host+scrapedurl+'p001/'
|
url = host+scrapedurl+'p001/'
|
||||||
thumbnail = scrapedthumbnail
|
thumbnail = scrapedthumbnail
|
||||||
contentTitle=scrapedtitle
|
contentTitle=scrapedtitle
|
||||||
@@ -192,8 +195,8 @@ def list_all (item):
|
|||||||
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb =True)
|
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb =True)
|
||||||
#Paginacion
|
#Paginacion
|
||||||
|
|
||||||
next_page_valid = scrapertools.find_single_match(data, '<div class=butmore(?: site=series|) page=(.*?) id=(.*?) '
|
next_page_valid = scrapertools.find_single_match(data, '<div class="butmore" site=(?:""|"series") page="(\d+)" '
|
||||||
'type=(.*?) limit=.*?>')
|
'id="(.*?)" type="([^"]+)" limit="\d+">')
|
||||||
if item.type != 'normal' and (len(itemlist)>19 or next_page_valid):
|
if item.type != 'normal' and (len(itemlist)>19 or next_page_valid):
|
||||||
type = item.type
|
type = item.type
|
||||||
if item.type == 'serie':
|
if item.type == 'serie':
|
||||||
@@ -233,24 +236,18 @@ def seccion(item):
|
|||||||
data = get_source(item.url)
|
data = get_source(item.url)
|
||||||
page = "1"
|
page = "1"
|
||||||
if item.seccion == 'generos':
|
if item.seccion == 'generos':
|
||||||
patron = '<li><a href=(.*?)><i class=ion-cube><\/i>(.*?)<\/span>'
|
patron = '<li><a href="([^"]+)"><i class="ion-cube"></i>([^<]+)<'
|
||||||
type = 'genre'
|
type = 'genre'
|
||||||
pat = 'genero/'
|
pat = 'genero/'
|
||||||
elif item.seccion == 'anios':
|
elif item.seccion == 'anios':
|
||||||
patron = '<li><a href=(\/peliculas.*?)>(\d{4})<\/a>'
|
patron = '<li><a href="(\/peliculas.*?)">(\d{4})<\/a>'
|
||||||
type = 'year'
|
type = 'year'
|
||||||
pat = 'peliculas-'
|
pat = 'peliculas-'
|
||||||
matches = scrapertools.find_multiple_matches(data, patron)
|
matches = scrapertools.find_multiple_matches(data, patron)
|
||||||
for scrapedurl, scrapedtitle in matches:
|
for scrapedurl, scrapedtitle in matches:
|
||||||
title = scrapedtitle
|
title = scrapedtitle
|
||||||
if item.seccion == 'generos':
|
|
||||||
cant = re.sub(r'.*?<span class=cant-genre>','',scrapedtitle)
|
|
||||||
only_title = re.sub(r'<.*','',scrapedtitle).rstrip()
|
|
||||||
title = only_title+' (%s)'%cant
|
|
||||||
url = host+scrapedurl
|
url = host+scrapedurl
|
||||||
slug = scrapertools.find_single_match(scrapedurl, "%s(.*?)/" %pat)
|
slug = scrapertools.find_single_match(scrapedurl, "%s(.*?)/" %pat)
|
||||||
if item.seccion in ['generos', 'anios']:
|
|
||||||
url = host + "/pagination/"
|
|
||||||
itemlist.append(
|
itemlist.append(
|
||||||
Item(action="list_all",
|
Item(action="list_all",
|
||||||
channel=item.channel,
|
channel=item.channel,
|
||||||
|
|||||||
@@ -206,7 +206,7 @@ def episodesxseason(item):
|
|||||||
language=language,
|
language=language,
|
||||||
infoLabels=infoLabels
|
infoLabels=infoLabels
|
||||||
))
|
))
|
||||||
|
itemlist = filtertools.get_links(itemlist, item, list_language)
|
||||||
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
|
tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
|
||||||
return itemlist
|
return itemlist
|
||||||
|
|
||||||
|
|||||||
@@ -56,21 +56,19 @@ def novedades(item):
|
|||||||
data = httptools.downloadpage(item.url).data
|
data = httptools.downloadpage(item.url).data
|
||||||
data = re.sub(r"\n|\r|\t|\s{2}| |<Br>|<BR>|<br>|<br/>|<br />|-\s", "", data)
|
data = re.sub(r"\n|\r|\t|\s{2}| |<Br>|<BR>|<br>|<br/>|<br />|-\s", "", data)
|
||||||
data = re.sub(r"<!--.*?-->", "", data)
|
data = re.sub(r"<!--.*?-->", "", data)
|
||||||
patron = '<a title="([^"]+)" href="([^"]+)".*?>'
|
data = scrapertools.find_single_match(data, "<div class='main section' id='main'>(.*?)</ul>")
|
||||||
patron += "<img.*?src='([^']+)'"
|
patron = "<div class='post-header'>(.*?)</span>"
|
||||||
matches = re.compile(patron, re.DOTALL).findall(data)
|
matches = re.compile(patron, re.DOTALL).findall(data)
|
||||||
|
|
||||||
for scrapedtitle, scrapedurl, scrapedthumb in matches:
|
for serie_data in matches:
|
||||||
# patron = "^(.*?)(?:Ya Disponible|Disponible|Disponbile|disponible|\(Actualizada\))$"
|
scrapedtitle = scrapertools.find_single_match(serie_data, "title='([^']+)'")
|
||||||
# match = re.compile(patron, re.DOTALL).findall(scrapedtitle)
|
scrapedurl = scrapertools.find_single_match(serie_data, 'href="([^"]+)"')
|
||||||
|
scrapedthumb = scrapertools.find_single_match(serie_data, "src='([^']+)'")
|
||||||
title = scrapertools.decodeHtmlentities(scrapedtitle)
|
title = scrapertools.decodeHtmlentities(scrapedtitle)
|
||||||
language=''
|
language=''
|
||||||
# language = scrapertools.find_multiple_matches(title,'(Vose|Español|Latino)')
|
title = title.replace ('Disponible','')
|
||||||
# for lang in language:
|
title = title.replace('Ya', '')
|
||||||
# title = title.replace(lang,'')
|
title = title.strip()
|
||||||
# title = title.replace ('Disponible','')
|
|
||||||
# title = title.replace('Ya', '')
|
|
||||||
# title = title.strip()
|
|
||||||
|
|
||||||
show = scrapertools.find_single_match(title, "^(.+?) \d+[x|X]\d+")
|
show = scrapertools.find_single_match(title, "^(.+?) \d+[x|X]\d+")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user