From 64a16a0c1faaf8bb3b45c6a254496c823a1f800f Mon Sep 17 00:00:00 2001
From: Kingbox <37674310+lopezvg@users.noreply.github.com>
Date: Wed, 16 May 2018 20:07:00 +0200
Subject: [PATCH] Newpct1 clones: new redesign
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Normalized titles in every function, both with "smart titles" enabled
  and without them. Each title type now looks the same across the whole
  channel, with unnecessary words removed
- Normalized qualities, with shorter and more meaningful labels
- Normalized extra info such as "Saga, Collection, Extended Cut, etc."
- The video's language and size are added to the title
- New search function with dynamic pagination: a series' episodes are now
  compressed into a single title (per video quality). Several web pages
  are loaded into a single Alfa page until it is full
- Current and total page numbers shown at the bottom
- Simplified Servers page, but with more practical information
- Info on the total number of episodes of the latest season
- Extensive use of information from ThemovieDB.org
- Internal improvements
---
 plugin.video.alfa/channels/descargas2020.py   | 1297 ++++++++++++-----
 plugin.video.alfa/channels/mispelisyseries.py | 1297 ++++++++++++-----
 plugin.video.alfa/channels/torrentlocura.py   | 1297 ++++++++++++-----
 plugin.video.alfa/channels/torrentrapid.py    | 1297 ++++++++++++-----
 plugin.video.alfa/channels/tumejortorrent.py  | 1297 ++++++++++++-----
 plugin.video.alfa/channels/tvsinpagar.py      | 1297 ++++++++++++-----
 6 files changed, 5730 insertions(+), 2052 deletions(-)

diff --git a/plugin.video.alfa/channels/descargas2020.py b/plugin.video.alfa/channels/descargas2020.py
index 32d8e713..7d0af962 100644
--- a/plugin.video.alfa/channels/descargas2020.py
+++ b/plugin.video.alfa/channels/descargas2020.py
@@ -1,6 +1,9 @@
-# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
 import re
+import sys
+import urllib
+import urlparse
 
 from channelselector import get_thumb
 from core import httptools
@@ -17,9 +20,13 @@ def mainlist(item):
     itemlist = []
 
-    thumb_pelis=get_thumb("channels_movie.png")
-    thumb_series=get_thumb("channels_tvshow.png")
-    thumb_search = get_thumb("search.png")
+    thumb_pelis = get_thumb("channels_movie.png")
+    thumb_pelis_hd = get_thumb("channels_movie_hd.png")
+    thumb_series = get_thumb("channels_tvshow.png")
+    thumb_series_hd = get_thumb("channels_tvshow_hd.png")
+    thumb_series_az = get_thumb("channels_tvshow_az.png")
+    thumb_docus = get_thumb("channels_documentary.png")
+    thumb_buscar = get_thumb("search.png")
 
     itemlist.append(Item(channel=item.channel, action="submenu", title="Películas", url=host, extra="peliculas",
                          thumbnail=thumb_pelis))
@@ -28,9 +35,9 @@ def mainlist(item):
                          thumbnail=thumb_series))
     itemlist.append(Item(channel=item.channel, action="submenu", title="Documentales", url=host, extra="varios",
-                         thumbnail=thumb_series))
+                         thumbnail=thumb_docus))
     itemlist.append(
-        Item(channel=item.channel, action="search", title="Buscar", url=host + "buscar", thumbnail=thumb_search))
+        Item(channel=item.channel, action="search", title="Buscar", url=host + "buscar", thumbnail=thumb_buscar))
 
     return itemlist
 
@@ -40,13 +47,17 @@ def submenu(item):
     data = re.sub(r"\n|\r|\t|\s{2}|(<!--.*?-->)", "", httptools.downloadpage(item.url).data)
     data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")
-    data = data.replace("'", '"').replace('/series"', '/series/"')    #compatibility with mispelisy.series.com
-    patron = '  • .*?(.*?)'
+    host_dom = host.replace("https://", "").replace("http://", "").replace("www.", "")
+    patron = '  • .*?(.*?)'
     if "pelisyseries.com" in host and item.extra == "varios":    #compatibility with mispelisy.series.com
-        data = ' Documentales'
+        data = ' Documentales'
     else:
-        data = scrapertools.get_match(data, patron)
+        if data:
+            data = scrapertools.get_match(data, patron)
+        else:
+            return itemlist
 
     patron = '<.*?href="([^"]+)".*?>([^>]+)'
     matches = re.compile(patron, re.DOTALL).findall(data)
@@ -55,14 +66,14 @@ def submenu(item):
         title = scrapedtitle.strip()
         url = scrapedurl
 
-        itemlist.append(Item(channel=item.channel, action="listado", title=title, url=url, extra="pelilist"))
+        itemlist.append(Item(channel=item.channel, action="listado", title=title, url=url, extra=item.extra))
         itemlist.append(
-            Item(channel=item.channel, action="alfabeto", title=title + " [A-Z]", url=url, extra="pelilist"))
+            Item(channel=item.channel, action="alfabeto", title=title + " [A-Z]", url=url, extra=item.extra))
 
     if item.extra == "peliculas":
-        itemlist.append(Item(channel=item.channel, action="listado", title="Películas 4K", url=host + "peliculas-hd/4kultrahd/", extra="pelilist"))
+        itemlist.append(Item(channel=item.channel, action="listado", title="Películas 4K", url=host + "peliculas-hd/4kultrahd/", extra=item.extra))
         itemlist.append(
-            Item(channel=item.channel, action="alfabeto", title="Películas 4K" + " [A-Z]", url=host + "peliculas-hd/4kultrahd/", extra="pelilist"))
+            Item(channel=item.channel, action="alfabeto", title="Películas 4K" + " [A-Z]", url=host + "peliculas-hd/4kultrahd/", extra=item.extra))
 
     return itemlist
 
@@ -75,9 +86,13 @@ def alfabeto(item):
     data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")
 
     patron = ''
-    data = scrapertools.get_match(data, patron)
+    if data:
+        data = scrapertools.get_match(data, patron)
+    else:
+        return itemlist
 
     patron = ']+>([^>]+)'
+
     matches = re.compile(patron, re.DOTALL).findall(data)
 
     for scrapedurl, scrapedtitle in matches:
@@ -92,304 +107,857 @@ def listado(item):
     logger.info()
     itemlist = []
-    url_next_page =''
+    clase = "pelilist"    # tag used to locate the content listing area
+    url_next_page = ''    # pagination control
+    cnt_tot = 30          # maximum number of items per page
+
+    if item.category:
+        del item.category
+    if item.totalItems:
+        del item.totalItems
 
     data = re.sub(r"\n|\r|\t|\s{2}|(<!--.*?-->)", "", httptools.downloadpage(item.url).data)
-    #data = httptools.downloadpage(item.url).data
-    data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")
-    if item.modo != 'next' or item.modo =='':
-        patron = ''
+
+    #Set the basic values according to the content type
+    if item.extra == "peliculas":
+        item.action = "findvideos"
+        item.contentType = "movie"
+        pag = True    #there is pagination
+    elif item.extra == "series" and not "/miniseries" in item.url:
+        item.action = "episodios"
+        item.contentType = "tvshow"
+        pag = True
+    elif item.extra == "varios" or "/miniseries" in item.url:
+        item.action = "findvideos"
+        item.contentType = "movie"
+        pag = True
+
+    #Select the section of the page that holds the content listing
+    patron = ''
+    if data:
         fichas = scrapertools.get_match(data, patron)
-        page_extra = item.extra
     else:
-        fichas = data
-        page_extra = item.extra
+        return itemlist
+    page_extra = clase
 
+    #Scrape each video's data. The alternative title is kept, although it is not used for now
+    patron = 'Last<\/a><\/li>')
+
+    if not item.post_num:
+        post_num = 1
+    else:
+        post_num = int(item.post_num) + 1
+    if not total_pag:
+        total_pag = 1
+    #Work out the number of channel pages per web page
+    total_pag = int(total_pag) * int((float(len(matches))/float(cnt_tot)) + 0.999999)
+
+    # Prepare the pagination
+    if not item.cnt_pag:
+        cnt_pag = 0
+    else:
+        cnt_pag = item.cnt_pag
+        del item.cnt_pag
+
+    matches_cnt = len(matches)
     if item.next_page != 'b':
-        if len(matches) > 30:
+        if matches_cnt > cnt_pag + cnt_tot:
             url_next_page = item.url
-            matches = matches[:30]
-            next_page = 'b'
+            matches = matches[cnt_pag:cnt_pag+cnt_tot]
+            next_page = ''
+            if matches_cnt <= cnt_pag + (cnt_tot * 2):
+                if pag:
+                    next_page = 'b'
             modo = 'continue'
     else:
-        matches = matches[30:]
+        matches = matches[cnt_pag:cnt_pag+cnt_tot]
         next_page = 'a'
-        patron_next_page = 'Next<\/a>'
+        patron_next_page = 'Next<\/a>'
         matches_next_page = re.compile(patron_next_page, re.DOTALL).findall(data)
         modo = 'continue'
         if len(matches_next_page) > 0:
-            url_next_page = matches_next_page[0]
+            url_next_page = urlparse.urljoin(item.url, matches_next_page[0])
             modo = 'next'
+
+    # Advance the line counter within a page
+    if item.next_page:
+        del item.next_page
+    if modo == 'next':
+        cnt_pag = 0
+    else:
+        cnt_pag += cnt_tot
 
+    #Process all the content entries, creating a local Item variable
     for scrapedurl, scrapedtitle, scrapedthumbnail, title_alt, calidad in matches:
-        url = scrapedurl
-        title = scrapedtitle.replace("�", "ñ").replace("Ã±", "ñ").replace("Temp", " Temp").replace("Esp", " Esp").replace("Ing", " Ing").replace("Eng", " Eng").replace("Calidad", "")
-        title_alt = title_alt.replace("�", "ñ").replace("Ã±", "ñ").replace("Temp", " Temp").replace("Esp", " Esp").replace("Ing", " Ing").replace("Eng", " Eng").replace("Calidad", "")
-        thumbnail = scrapedthumbnail
-        action = "findvideos"
-        extra = ""
-        context = "movie"
-        year = scrapertools.find_single_match(scrapedthumbnail, r'-(\d{4})')
-        if not year or year <= "1900":
-            year = '-'
+        item_local = item.clone()
+        if item_local.tipo:
+            del item_local.tipo
+        if item_local.totalItems:
+            del item_local.totalItems
+        if item.post_num:
+            del item.post_num
 
-        if ".com/serie" in url and "/miniseries" not in url:
-            action = "episodios"
-            extra = "serie"
-            context = "tvshow"
-
-            title = scrapertools.find_single_match(title, '([^-]+)')
-            title = title.replace("Ver online", "", 1).replace("Descarga Serie HD", "", 1).replace("Ver en linea ", "", 1).strip()
-
-        else:
-            title = title.replace("Descargar torrent ", "", 1).replace("Descarga Gratis ", "", 1).replace("Descargar Estreno ", "", 1).replace("Pelicula en latino ", "", 1).replace("Descargar Pelicula ", "", 1).replace("Descargar", "", 1).replace("Descarga", "", 1).replace("Bajar", "", 1).strip()
-            if title.endswith("gratis"): title = title[:-7]
-            if title.endswith("torrent"): title = title[:-8]
-            if title.endswith("en HD"): title = title[:-6]
-
-        if title == "":
-            title = title_alt
-        context_title = title_alt
-        show = title_alt
-        if not config.get_setting("unify"):    #If smart titles are NOT selected:
-            if calidad:
-                title = title + ' [' + calidad + "]"
+        item_local.title = ''
+        item_local.context = "['buscar_trailer']"
 
-        if not 'array' in title:
-            itemlist.append(Item(channel=item.channel, action=action, title=title, url=url, thumbnail=thumbnail,
-                                 extra = extra, show = context_title, contentTitle=context_title, contentType=context, quality=calidad,
-                                 context=["buscar_trailer"], infoLabels= {'year':year}))
 
+        # Clean up the titles; extract quality, audio and language data
+        title = re.sub('\r\n', '', scrapedtitle).decode('iso-8859-1').encode('utf8').strip()
+        title_alt = re.sub('\r\n', '', title_alt).decode('iso-8859-1').encode('utf8').strip()
+        title = title.replace("á", "a", 1).replace("é", "e", 1).replace("í", "i", 1).replace("ó", "o", 1).replace("ú", "u", 1).replace("ü", "u", 1).replace("�", "ñ").replace("Ã±", "ñ")
+        title_alt = title_alt.replace("á", "a", 1).replace("é", "e", 1).replace("í", "i", 1).replace("ó", "o", 1).replace("ú", "u", 1).replace("ü", "u", 1).replace("�", "ñ").replace("Ã±", "ñ")
+
+        item_local.quality = calidad
+        title_subs = []
+
+        #Detect and flag languages other than Castilian Spanish
+        item_local.language = ""
+        if "[vos" in title.lower() or "v.o.s" in title.lower() or "vo" in title.lower() or ".com/pelicula/" in scrapedurl or ".com/series-vo" in scrapedurl or "-vo/" in scrapedurl or "vos" in calidad.lower() or "vose" in calidad.lower() or "v.o.s" in calidad.lower() or ".com/peliculas-vo" in item.url:
+            item_local.language = "VOS"
+            title = title.replace(" [Subs. integrados]", "").replace(" [subs. Integrados]", "").replace(" [VOSE", "").replace(" [VOS", "").replace(" (V.O.S.E)", "").replace(" VO", "")
+        if "latino" in title.lower() or "argentina" in title.lower() or "-latino/" in scrapedurl or "latino" in calidad.lower() or "argentina" in calidad.lower():
+            item_local.language = "LAT"
+
+        #Store 3D info in quality and clean it out of the title
+        if "3d" in title.lower():
+            if not "3d" in item_local.quality.lower():
+                item_local.quality = item_local.quality + " 3D"
+            calidad3D = scrapertools.find_single_match(title, r'\s(3[d|D]\s\w+)')
+            if calidad3D:
+                item_local.quality = item_local.quality.replace("3D", calidad3D)
+            title = re.sub(r'\s3[d|D]\s\w+', '', title)
+            title = re.sub(r'\s3[d|D]', '', title)
+            title_alt = re.sub(r'\s3[d|D]\s\w+', '', title_alt)
+            title_alt = re.sub(r'\s3[d|D]', '', title_alt)
+        if "imax" in title.lower():
+            item_local.quality = item_local.quality + " IMAX"
+            title = title.replace(" IMAX", "").replace(" imax", "")
+            title_alt = title_alt.replace(" IMAX", "").replace(" imax", "")
+        if "2d" in title.lower():
+            title = title.replace("(2D)", "").replace("(2d)", "").replace("2D", "").replace("2d", "")
+            title_subs += ["[2D]"]
+
+        #Extract extra info from the title and save it for after TMDB
+        if "temp" in title.lower() or "cap" in title.lower():    #Drop the season; only the whole series matters here
+            title = re.sub(r' - [t|T]emp\w+ \d+x\d+', '', title)
+            title = re.sub(r' - [t|T]emp\w+ \d+', '', title)
+            title = re.sub(r' - [t|T]emp\w+.*?\d+', '', title)
+            title = re.sub(r' [t|T]emp.*?\d+x\d+', '', title)
+            title = re.sub(r' [t|T]emp.*?\d+', '', title)
+            title = re.sub(r' [c|C]ap.*?\d+', '', title)
+        if "audio" in title.lower():    #Keep the audio info for after TMDB
+            title_subs += ['[%s]' % scrapertools.find_single_match(title, r'(\[[a|A]udio.*?\])')]
+            title = re.sub(r'\[[a|A]udio.*?\]', '', title)
+        if "[dual" in title.lower() or "multileng" in title.lower() or "multileng" in item_local.quality.lower():
+            item_local.language = "DUAL"
+            title = re.sub(r'\[[D|d]ual.*?\]', '', title)
+            title = re.sub(r'\[[M|m]ultileng.*?\]', '', title)
+            item_local.quality = re.sub(r'\[[M|m]ultileng.*?\]', '', item_local.quality)
+        if "duolog" in title.lower():
+            title_subs += ["[Saga]"]
+            title = title.replace(" Duologia", "").replace(" duologia", "").replace(" Duolog", "").replace(" duolog", "")
+        if "trilog" in title.lower():
+            title_subs += ["[Saga]"]
+            title = title.replace(" Trilogia", "").replace(" trilogia", "").replace(" Trilog", "").replace(" trilog", "")
+        if "extendida" in title.lower():
+            title_subs += ["[V. Extendida]"]
+            title = title.replace(" Version Extendida", "").replace(" (Version Extendida)", "").replace(" V. Extendida", "").replace(" VExtendida", "").replace(" V Extendida", "")
+        if "saga" in title.lower():
+            title = title.replace(" Saga Completa", "").replace(" saga completa", "").replace(" Saga", "").replace(" saga", "")
+            title_subs += ["[Saga]"]
+        if "colecc" in title.lower() or "completa" in title.lower():
+            title = title.replace(" Coleccion", "").replace(" coleccion", "").replace(" Colecci", "").replace(" colecci", "").replace(" Completa", "").replace(" completa", "").replace(" COMPLETA", "")
+        if scrapertools.find_single_match(title, r'(- [m|M].*?serie ?\w+)'):
+            title = re.sub(r'- [m|M].*?serie ?\w+', '', title)
+            title_subs += ["[Miniserie]"]
 
-        logger.debug("url: " + url + " / title: " + title + " / context title: " + context_title + " / context: " + context + " / calidad: " + calidad + " / year: " + year)
+        #Clean leftovers out of the title
+        title = title.replace("Castellano", "").replace("castellano", "").replace("inglés", "").replace("ingles", "").replace("Inglés", "").replace("Ingles", "").replace("Espa", "").replace("Ingl", "").replace("Engl", "").replace("Calidad", "").replace("de la Serie", "")
+        title_alt = title_alt.replace("Castellano", "").replace("castellano", "").replace("inglés", "").replace("ingles", "").replace("Inglés", "").replace("Ingles", "").replace("Espa", "").replace("Ingl", "").replace("Engl", "").replace("Calidad", "").replace("de la Serie", "")
+
+        #Strip headers and tails from the title
+        if not "torrentrapid.com" in host:
+            title = re.sub(r'Descargar\s', '', title)
+        else:
+            title = re.sub(r'Descargar\s\w+\s', '', title)
+            title = re.sub(r'Descargar\s\w+\-\w+', '', title)
+        title = re.sub(r'\(COMPLE.*?\)', '', title)
+        title = re.sub(r'\(\d{4}\)$', '', title)
+        title = re.sub(r'\d{4}$', '', title)
+        title = re.sub(r' \d+x\d+', '', title)
+        title = re.sub(r' x\d+', '', title)
+        title = title.replace("Ver online ", "").replace("Descarga Serie HD ", "").replace("Descargar Serie HD ", "").replace("Descarga Serie ", "").replace("Ver en linea ", "").replace("Ver en linea", "").replace("HD ", "").replace("(Proper)", "").replace("DVD", "").replace("- ES ", "").replace("ES ", "").replace("COMPLETA", "").strip()
+        title = title.replace("Descargar torrent ", "").replace("Descarga Gratis ", "").replace("Descargar Estreno ", "").replace("Pelicula en latino ", "").replace("Descargar Pelicula ", "").replace("Descargar Peliculas ", "").replace("Descargar Todas ", "").replace("Descargar ", "").replace("Descarga ", "").replace("Bajar ", "").replace("RIP ", "").replace("1080p ", "").replace("720p ", "").replace("DVD-Screener ", "").replace("Bonus Disc", "").replace("de Cine ", "").replace("latino", "").replace("Latino", "").replace("argentina", "").replace("Argentina", "").strip()
+        if title.endswith("torrent gratis"): title = title[:-15]
+        if title.endswith("gratis"): title = title[:-7]
+        if title.endswith("torrent"): title = title[:-8]
+        if title.endswith("en HD"): title = title[:-6]
+        if title.endswith(" -"): title = title[:-2]
+        if "en espa" in title: title = title[:-11]
+
+        item_local.quality = item_local.quality.replace("gratis ", "")
+        if "HDR" in title:
+            title = title.replace(" HDR", "")
+            if not "HDR" in item_local.quality:
+                item_local.quality += " HDR"
+
+        while title.endswith(' '):
+            title = title[:-1]
+        while title_alt.endswith(' '):
+            title_alt = title_alt[:-1]
+        while item_local.quality.endswith(' '):
+            item_local.quality = item_local.quality[:-1]
+        if not title:    #Fall back to title_alt when there is no original title
+            title = title_alt
+        if not title:
+            title = "SIN TITULO"
+
+        #Final cleanup of the title, stored in the right variable for its content type
+        title = scrapertools.remove_htmltags(title)
+        item_local.title = title
+        if item_local.contentType == "movie":
+            item_local.contentTitle = title
+        else:
+            item_local.contentSerieName = title
+
+        #Save the rest of the video's variables
+        item_local.url = scrapedurl
+        item_local.thumbnail = scrapedthumbnail
+        item_local.contentThumbnail = scrapedthumbnail
+
+        #Save the year that may come in the url, in case TMDB gives no results later
+        year = ''
+        if item_local.contentType == "movie":
+            year = scrapertools.find_single_match(scrapedurl, r'(\d{4})')
+            if year >= "1900" and year <= "2040" and year != "2020":
+                title_subs += [year]
+        item_local.infoLabels['year'] = '-'
+
+        #Save the temporary variable holding the title's extra info, to be restored after TMDB
+        item_local.title_subs = title_subs
+
+        #Add the local item to itemlist
+        itemlist.append(item_local.clone())
+
+    #Send the whole itemlist to TMDB
     tmdb.set_infoLabels(itemlist, True)
+
+    # Polishing pass over the titles obtained from TMDB
+    for item_local in itemlist:
+        title = item_local.title
 
-    if url_next_page:
-        itemlist.append(Item(channel=item.channel, action="listado", title=">> Página siguiente",
-                             url=url_next_page, next_page=next_page, folder=True,
-                             text_color='yellow', text_bold=True, modo = modo, plot = extra,
-                             extra = page_extra))
+        #Restore the extra info saved in the title_subs list, then delete it from Item
+        if len(item_local.title_subs) > 0:
+            title += " "
+            for title_subs in item_local.title_subs:
+                if "audio" in title_subs.lower():
+                    title = '%s [%s]' % (title, scrapertools.find_single_match(title_subs, r'[a|A]udio (.*?)'))
+                    continue
+                if scrapertools.find_single_match(title_subs, r'(\d{4})'):
+                    if not item_local.infoLabels['year'] or item_local.infoLabels['year'] == "-":
+                        item_local.infoLabels['year'] = scrapertools.find_single_match(title_subs, r'(\d{4})')
+                    continue
+                if not config.get_setting("unify"):    #If smart titles are NOT selected:
+                    title = '%s %s' % (title, title_subs)
+                else:
+                    title = '%s -%s-' % (title, title_subs)
+        del item_local.title_subs
+
+        # If TMDB has not found the video, clear the year
+        if item_local.infoLabels['year'] == "-":
+            item_local.infoLabels['year'] = ''
+            item_local.infoLabels['aired'] = ''
+
+        # Prepare the title for series, with the season count, if any
+        if item_local.contentType == "season" or item_local.contentType == "tvshow":
+            item_local.contentTitle = ''
+
+        rating = ''
+        if item_local.infoLabels['rating'] and item_local.infoLabels['rating'] != '0.0':
+            rating = float(item_local.infoLabels['rating'])
+            rating = round(rating, 1)
+
+        #Now polish the titles a little, depending on whether smart titles are selected or not
+        if not config.get_setting("unify"):    #If smart titles are NOT selected:
+            if item_local.contentType == "season" or item_local.contentType == "tvshow":
+                title = '%s [COLOR yellow][%s][/COLOR] [%s] [COLOR limegreen][%s][/COLOR] [COLOR red][%s][/COLOR]' % (title, scrapertools.find_single_match(str(item_local.infoLabels['aired']), r'\/(\d{4})'), rating, item_local.quality, item_local.language)
+
+            elif item_local.contentType == "movie":
+                title = '%s [COLOR yellow][%s][/COLOR] [%s] [COLOR limegreen][%s][/COLOR] [COLOR red][%s][/COLOR]' % (title, str(item_local.infoLabels['year']), rating, item_local.quality, item_local.language)
+
+        if config.get_setting("unify"):    #If smart titles ARE selected:
+            title = title.replace("[", "-").replace("]", "-")
+
+        title = title.replace("--", "").replace(" []", "").replace("()", "").replace("(/)", "").replace("[/]", "")
+        title = re.sub(r'\s\[COLOR \w+\]\[\]\[\/COLOR\]', '', title)
+        title = re.sub(r'\s\[COLOR \w+\]\[\/COLOR\]', '', title)
+        item_local.title = title
+
+        #logger.debug("url: " + item_local.url + " / title: " + item_local.title + " / content title: " + item_local.contentTitle + "/" + item_local.contentSerieName + " / calidad: " + item_local.quality + " / year: " + year)
+        #logger.debug(item_local)
+
+    if len(itemlist) == 0:
+        itemlist.append(Item(channel=item.channel, action="mainlist", title="No se ha podido cargar el listado"))
+    else:
+        if url_next_page:
+            itemlist.append(
+                Item(channel=item.channel, action="listado", title="[COLOR gold][B]Pagina siguiente >> [/B][/COLOR]" + str(post_num) + " de " + str(total_pag), url=url_next_page, next_page=next_page, cnt_pag=cnt_pag, post_num=post_num, pag=pag, modo=modo, extra=item.extra))
+
+    #logger.debug(url_next_page + " / " + next_page + " / " + str(matches_cnt) + " / " + str(cnt_pag) + " / " + str(total_pag) + " / " + str(pag) + " / " + modo + " / " + item.extra)
 
     return itemlist
 
 
 def listado_busqueda(item):
     logger.info()
     itemlist = []
 
-    data = re.sub(r"\n|\r|\t|\s{2,}", "", httptools.downloadpage(item.url, post=item.post).data)
-    data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")
+    cnt_tot = 40      # Maximum number of items per page. We let the web site control it
+    cnt_title = 0     # Counter of lines inserted into itemlist
+    cnt_pag = 0       # Counter of lines read from matches
 
-    list_chars = [["Ã±", "ñ"]]
+    if item.cnt_pag:
+        cnt_pag = item.cnt_pag    # Carried in Item across the previous pages
+        del item.cnt_pag
 
-    for el in list_chars:
-        data = re.sub(r"%s" % el[0], el[1], data)
+    if item.category:
+        del item.category
+    if item.totalItems:
+        del item.totalItems
+    if item.text_bold:
+        del item.text_bold
+    if item.text_color:
+        del item.text_color
 
-    try:
-        get, post = scrapertools.find_single_match(data, '
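
The dynamic pagination announced in the commit message comes down to two counters carried on the Item between calls: the read offset inside the current web page (cnt_pag) and the cap on items per Alfa page (cnt_tot), so one Alfa page can span several web pages and resume mid-page. Below is a minimal, self-contained Python 2 sketch of that bookkeeping; WEB_PAGES and fill_alfa_page are illustrative stand-ins, not the channel's actual code or data.

# -*- coding: utf-8 -*-
# Sketch of the pagination bookkeeping: fill one Alfa page with up to
# CNT_TOT items, reading across as many web pages as needed, and return
# the (page, offset) resume point for the "Pagina siguiente >>" item.
WEB_PAGES = [range(0, 45), range(45, 70), range(70, 130)]   # fake scraped pages
CNT_TOT = 30                                                # items per Alfa page

def fill_alfa_page(page_idx=0, cnt_pag=0):
    items = []
    while page_idx < len(WEB_PAGES) and len(items) < CNT_TOT:
        matches = WEB_PAGES[page_idx]
        chunk = matches[cnt_pag:cnt_pag + CNT_TOT - len(items)]
        items.extend(chunk)
        cnt_pag += len(chunk)
        if cnt_pag >= len(matches):     # this web page is exhausted, move on
            page_idx += 1
            cnt_pag = 0
    return items, page_idx, cnt_pag

page, offset = 0, 0
while True:
    items, page, offset = fill_alfa_page(page, offset)
    if not items:
        break
    print "%d items (resume at page %d, offset %d)" % (len(items), page, offset)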
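
The title normalization, in turn, relies on a save-and-restore pattern around the TMDB lookup: tags such as [Saga], [V. Extendida] or a trailing year are stripped into title_subs so TMDB sees a clean title, then re-applied afterwards in the smart-titles-on or smart-titles-off style. The following self-contained sketch shows that pattern; the regex list and the sample title are illustrative, not the channel's exact rules.

# -*- coding: utf-8 -*-
# Sketch of the save/restore pattern used around the TMDB lookup above.
import re

def split_extra_tags(title):
    title_subs = []
    for pattern, tag in [(r'\s[Tt]rilog\w*', '[Saga]'),
                         (r'\sV\.?\s?Extendida', '[V. Extendida]'),
                         (r'\s\((\d{4})\)$', None)]:    # None: keep the year digits
        match = re.search(pattern, title)
        if match:
            title_subs.append(tag if tag else match.group(1))
            title = re.sub(pattern, '', title)
    return title.strip(), title_subs

def restore_extra_tags(title, title_subs, unify=False):
    for sub in title_subs:
        if re.match(r'^\d{4}$', sub):    # a bare year goes to infoLabels, not the title
            continue
        if not unify:                    # smart titles OFF: keep the [Tag] look
            title = '%s %s' % (title, sub)
        else:                            # smart titles ON: the -Tag- look
            title = '%s -%s-' % (title, sub.strip('[]'))
    return title

clean, subs = split_extra_tags('El Padrino Trilogia (1972)')
# ...the TMDB lookup would run here on the clean title...
print restore_extra_tags(clean, subs)    # -> El Padrino [Saga]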