Update newpct1.py
This commit is contained in:
@@ -339,20 +339,20 @@ def episodios(item):
|
|||||||
infoLabels = item.infoLabels
|
infoLabels = item.infoLabels
|
||||||
data = re.sub(r"\n|\r|\t|\s{2,}", "", httptools.downloadpage(item.url).data)
|
data = re.sub(r"\n|\r|\t|\s{2,}", "", httptools.downloadpage(item.url).data)
|
||||||
data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")
|
data = unicode(data, "iso-8859-1", errors="replace").encode("utf-8")
|
||||||
logger.debug('data: %s'%data)
|
|
||||||
pattern = '<ul class="%s">(.*?)</ul>' % "pagination" # item.pattern
|
pattern = '<ul class="%s">(.*?)</ul>' % "pagination" # item.pattern
|
||||||
pagination = scrapertools.find_single_match(data, pattern)
|
pagination = scrapertools.find_single_match(data, pattern)
|
||||||
if pagination:
|
if pagination:
|
||||||
pattern = '<li><a href="([^"]+)">Last<\/a>'
|
pattern = '<li><a href="([^"]+)">Last<\/a>'
|
||||||
full_url = scrapertools.find_single_match(pagination, pattern)
|
full_url = scrapertools.find_single_match(pagination, pattern)
|
||||||
url, last_page = scrapertools.find_single_match(full_url, r'(.*?\/pg\/)(\d+)')
|
url, last_page = scrapertools.find_single_match(full_url, r'(.*?\/pg\/)(\d+)')
|
||||||
list_pages = []
|
list_pages = [item.url]
|
||||||
for x in range(1, int(last_page) + 1):
|
for x in range(2, int(last_page) + 1):
|
||||||
list_pages.append("%s%s" % (url, x))
|
response = httptools.downloadpage('%s%s'% (url,x))
|
||||||
|
if response.sucess:
|
||||||
|
list_pages.append("%s%s" % (url, x))
|
||||||
else:
|
else:
|
||||||
list_pages = [item.url]
|
list_pages = [item.url]
|
||||||
|
|
||||||
logger.debug ('pattern: %s'%pattern)
|
|
||||||
for index, page in enumerate(list_pages):
|
for index, page in enumerate(list_pages):
|
||||||
logger.debug("Loading page %s/%s url=%s" % (index, len(list_pages), page))
|
logger.debug("Loading page %s/%s url=%s" % (index, len(list_pages), page))
|
||||||
data = re.sub(r"\n|\r|\t|\s{2,}", "", httptools.downloadpage(page).data)
|
data = re.sub(r"\n|\r|\t|\s{2,}", "", httptools.downloadpage(page).data)
|
||||||
|
|||||||
Reference in New Issue
Block a user