fix next page

This commit is contained in:
Alhaziel
2019-06-09 18:51:24 +02:00
parent 7551aed8ba
commit 04e15d8ed0
2 changed files with 4 additions and 3 deletions
+2 -2
View File
@@ -172,7 +172,7 @@ def video(item):
log() log()
itemlist = [] itemlist = []
matches, data = support.match(item, r'<a href="([^"]+)" class[^>]+><img src="([^"]+)"(.*?)data-jtitle="([^"]+)" .*?>(.*?)<\/a>', headers=headers) matches, data = support.match(item, r'<a href="([^"]+)" class[^>]+><img src="([^"]+)"(.*?)data-jtitle="([^"]+)" .*?>(.*?)<\/a>', '<div class="widget-body">(.*?)<div id="sidebar"', headers=headers)
for scrapedurl, scrapedthumb ,scrapedinfo, scrapedoriginal, scrapedtitle in matches: for scrapedurl, scrapedthumb ,scrapedinfo, scrapedoriginal, scrapedtitle in matches:
# Cerca Info come anno o lingua nel Titolo # Cerca Info come anno o lingua nel Titolo
@@ -251,7 +251,7 @@ def video(item):
autorenumber.renumber(itemlist) autorenumber.renumber(itemlist)
# Next page # Next page
support.nextPage(itemlist, item, data, r'<a\sclass="page-link"\shref="([^"]+)"\srel="next"\saria-label="Successiva') support.nextPage(itemlist, item, data, r'href="([^"]+)" rel="next"', resub=['&amp;','&'])
return itemlist return itemlist
+2 -1
View File
@@ -531,13 +531,14 @@ def videolibrary(itemlist, item, typography='', function_level=1):
return itemlist return itemlist
def nextPage(itemlist, item, data='', patron='', function_level=1, next_page=''): def nextPage(itemlist, item, data='', patron='', function_level=1, next_page='', resub=[]):
# Function_level is useful if the function is called by another function. # Function_level is useful if the function is called by another function.
# If the call is direct, leave it blank # If the call is direct, leave it blank
if next_page == '': if next_page == '':
next_page = scrapertoolsV2.find_single_match(data, patron) next_page = scrapertoolsV2.find_single_match(data, patron)
if next_page != "": if next_page != "":
if resub: next_page = re.sub(resub[0], resub[1], next_page)
if 'http' not in next_page: if 'http' not in next_page:
next_page = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + next_page next_page = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + next_page
log('NEXT= ', next_page) log('NEXT= ', next_page)