fix next page

2019-06-09 18:51:24 +02:00
parent 7551aed8ba
commit 04e15d8ed0
2 changed files with 4 additions and 3 deletions
@@ -172,7 +172,7 @@ def video(item):
    log()
    itemlist = []
-    matches, data = support.match(item, r'<a href="([^"]+)" class[^>]+><img src="([^"]+)"(.*?)data-jtitle="([^"]+)" .*?>(.*?)<\/a>', headers=headers)
+    matches, data = support.match(item, r'<a href="([^"]+)" class[^>]+><img src="([^"]+)"(.*?)data-jtitle="([^"]+)" .*?>(.*?)<\/a>', '<div class="widget-body">(.*?)<div id="sidebar"', headers=headers)
    for scrapedurl, scrapedthumb ,scrapedinfo, scrapedoriginal, scrapedtitle in matches:
        # Cerca Info come anno o lingua nel Titolo
@@ -251,7 +251,7 @@ def video(item):
    autorenumber.renumber(itemlist)
    # Next page
-    support.nextPage(itemlist, item, data, r'<a\sclass="page-link"\shref="([^"]+)"\srel="next"\saria-label="Successiva')
+    support.nextPage(itemlist, item, data, r'href="([^"]+)" rel="next"', resub=['&amp;','&'])
    return itemlist
@@ -531,13 +531,14 @@ def videolibrary(itemlist, item, typography='', function_level=1):
    return itemlist
-def nextPage(itemlist, item, data='', patron='', function_level=1, next_page=''):
+def nextPage(itemlist, item, data='', patron='', function_level=1, next_page='', resub=[]):
    # Function_level is useful if the function is called by another function.
    # If the call is direct, leave it blank
    if next_page == '':
        next_page = scrapertoolsV2.find_single_match(data, patron)
    if next_page != "":
        if resub: next_page = re.sub(resub[0], resub[1], next_page)
        if 'http' not in next_page:
            next_page = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + next_page
        log('NEXT= ', next_page)