From f31aed89250bc5db26e1d89b4999a46ea7eb6aa8 Mon Sep 17 00:00:00 2001 From: marco Date: Sun, 14 Jun 2020 20:12:48 +0200 Subject: [PATCH] ottimizzazioni --- core/scrapertools.py | 5 ++++- core/support.py | 5 +++-- tests.py | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/core/scrapertools.py b/core/scrapertools.py index 0ab6440c..669eb0eb 100644 --- a/core/scrapertools.py +++ b/core/scrapertools.py @@ -40,7 +40,10 @@ def printMatches(matches): def find_single_match(data, patron, index=0): try: - matches = re.findall(patron, data, flags=re.DOTALL) + if index == 0: + matches = re.search(patron, data, flags=re.DOTALL) + else: + matches = re.findall(patron, data, flags=re.DOTALL) return matches[index] except: return "" diff --git a/core/support.py b/core/support.py index 0b51b84e..17afbf28 100755 --- a/core/support.py +++ b/core/support.py @@ -489,8 +489,8 @@ def scrape(func): else: break - if (pagination and len(matches) <= pag * pagination) or not pagination: # next page with pagination - if patronNext and inspect.stack()[1][3] != 'newest': + if (pagination and len(matches) <= pag * pagination) or not pagination: # next page with pagination + if patronNext and inspect.stack()[1][3] not in ['newest', 'search']: nextPage(itemlist, item, data, patronNext, function) # next page for pagination @@ -1105,6 +1105,7 @@ def videolibrary(itemlist, item, typography='', function_level=1, function=''): def nextPage(itemlist, item, data='', patron='', function_or_level=1, next_page='', resub=[]): # Function_level is useful if the function is called by another function. # If the call is direct, leave it blank + log() action = inspect.stack()[function_or_level][3] if type(function_or_level) == int else function_or_level if next_page == '': next_page = scrapertools.find_single_match(data, patron) diff --git a/tests.py b/tests.py index 2df8e2ed..c4a75c56 100644 --- a/tests.py +++ b/tests.py @@ -70,7 +70,7 @@ class GenericChannelTest(unittest.TestCase): self.assertLess(len(resIt.fulltitle), 100, 'channel ' + self.ch + ' -> ' + it.title + ' might contain wrong titles\n' + resIt.fulltitle) if resIt.url: self.assertIsNotNone(re.match(validUrlRegex, resIt.url), 'channel ' + self.ch + ' -> ' + it.title + ' -> ' + resIt.title + ' might contain wrong url\n' + resIt.url) - if 'year' in resIt.infoLabels: + if 'year' in resIt.infoLabels and resIt.infoLabels['year']: msgYear = 'channel ' + self.ch + ' -> ' + it.title + ' might contain wrong infolabels year\n' + str(resIt.infoLabels['year']) self.assert_(type(resIt.infoLabels['year']) is int or resIt.infoLabels['year'].isdigit(), msgYear) self.assert_(int(resIt.infoLabels['year']) > 1900 and int(resIt.infoLabels['year']) < 2100, msgYear)