Migliorato support.match e sostituito ove usato

- Fix Animeforce - Fix AnimeSubITA
2020-01-15 18:44:03 +01:00
parent 89cbb8ae12
commit fce3bf2590
21 changed files with 271 additions and 165 deletions
@@ -180,7 +180,7 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
    if debug:
        regexDbg(item, patron, headers, block)

-    known_keys = ['url', 'title', 'title2', 'season', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating', 'type', 'lang']
+    known_keys = ['url', 'title', 'title2', 'season', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating', 'type', 'lang', 'other']
    # Legenda known_keys per i groups nei patron
    # known_keys = ['url', 'title', 'title2', 'season', 'episode', 'thumb', 'quality',
    #                'year', 'plot', 'duration', 'genere', 'rating', 'type', 'lang']
@@ -301,7 +301,8 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
                contentTitle= scraped['title'] if item.contentType or CT == 'movie' else '',
                contentLanguage = lang1,
                contentEpisodeNumber=episode if episode else '',
-                news= item.news if item.news else ''
+                news= item.news if item.news else '',
+                other = scraped['other'] if scraped['other'] else ''
            )

            for lg in list(set(listGroups).difference(known_keys)):
@@ -445,7 +446,7 @@ def scrape(func):
        if anime:
            if function == 'episodios' or item.action == 'episodios': autorenumber.renumber(itemlist, item, 'bold')
            else: autorenumber.renumber(itemlist)
-        if anime and autorenumber.check(item) == False and not scrapertools.find_single_match(itemlist[0].title, r'(\d+.\d+)'):
+        if anime and autorenumber.check(item) == False and len(itemlist)>0 and not scrapertools.find_single_match(itemlist[0].title, r'(\d+.\d+)'):
            pass
        else:
            if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
@@ -772,31 +773,91 @@ def typo(string, typography=''):
    return string


-def match(item, patron='', patronBlock='', headers='', url='', post=''):
+def match(item_url_string, **args):
+    '''
+    Get a page and scrape it, combining httptools and scrapertools.
+
+    item_url_string: object with a .url attribute, str starting with 'http'
+        (downloaded unless string=True) or any other str, used as page data.
+    Scrape keywords, removed from args before the download:
+        patron: regex, or list of regexes, searched in every block
+        patronBlock / patronBlocks: regex giving a single block / every matching block
+        debug / debugBlock: in dev mode pass patron / block regex to match_dbg
+        string: True to use an 'http...' str as page data instead of a url
+    Other keywords go to httptools.downloadpage; ignore_response_code is True unless set.
+
+    Returns an Item with data, blocks, block, matches and match attributes.
+    '''
+    log(item_url_string)
    matches = []
-    if type(item) == str:
-        data = item
+    url = None
+    data = ''  # fallback when an object with an empty url is passed
+    patron = args.get('patron', None)
+    patronBlock = args.get('patronBlock', None)
+    patronBlocks = args.get('patronBlocks', None)
+    debug = args.get('debug', False)
+    debugBlock = args.get('debugBlock', False)
+    string = args.get('string', False)
+    # keep only the keywords meant for httptools.downloadpage
+    args = dict((key, val) for key, val in args.items() if key not in ('patron', 'patronBlock', 'patronBlocks', 'debug', 'debugBlock', 'string'))
+    if isinstance(item_url_string, str):
+        if item_url_string.startswith('http') and not string: url = item_url_string
+        else : data = item_url_string
    else:
-        url = url if url else item.url
-        if post:
-            data = httptools.downloadpage(url, headers=headers, ignore_response_code=True, post=post).data.replace("'", '"')
-        else:
-            data = httptools.downloadpage(url, headers=headers, ignore_response_code=True).data.replace("'", '"')
+        url = item_url_string.url
+
+    if url:
+        if args.get('ignore_response_code', None) is None: args['ignore_response_code'] = True
+        data = httptools.downloadpage(url, **args).data.replace("'", '"')
+
+    # newlines/tabs become spaces, whitespace between tags is removed
    data = re.sub(r'\n|\t', ' ', data)
    data = re.sub(r'>\s\s*<', '><', data)
-    log('DATA= ', data)

    if patronBlock:
-        block = scrapertools.find_single_match(data, patronBlock)
-        log('BLOCK= ',block)
+        blocks = [scrapertools.find_single_match(data, patronBlock)]
+    elif patronBlocks:  # every match of patronBlocks is a block of its own
+        blocks = scrapertools.find_multiple_matches(data, patronBlocks)
    else:
-        block = data
+        blocks = [data]

+    # search every patron in every block
    if patron:
-        matches = scrapertools.find_multiple_matches(block, patron)
-        log('MATCHES= ',matches)
+        if isinstance(patron, str): patron = [patron]
+        for b in blocks:
+            for p in patron:
+                matches += scrapertools.find_multiple_matches(b, p)

-    return matches, block
+    if config.dev_mode():
+        if debugBlock and (patronBlock or patronBlocks):  # nothing to debug without a block regex
+            match_dbg(data, patronBlock or patronBlocks)
+        if debug:
+            for block in blocks:
+                for p in patron or []:  # patron may be None when only blocks were requested
+                    match_dbg(block, p)
+
+    return Item(data=data,
+                blocks=blocks,
+                block=blocks[0] if blocks else '',
+                matches=matches,
+                match=matches[0] if matches else '')
+
+
+def match_dbg(data, patron):
+    '''Post patron and data to regex101.com and open the returned permalink with webbrowser.'''
+    import json, urllib2, webbrowser
+    site = 'https://regex101.com'
+    content_type = {'content-type': 'application/json'}
+    payload = {
+        'regex': patron,
+        'flags': 'gm',
+        'testString': data,
+        'delimiter': '"""',
+        'flavor': 'python'
+    }
+    request = urllib2.Request(site + '/api/regex', json.dumps(payload, encoding='latin1'), headers=content_type)
+    reply = urllib2.urlopen(request).read()
+    webbrowser.open(site + "/r/" + json.loads(reply)['permalinkFragment'])


 def download(itemlist, item, typography='', function_level=1, function=''):