Migliorato support.match e sostituito ove usato

- Fix Animeforce
- Fix AnimeSubITA
This commit is contained in:
Alhaziel
2020-01-15 18:44:03 +01:00
parent 89cbb8ae12
commit fce3bf2590
21 changed files with 271 additions and 165 deletions
+79 -18
View File
@@ -180,7 +180,7 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
if debug:
regexDbg(item, patron, headers, block)
known_keys = ['url', 'title', 'title2', 'season', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating', 'type', 'lang']
known_keys = ['url', 'title', 'title2', 'season', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating', 'type', 'lang', 'other']
# Legenda known_keys per i groups nei patron
# known_keys = ['url', 'title', 'title2', 'season', 'episode', 'thumb', 'quality',
# 'year', 'plot', 'duration', 'genere', 'rating', 'type', 'lang']
@@ -301,7 +301,8 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
contentTitle= scraped['title'] if item.contentType or CT == 'movie' else '',
contentLanguage = lang1,
contentEpisodeNumber=episode if episode else '',
news= item.news if item.news else ''
news= item.news if item.news else '',
other = scraped['other'] if scraped['other'] else ''
)
for lg in list(set(listGroups).difference(known_keys)):
@@ -445,7 +446,7 @@ def scrape(func):
if anime:
if function == 'episodios' or item.action == 'episodios': autorenumber.renumber(itemlist, item, 'bold')
else: autorenumber.renumber(itemlist)
if anime and autorenumber.check(item) == False and not scrapertools.find_single_match(itemlist[0].title, r'(\d+.\d+)'):
if anime and autorenumber.check(item) == False and len(itemlist)>0 and not scrapertools.find_single_match(itemlist[0].title, r'(\d+.\d+)'):
pass
else:
if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
@@ -772,31 +773,91 @@ def typo(string, typography=''):
return string
def match(item, patron='', patronBlock='', headers='', url='', post=''):
def match(item_url_string, **args):
'''
match is a function that combines httptools and scraper tools:
'''
log(item_url_string)
matches = []
if type(item) == str:
data = item
url = None
# arguments allowed for scrape
patron = args.get('patron', None)
patronBlock = args.get('patronBlock', None)
patronBlocks = args.get('patronBlock', None)
debug = args.get('debug', False)
debugBlock = args.get('debugBlock', False)
string = args.get('string', False)
# remove scrape arguments
args = dict([(key, val) for key, val in args.items() if key not in ['patron', 'patronBlock', 'patronBlocks', 'debug', 'debugBlock', 'string']])
# dbg()
# check type of item_url_string
if type(item_url_string) == str:
if item_url_string.startswith('http') and not string: url = item_url_string
else : data = item_url_string
else:
url = url if url else item.url
if post:
data = httptools.downloadpage(url, headers=headers, ignore_response_code=True, post=post).data.replace("'", '"')
else:
data = httptools.downloadpage(url, headers=headers, ignore_response_code=True).data.replace("'", '"')
# if item_url_string is an item use item.url as url
url = item_url_string.url
# if there is a url, download the page
if url:
if args.get('ignore_response_code', None) is None:
args['ignore_response_code'] = True
data = httptools.downloadpage(url, **args).data.replace("'", '"')
# format page data
data = re.sub(r'\n|\t', ' ', data)
data = re.sub(r'>\s\s*<', '><', data)
log('DATA= ', data)
# collect blocks of a page
if patronBlock:
block = scrapertools.find_single_match(data, patronBlock)
log('BLOCK= ',block)
blocks = [scrapertools.find_single_match(data, patronBlock)]
elif patronBlocks:
blocks = scrapertools.find_multiple_matches(data, patronBlock)
else:
block = data
blocks = [data]
# match
if patron:
matches = scrapertools.find_multiple_matches(block, patron)
log('MATCHES= ',matches)
if type(patron) == str: patron = [patron]
for b in blocks:
for p in patron:
matches += scrapertools.find_multiple_matches(b, p)
return matches, block
# debug mode
if config.dev_mode():
if debugBlock:
match_dbg(data, patronBlock)
if debug:
for block in blocks:
for p in patron:
match_dbg(block, p)
# create a item
item = Item(data=data,
blocks=blocks,
block=blocks[0] if len(blocks) > 0 else '',
matches=matches,
match=matches[0] if len(matches) > 0 else '')
return item
def match_dbg(data, patron):
    '''
    Debug helper: send a pattern and its test text to regex101.com and
    open the resulting permalink with webbrowser.open.

    data   -- text used as the regex101 test string
    patron -- regular expression submitted as the regex101 pattern
    '''
    import json
    import urllib2
    import webbrowser

    site = 'https://regex101.com'
    request_headers = {'content-type': 'application/json'}

    # Keep the request body under its own name so the `data` argument
    # stays readable as the test string.
    payload = {
        'regex': patron,
        'flags': 'gm',
        'testString': data,
        'delimiter': '"""',
        'flavor': 'python'
    }
    body = json.dumps(payload, encoding='latin1')

    request = urllib2.Request(site + '/api/regex', body, headers=request_headers)
    response = urllib2.urlopen(request).read()

    # The response is parsed as JSON and its 'permalinkFragment' entry is
    # appended to the /r/ path to build the page address.
    fragment = json.loads(response)['permalinkFragment']
    webbrowser.open(site + "/r/" + fragment)
def download(itemlist, item, typography='', function_level=1, function=''):