Decoratore scrape: aggiunti i parametri flags e flagsBlock per modificare i flag delle regex

This commit is contained in:
marco
2023-08-10 13:19:29 +02:00
parent 074adf7e73
commit 4d2968a308
3 changed files with 10 additions and 8 deletions
+1 -1
View File
@@ -52,7 +52,7 @@ def peliculas(item):
patron = r'>[^"<]+' patron = r'>[^"<]+'
else: else:
patron = r'>(?P<quality>[^"<]+)' patron = r'>(?P<quality>[^"<]+)'
patron += '(?i)<td[^>]+><a class="tab" href="(?P<url>[^"]+)"\s*>[^<]+<[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<size>[^<]+)<[^>]+>[^>]+>[^>]+><form action="[^"]+/\d+/(?P<title>[^"]+)[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<seed>[^<]+)' patron += '<td[^>]+><a class="tab" href="(?P<url>[^"]+)"\s*>[^<]+<[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<size>[^<]+)<[^>]+>[^>]+>[^>]+><form action="[^"]+/\d+/(?P<title>[^"]+)[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<seed>[^<]+)'
def itemHook(item): def itemHook(item):
if not sceneTitle: if not sceneTitle:
+2 -2
View File
@@ -63,8 +63,8 @@ def find_multiple_matches(text, pattern):
return re.findall(pattern, text, re.DOTALL) return re.findall(pattern, text, re.DOTALL)
def find_multiple_matches_groups(text, pattern): def find_multiple_matches_groups(text, pattern, flags):
r = re.compile(pattern) r = re.compile(pattern, flags)
return [m.groupdict() for m in r.finditer(text)] return [m.groupdict() for m in r.finditer(text)]
+7 -5
View File
@@ -225,11 +225,11 @@ def unifyEp(ep):
return ep return ep
def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang, sceneTitle, group): def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang, sceneTitle, group, flags):
itemlist = [] itemlist = []
if debug: if debug:
regexDbg(item, patron, headers, block) regexDbg(item, patron, headers, block)
matches = scrapertools.find_multiple_matches_groups(block, patron) matches = scrapertools.find_multiple_matches_groups(block, patron, flags)
logger.debug('MATCHES =', matches) logger.debug('MATCHES =', matches)
known_keys = ['url', 'title', 'title2', 'season', 'episode', 'episode2', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating', 'type', 'lang', 'other', 'size', 'seed'] known_keys = ['url', 'title', 'title2', 'season', 'episode', 'episode2', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating', 'type', 'lang', 'other', 'size', 'seed']
@@ -524,7 +524,9 @@ def scrape(func):
else: else:
headers = '' headers = ''
patronNext = args.get('patronNext', '') patronNext = args.get('patronNext', '')
flags = args.get('flags', re.IGNORECASE)
patronBlock = args.get('patronBlock', '') patronBlock = args.get('patronBlock', '')
flagsBlock = args.get('flagsBlock', re.IGNORECASE)
typeActionDict = args.get('typeActionDict', {}) typeActionDict = args.get('typeActionDict', {})
typeContentDict = args.get('typeContentDict', {}) typeContentDict = args.get('typeContentDict', {})
debug = args.get('debug', False) debug = args.get('debug', False)
@@ -549,13 +551,13 @@ def scrape(func):
if patronBlock: if patronBlock:
if debugBlock: if debugBlock:
regexDbg(item, patronBlock, headers, data) regexDbg(item, patronBlock, headers, data)
blocks = scrapertools.find_multiple_matches_groups(data, patronBlock) blocks = scrapertools.find_multiple_matches_groups(data, patronBlock, flagsBlock)
for bl in blocks: for bl in blocks:
# info(len(blocks),bl) # info(len(blocks),bl)
if 'season' in bl and bl['season']: if 'season' in bl and bl['season']:
item.season = bl['season'] item.season = bl['season']
blockItemlist, blockMatches = scrapeBlock(item, args, bl['block'], patron, headers, action, pagination, debug, blockItemlist, blockMatches = scrapeBlock(item, args, bl['block'], patron, headers, action, pagination, debug,
typeContentDict, typeActionDict, blacklist, search, pag, function, lang, sceneTitle, group) typeContentDict, typeActionDict, blacklist, search, pag, function, lang, sceneTitle, group, flags)
for it in blockItemlist: for it in blockItemlist:
if 'lang' in bl: if 'lang' in bl:
it.contentLanguage, it.title = scrapeLang(bl, it.contentLanguage, it.title) it.contentLanguage, it.title = scrapeLang(bl, it.contentLanguage, it.title)
@@ -566,7 +568,7 @@ def scrape(func):
matches.extend(blockMatches) matches.extend(blockMatches)
elif patron: elif patron:
itemlist, matches = scrapeBlock(item, args, data, patron, headers, action, pagination, debug, typeContentDict, itemlist, matches = scrapeBlock(item, args, data, patron, headers, action, pagination, debug, typeContentDict,
typeActionDict, blacklist, search, pag, function, lang, sceneTitle, group) typeActionDict, blacklist, search, pag, function, lang, sceneTitle, group, flags)
if 'itemlistHook' in args: if 'itemlistHook' in args:
try: try: