Decoratore scrape: aggiunti i parametri flags e flagsBlock per modificare i flag delle regex
This commit is contained in:
@@ -52,7 +52,7 @@ def peliculas(item):
|
|||||||
patron = r'>[^"<]+'
|
patron = r'>[^"<]+'
|
||||||
else:
|
else:
|
||||||
patron = r'>(?P<quality>[^"<]+)'
|
patron = r'>(?P<quality>[^"<]+)'
|
||||||
patron += '(?i)<td[^>]+><a class="tab" href="(?P<url>[^"]+)"\s*>[^<]+<[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<size>[^<]+)<[^>]+>[^>]+>[^>]+><form action="[^"]+/\d+/(?P<title>[^"]+)[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<seed>[^<]+)'
|
patron += '<td[^>]+><a class="tab" href="(?P<url>[^"]+)"\s*>[^<]+<[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<size>[^<]+)<[^>]+>[^>]+>[^>]+><form action="[^"]+/\d+/(?P<title>[^"]+)[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>(?P<seed>[^<]+)'
|
||||||
|
|
||||||
def itemHook(item):
|
def itemHook(item):
|
||||||
if not sceneTitle:
|
if not sceneTitle:
|
||||||
|
|||||||
@@ -63,8 +63,8 @@ def find_multiple_matches(text, pattern):
|
|||||||
return re.findall(pattern, text, re.DOTALL)
|
return re.findall(pattern, text, re.DOTALL)
|
||||||
|
|
||||||
|
|
||||||
def find_multiple_matches_groups(text, pattern):
|
def find_multiple_matches_groups(text, pattern, flags):
|
||||||
r = re.compile(pattern)
|
r = re.compile(pattern, flags)
|
||||||
return [m.groupdict() for m in r.finditer(text)]
|
return [m.groupdict() for m in r.finditer(text)]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+7
-5
@@ -225,11 +225,11 @@ def unifyEp(ep):
|
|||||||
return ep
|
return ep
|
||||||
|
|
||||||
|
|
||||||
def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang, sceneTitle, group):
|
def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang, sceneTitle, group, flags):
|
||||||
itemlist = []
|
itemlist = []
|
||||||
if debug:
|
if debug:
|
||||||
regexDbg(item, patron, headers, block)
|
regexDbg(item, patron, headers, block)
|
||||||
matches = scrapertools.find_multiple_matches_groups(block, patron)
|
matches = scrapertools.find_multiple_matches_groups(block, patron, flags)
|
||||||
logger.debug('MATCHES =', matches)
|
logger.debug('MATCHES =', matches)
|
||||||
|
|
||||||
known_keys = ['url', 'title', 'title2', 'season', 'episode', 'episode2', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating', 'type', 'lang', 'other', 'size', 'seed']
|
known_keys = ['url', 'title', 'title2', 'season', 'episode', 'episode2', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating', 'type', 'lang', 'other', 'size', 'seed']
|
||||||
@@ -524,7 +524,9 @@ def scrape(func):
|
|||||||
else:
|
else:
|
||||||
headers = ''
|
headers = ''
|
||||||
patronNext = args.get('patronNext', '')
|
patronNext = args.get('patronNext', '')
|
||||||
|
flags = args.get('flags', re.IGNORECASE)
|
||||||
patronBlock = args.get('patronBlock', '')
|
patronBlock = args.get('patronBlock', '')
|
||||||
|
flagsBlock = args.get('flagsBlock', re.IGNORECASE)
|
||||||
typeActionDict = args.get('typeActionDict', {})
|
typeActionDict = args.get('typeActionDict', {})
|
||||||
typeContentDict = args.get('typeContentDict', {})
|
typeContentDict = args.get('typeContentDict', {})
|
||||||
debug = args.get('debug', False)
|
debug = args.get('debug', False)
|
||||||
@@ -549,13 +551,13 @@ def scrape(func):
|
|||||||
if patronBlock:
|
if patronBlock:
|
||||||
if debugBlock:
|
if debugBlock:
|
||||||
regexDbg(item, patronBlock, headers, data)
|
regexDbg(item, patronBlock, headers, data)
|
||||||
blocks = scrapertools.find_multiple_matches_groups(data, patronBlock)
|
blocks = scrapertools.find_multiple_matches_groups(data, patronBlock, flagsBlock)
|
||||||
for bl in blocks:
|
for bl in blocks:
|
||||||
# info(len(blocks),bl)
|
# info(len(blocks),bl)
|
||||||
if 'season' in bl and bl['season']:
|
if 'season' in bl and bl['season']:
|
||||||
item.season = bl['season']
|
item.season = bl['season']
|
||||||
blockItemlist, blockMatches = scrapeBlock(item, args, bl['block'], patron, headers, action, pagination, debug,
|
blockItemlist, blockMatches = scrapeBlock(item, args, bl['block'], patron, headers, action, pagination, debug,
|
||||||
typeContentDict, typeActionDict, blacklist, search, pag, function, lang, sceneTitle, group)
|
typeContentDict, typeActionDict, blacklist, search, pag, function, lang, sceneTitle, group, flags)
|
||||||
for it in blockItemlist:
|
for it in blockItemlist:
|
||||||
if 'lang' in bl:
|
if 'lang' in bl:
|
||||||
it.contentLanguage, it.title = scrapeLang(bl, it.contentLanguage, it.title)
|
it.contentLanguage, it.title = scrapeLang(bl, it.contentLanguage, it.title)
|
||||||
@@ -566,7 +568,7 @@ def scrape(func):
|
|||||||
matches.extend(blockMatches)
|
matches.extend(blockMatches)
|
||||||
elif patron:
|
elif patron:
|
||||||
itemlist, matches = scrapeBlock(item, args, data, patron, headers, action, pagination, debug, typeContentDict,
|
itemlist, matches = scrapeBlock(item, args, data, patron, headers, action, pagination, debug, typeContentDict,
|
||||||
typeActionDict, blacklist, search, pag, function, lang, sceneTitle, group)
|
typeActionDict, blacklist, search, pag, function, lang, sceneTitle, group, flags)
|
||||||
|
|
||||||
if 'itemlistHook' in args:
|
if 'itemlistHook' in args:
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user