Fix Scrapertools e Support

This commit is contained in:
Alhaziel01
2021-03-19 17:38:45 +01:00
parent 6b2a35175e
commit 18bd2b1ae7
2 changed files with 12 additions and 6 deletions

View File

@@ -104,6 +104,7 @@ def unescape(text):
from Fredrik Lundh
http://effbot.org/zone/re-sub.htm#unescape-html
"""
if not ('&' in text and ';' in text):
return text
@@ -129,13 +130,16 @@ def unescape(text):
import html.entities as htmlentitydefs
else:
import htmlentitydefs
text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]).encode("utf-8")
ret = unichr(htmlentitydefs.name2codepoint[text[1:-1]]).encode("utf-8")
except KeyError:
logger.error("keyerror")
pass
except:
pass
return text # leave as is
# from core.support import dbg;dbg()
if type(ret) != str:
ret = ret.decode()
return ret # leave as is
return re.sub("&#?\w+;", fixup, str(text))

View File

@@ -895,12 +895,13 @@ def match(item_url_string, **args):
matches: all the matches
'''
matches = blocks = []
matches = []
blocks = []
url = None
# arguments allowed for scrape
patron = args.get('patron', None)
patronBlock = args.get('patronBlock', None)
patronBlocks = args.get('patronBlock', None)
patronBlocks = args.get('patronBlocks', None)
debug = args.get('debug', False)
debugBlock = args.get('debugBlock', False)
string = args.get('string', False)
@@ -934,8 +935,9 @@ def match(item_url_string, **args):
if patronBlock:
blocks = [scrapertools.find_single_match(data, patronBlock)]
elif patronBlocks:
if type(patronBlock) == str: patron = [patronBlock]
for p in patronBlock:
if type(patronBlocks) == str:
patronBlocks = [patronBlocks]
for p in patronBlocks:
blocks += scrapertools.find_multiple_matches(data, p)
else:
blocks = [data]