Fix Scrapertools and Support

This commit is contained in:
Alhaziel01
2021-03-19 17:38:45 +01:00
parent 6b2a35175e
commit 18bd2b1ae7
2 changed files with 12 additions and 6 deletions
+6 -2
View File
@@ -104,6 +104,7 @@ def unescape(text):
from Fredrik Lundh from Fredrik Lundh
http://effbot.org/zone/re-sub.htm#unescape-html http://effbot.org/zone/re-sub.htm#unescape-html
""" """
if not ('&' in text and ';' in text): if not ('&' in text and ';' in text):
return text return text
@@ -129,13 +130,16 @@ def unescape(text):
import html.entities as htmlentitydefs import html.entities as htmlentitydefs
else: else:
import htmlentitydefs import htmlentitydefs
text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]).encode("utf-8") ret = unichr(htmlentitydefs.name2codepoint[text[1:-1]]).encode("utf-8")
except KeyError: except KeyError:
logger.error("keyerror") logger.error("keyerror")
pass pass
except: except:
pass pass
return text # leave as is # from core.support import dbg;dbg()
if type(ret) != str:
ret = ret.decode()
return ret # leave as is
return re.sub("&#?\w+;", fixup, str(text)) return re.sub("&#?\w+;", fixup, str(text))
+6 -4
View File
@@ -895,12 +895,13 @@ def match(item_url_string, **args):
matches: all the matches matches: all the matches
''' '''
matches = blocks = [] matches = []
blocks = []
url = None url = None
# arguments allowed for scrape # arguments allowed for scrape
patron = args.get('patron', None) patron = args.get('patron', None)
patronBlock = args.get('patronBlock', None) patronBlock = args.get('patronBlock', None)
patronBlocks = args.get('patronBlock', None) patronBlocks = args.get('patronBlocks', None)
debug = args.get('debug', False) debug = args.get('debug', False)
debugBlock = args.get('debugBlock', False) debugBlock = args.get('debugBlock', False)
string = args.get('string', False) string = args.get('string', False)
@@ -934,8 +935,9 @@ def match(item_url_string, **args):
if patronBlock: if patronBlock:
blocks = [scrapertools.find_single_match(data, patronBlock)] blocks = [scrapertools.find_single_match(data, patronBlock)]
elif patronBlocks: elif patronBlocks:
if type(patronBlock) == str: patron = [patronBlock] if type(patronBlocks) == str:
for p in patronBlock: patronBlocks = [patronBlocks]
for p in patronBlocks:
blocks += scrapertools.find_multiple_matches(data, p) blocks += scrapertools.find_multiple_matches(data, p)
else: else:
blocks = [data] blocks = [data]