Fix Scrapertools e Support
This commit is contained in:
@@ -104,6 +104,7 @@ def unescape(text):
|
|||||||
from Fredrik Lundh
|
from Fredrik Lundh
|
||||||
http://effbot.org/zone/re-sub.htm#unescape-html
|
http://effbot.org/zone/re-sub.htm#unescape-html
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if not ('&' in text and ';' in text):
|
if not ('&' in text and ';' in text):
|
||||||
return text
|
return text
|
||||||
|
|
||||||
@@ -129,13 +130,16 @@ def unescape(text):
|
|||||||
import html.entities as htmlentitydefs
|
import html.entities as htmlentitydefs
|
||||||
else:
|
else:
|
||||||
import htmlentitydefs
|
import htmlentitydefs
|
||||||
text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]).encode("utf-8")
|
ret = unichr(htmlentitydefs.name2codepoint[text[1:-1]]).encode("utf-8")
|
||||||
except KeyError:
|
except KeyError:
|
||||||
logger.error("keyerror")
|
logger.error("keyerror")
|
||||||
pass
|
pass
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
return text # leave as is
|
# from core.support import dbg;dbg()
|
||||||
|
if type(ret) != str:
|
||||||
|
ret = ret.decode()
|
||||||
|
return ret # leave as is
|
||||||
|
|
||||||
return re.sub("&#?\w+;", fixup, str(text))
|
return re.sub("&#?\w+;", fixup, str(text))
|
||||||
|
|
||||||
|
|||||||
+6
-4
@@ -895,12 +895,13 @@ def match(item_url_string, **args):
|
|||||||
matches: all the matches
|
matches: all the matches
|
||||||
'''
|
'''
|
||||||
|
|
||||||
matches = blocks = []
|
matches = []
|
||||||
|
blocks = []
|
||||||
url = None
|
url = None
|
||||||
# arguments allowed for scrape
|
# arguments allowed for scrape
|
||||||
patron = args.get('patron', None)
|
patron = args.get('patron', None)
|
||||||
patronBlock = args.get('patronBlock', None)
|
patronBlock = args.get('patronBlock', None)
|
||||||
patronBlocks = args.get('patronBlock', None)
|
patronBlocks = args.get('patronBlocks', None)
|
||||||
debug = args.get('debug', False)
|
debug = args.get('debug', False)
|
||||||
debugBlock = args.get('debugBlock', False)
|
debugBlock = args.get('debugBlock', False)
|
||||||
string = args.get('string', False)
|
string = args.get('string', False)
|
||||||
@@ -934,8 +935,9 @@ def match(item_url_string, **args):
|
|||||||
if patronBlock:
|
if patronBlock:
|
||||||
blocks = [scrapertools.find_single_match(data, patronBlock)]
|
blocks = [scrapertools.find_single_match(data, patronBlock)]
|
||||||
elif patronBlocks:
|
elif patronBlocks:
|
||||||
if type(patronBlock) == str: patron = [patronBlock]
|
if type(patronBlocks) == str:
|
||||||
for p in patronBlock:
|
patronBlocks = [patronBlocks]
|
||||||
|
for p in patronBlocks:
|
||||||
blocks += scrapertools.find_multiple_matches(data, p)
|
blocks += scrapertools.find_multiple_matches(data, p)
|
||||||
else:
|
else:
|
||||||
blocks = [data]
|
blocks = [data]
|
||||||
|
|||||||
Reference in New Issue
Block a user