get_channel_url migliorato
This commit is contained in:
+10
-1
@@ -377,7 +377,16 @@ def scrape(func):
|
||||
|
||||
log('PATRON= ', patron)
|
||||
if not data:
|
||||
data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True, session=item.session).data.replace("'", '"')
|
||||
page = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True, session=item.session)
|
||||
# if url may be changed and channel has findhost to update
|
||||
if (not page.data or scrapertoolsV2.get_domain_from_url(page.url) != scrapertoolsV2.get_domain_from_url(item.url)) and 'findhost' in func.__globals__:
|
||||
host = func.__globals__['findhost']()
|
||||
from core import jsontools
|
||||
jsontools.update_node(host, func.__module__.split('.')[-1], 'url')
|
||||
item.url = item.url.replace(scrapertoolsV2.get_domain_from_url(item.url), scrapertoolsV2.get_domain_from_url(host))
|
||||
page = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True,
|
||||
session=item.session)
|
||||
data = page.data.replace("'", '"')
|
||||
data = re.sub('\n|\t', ' ', data)
|
||||
data = re.sub(r'>\s+<', '> <', data)
|
||||
# replace all ' with " and eliminate newline, so we don't need to worry about
|
||||
|
||||
Reference in New Issue
Block a user