From edb61e7383684327926e305ddc47bb743424599c Mon Sep 17 00:00:00 2001 From: Alhaziel Date: Thu, 6 Feb 2020 19:10:53 +0100 Subject: [PATCH] Fix httptools e support per Matrix --- core/httptools.py | 4 ++++ core/support.py | 29 +++++++++++++++++------------ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/core/httptools.py b/core/httptools.py index 538cf988..e585b9cb 100755 --- a/core/httptools.py +++ b/core/httptools.py @@ -383,6 +383,10 @@ def downloadpage(url, **opt): response['data'] = req.content response['url'] = req.url + + if type(response['data']) != str: + response['data'] = response['data'].decode('UTF-8') + if not response['data']: response['data'] = '' try: diff --git a/core/support.py b/core/support.py index 71f137cb..58898079 100755 --- a/core/support.py +++ b/core/support.py @@ -172,7 +172,8 @@ def scrapeLang(scraped, lang, longtitle): return language, longtitle def cleantitle(title): - cleantitle = scrapertools.htmlclean(scrapertools.decodeHtmlentities(title).replace('"', "'").replace('×', 'x').replace('–', '-')).strip() + if type(title) != str: title.decode('UTF-8') + cleantitle = title.replace('"', "'").replace('×', 'x').replace('–', '-').strip() return cleantitle def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang): @@ -207,16 +208,20 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t for i, match in enumerate(matches): if pagination and (pag - 1) * pagination > i and not search: continue # pagination if pagination and i >= pag * pagination and not search: break # pagination - listGroups = match.keys() - match = match.values() + # listGroups = match.keys() + # match = match.values() - if len(listGroups) > len(match): # to fix a bug - match = list(match) - match.extend([''] * (len(listGroups) - len(match))) + # if len(listGroups) > len(match): # to fix a bug + # support.log() + # match = list(match) + # match.extend([''] * (len(listGroups) - len(match))) scraped = {} for kk in known_keys: - val = match[listGroups.index(kk)] if kk in listGroups else '' + # log('KK=',kk) + # log('LIST',list(listGroups)) + # log(match[dict_values]) + val = match[kk] if kk in match else '' if val and (kk == "url" or kk == 'thumb') and 'http' not in val: val = scrapertools.find_single_match(item.url, 'https?://[a-z0-9.-]+') + (val if val.startswith('/') else '/' + val) scraped[kk] = val @@ -309,8 +314,8 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t other = scraped['other'] if scraped['other'] else '' ) - for lg in list(set(listGroups).difference(known_keys)): - it.__setattr__(lg, match[listGroups.index(lg)]) + for lg in list(set(match.keys()).difference(known_keys)): + it.__setattr__(lg, match[lg]) if 'itemHook' in args: it = args['itemHook'](it) @@ -391,8 +396,8 @@ def scrape(func): jsontools.update_node(host, func.__module__.split('.')[-1], 'url') parse[1] = scrapertools.get_domain_from_url(host) item.url = urlparse.urlunparse(parse) - page = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True, - session=item.session) + page = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True, session=item.session) + data = page.data.replace("'", '"') data = re.sub('\n|\t', ' ', data) data = re.sub(r'>\s+<', '> <', data) @@ -1048,7 +1053,7 @@ def controls(itemlist, item, AutoPlay=True, CheckLinks=True, down_load=True): channel_node = autoplay_node.get(item.channel, {}) settings_node = channel_node.get('settings', {}) AP = get_setting('autoplay') or settings_node['active'] - HS = config.get_setting('hide_servers') or (settings_node['hide_servers'] if settings_node.has_key('hide_server') else False) + HS = config.get_setting('hide_servers') or (settings_node['hide_servers'] if 'hide_server' in settings_node else False) if CL and not AP: if get_setting('checklinks', item.channel):