Fix httptools and support for Matrix

2020-02-06 19:10:53 +01:00
parent e24f8782c2
commit edb61e7383
2 changed files with 21 additions and 12 deletions
@@ -383,6 +383,10 @@ def downloadpage(url, **opt):

    response['data'] = req.content
    response['url'] = req.url
+
+    if type(response['data']) != str:
+        response['data'] = response['data'].decode('UTF-8')
+
    if not response['data']:
        response['data'] = ''
    try:
@@ -172,7 +172,8 @@ def scrapeLang(scraped, lang, longtitle):
    return language, longtitle

 def cleantitle(title):
-    cleantitle = scrapertools.htmlclean(scrapertools.decodeHtmlentities(title).replace('"', "'").replace('×', 'x').replace('–', '-')).strip()
+    if not isinstance(title, str): title = title.decode('UTF-8')  # rebind: decode() returns a new object, it does not convert in place
+    cleantitle = title.replace('"', "'").replace('×', 'x').replace('–', '-').strip()
    return cleantitle

 def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang):
@@ -207,16 +208,20 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
    for i, match in enumerate(matches):
        if pagination and (pag - 1) * pagination > i and not search: continue  # pagination
        if pagination and i >= pag * pagination and not search: break          # pagination
-        listGroups = match.keys()
-        match = match.values()
+        # listGroups = match.keys()
+        # match = match.values()

-        if len(listGroups) > len(match):  # to fix a bug
-            match = list(match)
-            match.extend([''] * (len(listGroups) - len(match)))
+        # if len(listGroups) > len(match):  # to fix a bug
+        #     support.log()
+        #     match = list(match)
+        #     match.extend([''] * (len(listGroups) - len(match)))

        scraped = {}
        for kk in known_keys:
-            val = match[listGroups.index(kk)] if kk in listGroups else ''
+            # log('KK=',kk)
+            # log('LIST',list(listGroups))
+            # log(match[dict_values])
+            val = match[kk] if kk in match else ''
            if val and (kk == "url" or kk == 'thumb') and 'http' not in val:
                val = scrapertools.find_single_match(item.url, 'https?://[a-z0-9.-]+') + (val if val.startswith('/') else '/' + val)
            scraped[kk] = val
@@ -309,8 +314,8 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t
                other = scraped['other'] if scraped['other'] else ''
            )

-            for lg in list(set(listGroups).difference(known_keys)):
-                it.__setattr__(lg, match[listGroups.index(lg)])
+            for lg in list(set(match.keys()).difference(known_keys)):
+                it.__setattr__(lg, match[lg])

            if 'itemHook' in args:
                it = args['itemHook'](it)
@@ -391,8 +396,8 @@ def scrape(func):
                jsontools.update_node(host, func.__module__.split('.')[-1], 'url')
                parse[1] = scrapertools.get_domain_from_url(host)
                item.url = urlparse.urlunparse(parse)
-                page = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True,
-                                              session=item.session)
+                page = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True, session=item.session)
+
            data = page.data.replace("'", '"')
            data = re.sub('\n|\t', ' ', data)
            data = re.sub(r'>\s+<', '> <', data)
@@ -1048,7 +1053,7 @@ def controls(itemlist, item, AutoPlay=True, CheckLinks=True, down_load=True):
    channel_node = autoplay_node.get(item.channel, {})
    settings_node = channel_node.get('settings', {})
    AP = get_setting('autoplay') or settings_node['active']
-    HS = config.get_setting('hide_servers') or (settings_node['hide_servers'] if settings_node.has_key('hide_server') else False)
+    HS = config.get_setting('hide_servers') or (settings_node['hide_servers'] if 'hide_server' in settings_node else False)

    if CL and not AP:
        if get_setting('checklinks', item.channel):