Merge remote-tracking branch 'origin/master'

Author: marco
Date: 2020-02-17 18:19:50 +01:00
2 changed files with 62 additions and 42 deletions


@@ -4,7 +4,7 @@
 # ------------------------------------------------------------
-from core import scrapertools, httptools, support
+from core import support
 from core.item import Item
 host = support.config.get_channel_url()
@@ -85,41 +85,38 @@ def peliculas(item):
 def episodios(item):
     def get_season(pageData, seas_url, season):
         data = ''
-        if pageData:  # avoid re-downloading the page
-            episodes = pageData
-            pageData = ''
-        else:
-            episodes = httptools.downloadpage(seas_url).data
-        episodes = scrapertools.find_single_match(episodes, patron_episode)
-        for episode_url, episode in scrapertools.find_multiple_matches(episodes, patron_option):
+        episodes = support.match(pageData if pageData else seas_url, patronBlock=patron_episode, patron=patron_option).matches
+        for episode_url, episode in episodes:
             episode_url = support.urlparse.urljoin(item.url, episode_url)
             if '-' in episode: episode = episode.split('-')[0].zfill(2) + 'x' + episode.split('-')[1].zfill(2)
             title = season + "x" + episode.zfill(2) + ' - ' + item.fulltitle
             data += title + '|' + episode_url + '\n'
         return data

-    def itemlistHook(itemlist):
-        itemlist.sort(key=lambda item: item.title)
-        return itemlist
-    url = support.match(item,
-                        patron=r'<iframe id="iframeVid" width="[^"]+" height="[^"]+" src="([^"]+)" allowfullscreen').match
-    pageData = httptools.downloadpage(url).data
     patron_season = '<div class="buttons-bar seasons">(.*?)<div class="buttons'
     patron_episode = '<div class="buttons-bar episodes">(.*?)<div class="buttons'
     patron_option = r'<a href="([^"]+?)".*?>([^<]+?)</a>'
-    data = ''
-    seasons = scrapertools.find_single_match(pageData, patron_season)
+    url = support.match(item, patron=r'<iframe id="iframeVid" width="[^"]+" height="[^"]+" src="([^"]+)" allowfullscreen').match
+    seasons = support.match(url, patronBlock=patron_season, patron=patron_option)
+    data = ''

     from concurrent import futures
     with futures.ThreadPoolExecutor() as executor:
         thL = []
-        for seas_url, season in scrapertools.find_multiple_matches(seasons, patron_option):
-            thL.append(executor.submit(get_season, pageData, seas_url, season))
+        for i, season in enumerate(seasons.matches):
+            thL.append(executor.submit(get_season, seasons.data if i == 0 else '', season[0], season[1]))
         for res in futures.as_completed(thL):
             if res.result():
                 data += res.result()

     patron = r'(?P<title>[^\|]+)\|(?P<url>[^\n]+)\n'
     action = 'findvideos'
+    def itemlistHook(itemlist):
+        itemlist.sort(key=lambda item: item.title)
+        return itemlist
     return locals()
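The new episodios fans each season out to a worker thread and stitches the pieces back together as they complete. Below is a minimal standalone sketch of that pattern, using only the standard library (under Kodi's Python 2 the same API came from the futures backport); `fetch_season` and `season_urls` are illustrative stand-ins, not addon code.

```python
from concurrent import futures

def fetch_season(url):
    # The real get_season downloads and parses one season page; this
    # stand-in just echoes the URL so the sketch runs anywhere.
    return 'parsed:' + url + '\n'

season_urls = ['https://example.com/s1', 'https://example.com/s2']

data = ''
with futures.ThreadPoolExecutor() as executor:
    pending = [executor.submit(fetch_season, u) for u in season_urls]
    for res in futures.as_completed(pending):
        if res.result():
            data += res.result()
print(data)
```

Since as_completed() yields results in completion order rather than submission order, the channel relies on the itemlistHook added at the end of this hunk to sort the episode list afterwards.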


@@ -157,8 +157,7 @@ def scrapeLang(scraped, lang, longtitle):
     return language, longtitle

 def cleantitle(title):
     if type(title) != str: title.decode('UTF-8')
-    cleantitle = title.replace('"', "'").replace('×', 'x').replace('–', '-').strip()
+    cleantitle = scrapertools.htmlclean(scrapertools.decodeHtmlentities(title).replace('"', "'").replace('×', 'x').replace('–', '-')).strip()
     return cleantitle

 def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang):
@@ -193,20 +192,16 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang):
     for i, match in enumerate(matches):
         if pagination and (pag - 1) * pagination > i and not search: continue  # pagination
         if pagination and i >= pag * pagination and not search: break  # pagination
-        # listGroups = match.keys()
-        # match = match.values()
+        listGroups = match.keys()
+        match = match.values()
-        # if len(listGroups) > len(match):  # to fix a bug
-        #     support.log()
-        #     match = list(match)
-        #     match.extend([''] * (len(listGroups) - len(match)))
+        if len(listGroups) > len(match):  # to fix a bug
+            match = list(match)
+            match.extend([''] * (len(listGroups) - len(match)))
         scraped = {}
         for kk in known_keys:
-            # log('KK=',kk)
-            # log('LIST',list(listGroups))
-            # log(match[dict_values])
-            val = match[kk] if kk in match else ''
+            val = match[listGroups.index(kk)] if kk in listGroups else ''
             if val and (kk == "url" or kk == 'thumb') and 'http' not in val:
                 val = scrapertools.find_single_match(item.url, 'https?://[a-z0-9.-]+') + (val if val.startswith('/') else '/' + val)
             scraped[kk] = val
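The un-commented guard and the new lookup work together: group names are read from match.keys() once, values are then fetched positionally via listGroups.index(kk), and the value list is padded with empty strings when it comes up short. A self-contained sketch of that logic, assuming a match representation whose values() can be shorter than its keys() (which is what the "to fix a bug" comment implies):

```python
listGroups = ['url', 'title', 'year']    # named groups in the patron
match = ['https://example.com/a', 'A']   # 'year' did not participate

if len(listGroups) > len(match):         # the re-enabled guard
    match = list(match)
    match.extend([''] * (len(listGroups) - len(match)))

# positional lookup by group name, as in the new line
val = match[listGroups.index('year')] if 'year' in listGroups else ''
assert val == ''
```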
@@ -299,8 +294,8 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, search, pag, function, lang):
             other = scraped['other'] if scraped['other'] else ''
         )
-        for lg in list(set(match.keys()).difference(known_keys)):
-            it.__setattr__(lg, match[lg])
+        for lg in list(set(listGroups).difference(known_keys)):
+            it.__setattr__(lg, match[listGroups.index(lg)])

         if 'itemHook' in args:
             it = args['itemHook'](it)
@@ -381,8 +376,8 @@ def scrape(func):
                 jsontools.update_node(host, func.__module__.split('.')[-1], 'url')
                 parse[1] = scrapertools.get_domain_from_url(host)
                 item.url = urlparse.urlunparse(parse)
-            page = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True, session=item.session)
+            page = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True,
+                                          session=item.session)
             data = page.data.replace("'", '"')
             data = re.sub('\n|\t', ' ', data)
             data = re.sub(r'>\s+<', '> <', data)
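For reference, the quote and whitespace normalisation applied to page.data just after the changed call is easy to verify in isolation; the sample HTML here is made up:

```python
import re

data = "<div>\n\t<a href='x'>Link</a>\n</div>"
data = data.replace("'", '"')          # normalise quotes for the regexes
data = re.sub('\n|\t', ' ', data)      # flatten newlines and tabs
data = re.sub(r'>\s+<', '> <', data)   # collapse whitespace between tags
print(data)  # <div> <a href="x">Link</a> </div>
```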
@@ -749,7 +744,7 @@ def typo(string, typography=''):
     if '{}' in string:
         string = '{' + re.sub(r'\s\{\}','',string) + '}'
     if 'submenu' in string:
-        string = "•• " + re.sub(r'\ssubmenu','',string)
+        string = u"\u2022\u2022 ".encode('utf-8') + re.sub(r'\ssubmenu','',string)
     if 'color' in string:
         color = scrapertools.find_single_match(string, 'color ([a-z]+)')
         if color == 'kod' or '': color = kod_color
@@ -763,7 +758,7 @@ def typo(string, typography=''):
     if '--' in string:
         string = ' - ' + re.sub(r'\s--','',string)
     if 'bullet' in string:
-        string = '[B]•[/B] ' + re.sub(r'\sbullet','',string)
+        string = '[B]' + u"\u2022".encode('utf-8') + '[/B] ' + re.sub(r'\sbullet','',string)
     return string
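Both typo() hunks trade a literal bullet character for an explicit u"\u2022".encode('utf-8'). Assuming the Python 2 interpreter Kodi shipped at the time, encode() returns a byte string that concatenates cleanly with the surrounding str literals; on Python 3 it would return bytes, and the + would raise TypeError. A version-aware sketch of the difference (illustrative, not addon code):

```python
# -*- coding: utf-8 -*-
import sys

bullet = u"\u2022"
if sys.version_info[0] == 2:
    # Python 2: encode() returns str (bytes), safe to mix with str literals
    s = '[B]' + bullet.encode('utf-8') + '[/B] '
else:
    # Python 3: encode() would return bytes; keep everything as str instead
    s = '[B]' + bullet + '[/B] '
print(s)
```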
@@ -771,10 +766,33 @@ def typo(string, typography=''):
 def match(item_url_string, **args):
+    '''
+    match is a function that combines httptools and scrapertools:
+    it supports all httptools arguments plus the following:
+
+    @param item_url_string: if it is an Item, download the page at item.url; if it is a URL, download that page; if it is a plain string, pass it to scrapertools
+    @type item_url_string: Item or str
+    @param string: force item_url_string to be treated as a string
+    @type string: bool
+    @param patronBlock: find the first block matching the pattern
+    @type patronBlock: str
+    @param patronBlocks: find all blocks matching the pattern(s)
+    @type patronBlocks: str or list
+    @param debugBlock: send the block pattern to regex101.com for debugging
+    @type debugBlock: bool
+    @param patron: find all matches in the block, blocks or data
+    @type patron: str or list
+    @param debug: send the pattern to regex101.com for debugging
+    @type debug: bool
+
+    Returns an item with the following keys:
+        data: data of the web page
+        block: the first block
+        blocks: all the blocks
+        match: the first match
+        matches: all the matches
+    '''
     log(item_url_string)
-    matches = []
+    matches = blocks = []
     url = None
     # arguments allowed for scrape
     patron = args.get('patron', None)
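The docstring added above is the whole contract of match(), but the function itself only runs inside the addon. Below is a rough, runnable model of just the documented block/match behaviour; toy_match, the sample HTML and the patterns are stand-ins, and the real function also accepts Item objects and URLs, forwards httptools arguments, and returns an object with attribute access rather than a dict.

```python
import re

def toy_match(data, patronBlock=None, patron=None):
    # patronBlock narrows the data to one block, patron collects matches
    block = re.search(patronBlock, data, re.S).group(1) if patronBlock else data
    matches = re.findall(patron, block) if patron else []
    return {'data': data, 'block': block, 'blocks': [block],
            'match': matches[0] if matches else '', 'matches': matches}

html = '<div class="eps"><a href="/e1">1</a><a href="/e2">2</a></div><div>junk</div>'
res = toy_match(html, patronBlock=r'class="eps">(.*?)</div>',
                patron=r'<a href="([^"]+)">([^<]+)</a>')
print(res['matches'])  # [('/e1', '1'), ('/e2', '2')]
```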
@@ -783,12 +801,15 @@ def match(item_url_string, **args):
     debug = args.get('debug', False)
     debugBlock = args.get('debugBlock', False)
     string = args.get('string', False)
     # remove scrape arguments
     args = dict([(key, val) for key, val in args.items() if key not in ['patron', 'patronBlock', 'patronBlocks', 'debug', 'debugBlock', 'string']])
+    # dbg()
     # check type of item_url_string
-    if type(item_url_string) == str:
-        if item_url_string.startswith('http') and not string: url = item_url_string
+    if string:
+        data = item_url_string
+    elif type(item_url_string) == str:
+        if item_url_string.startswith('http'): url = item_url_string
         else: data = item_url_string
     else:
         # if item_url_string is an item use item.url as url
@@ -808,7 +829,9 @@ def match(item_url_string, **args):
     if patronBlock:
         blocks = [scrapertools.find_single_match(data, patronBlock)]
     elif patronBlocks:
-        blocks = scrapertools.find_multiple_matches(data, patronBlock)
+        if type(patronBlocks) == str: patronBlocks = [patronBlocks]
+        for p in patronBlocks:
+            blocks += scrapertools.find_multiple_matches(data, p)
     else:
         blocks = [data]
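The replacement lines normalise patronBlocks so that a single pattern and a list of patterns go through the same loop. The same accept-str-or-list idiom in isolation, with plain re standing in for scrapertools:

```python
import re

def collect_blocks(data, patronBlocks):
    if type(patronBlocks) == str:        # a single pattern...
        patronBlocks = [patronBlocks]    # ...becomes a one-element list
    blocks = []
    for p in patronBlocks:
        blocks += re.findall(p, data)
    return blocks

print(collect_blocks('a1 b2 a3', r'a\d'))            # ['a1', 'a3']
print(collect_blocks('a1 b2 a3', [r'a\d', r'b\d']))  # ['a1', 'a3', 'b2']
```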
@@ -1041,7 +1064,7 @@ def controls(itemlist, item, AutoPlay=True, CheckLinks=True, down_load=True):
     channel_node = autoplay_node.get(item.channel, {})
     settings_node = channel_node.get('settings', {})
     AP = get_setting('autoplay') or settings_node['active']
-    HS = config.get_setting('hide_servers') or (settings_node['hide_servers'] if 'hide_server' in settings_node else False)
+    HS = config.get_setting('hide_servers') or (settings_node['hide_servers'] if settings_node.has_key('hide_server') else False)
     if CL and not AP:
         if get_setting('checklinks', item.channel):
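One portability note on this last hunk: dict.has_key() exists only in Python 2 (it was removed in Python 3), while the 'key' in dict form works in both, so the in spelling is the safer of the two lines. Note also that both versions check 'hide_server' but read 'hide_servers'; if that is not intentional, the False fallback is always taken. A minimal illustration:

```python
settings_node = {'hide_servers': True}

# portable membership test, works on Python 2 and 3
HS = settings_node['hide_servers'] if 'hide_server' in settings_node else False
print(HS)  # False: the key checked ('hide_server') differs from the key read
```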