migliorie a support

2019-08-07 16:47:07 +02:00
parent b14673e4eb
commit 28474b0249
10 changed files with 230 additions and 282 deletions
@@ -45,9 +45,9 @@ def peliculas(item):

    action="findvideos"
    if item.args == "search":
-        patronBlock = r'</script> <div class="boxgrid caption">(.*?)<div id="right_bar">'
+        patronBlock = r'</script> <div class="boxgrid caption">(?P<block>.*)<div id="right_bar">'
    else:
-        patronBlock = r'<div class="cover_kapsul ml-mask">(.*?)<div class="page_nav">'
+        patronBlock = r'<div class="cover_kapsul ml-mask">(?P<block>.*)<div class="page_nav">'
    patron = r'<div class="cover boxcaption"> <h2>.<a href="(?P<url>[^"]+)">.*?<.*?src="(?P<thumb>[^"]+)"'\
         '.+?[^>]+>[^>]+<div class="trdublaj"> (?P<quality>[A-Z]+)<[^>]+>(?:.[^>]+>(?P<lang>.*?)<[^>]+>).*?'\
         '<p class="h4">(?P<title>.*?)</p>[^>]+> [^>]+> [^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+> [^>]+> '\
@@ -69,13 +69,13 @@ def categorie(item):
    blacklist = 'altadefinizione01'

    if item.args == 'genres':
-        patronBlock = r'<ul class="kategori_list">(.*?)</ul>'
+        patronBlock = r'<ul class="kategori_list">(?P<block>.*)</ul>'
        patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>'
    elif item.args == 'years':
-        patronBlock = r'<ul class="anno_list">(.*?)</ul>'
+        patronBlock = r'<ul class="anno_list">(?P<block>.*)</ul>'
        patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>'
    elif item.args == 'orderalf':
-        patronBlock = r'<div class="movies-letter">(.*)<div class="clearfix">'
+        patronBlock = r'<div class="movies-letter">(?P<block>.*)<div class="clearfix">'
        patron = '<a title=.*?href="(?P<url>[^"]+)"><span>(?P<title>.*?)</span>'

    return locals()
@@ -41,9 +41,9 @@ def peliculas(item):

    action="findvideos"
    if item.args == "search":
-        patronBlock = r'</script> <div class="boxgrid caption">(.*?)<div id="right_bar">'
+        patronBlock = r'</script> <div class="boxgrid caption">(?P<block>.*)<div id="right_bar">'
    else:
-        patronBlock = r'<div class="cover_kapsul ml-mask">(.*?)<div class="page_nav">'
+        patronBlock = r'<div class="cover_kapsul ml-mask">(?P<block>.*)<div class="page_nav">'
    patron = r'<div class="cover boxcaption"> <h2>.<a href="(?P<url>[^"]+)">.*?<.*?src="(?P<thumb>[^"]+)"'\
         '.+?[^>]+>[^>]+<div class="trdublaj"> (?P<quality>[A-Z]+)<[^>]+>(?:.[^>]+>(?P<lang>.*?)<[^>]+>).*?'\
         '<p class="h4">(?P<title>.*?)</p>[^>]+> [^>]+> [^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+> [^>]+> '\
@@ -65,10 +65,10 @@ def categorie(item):
    blacklist = 'Altadefinizione01'

    if item.args == 'genres':
-        patronBlock = r'<ul class="kategori_list">(.*?)</ul>'
+        patronBlock = r'<ul class="kategori_list">(?P<block>.*)</ul>'
        patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>'
    elif item.args == 'years':
-        patronBlock = r'<ul class="anno_list">(.*?)</ul>'
+        patronBlock = r'<ul class="anno_list">(?P<block>.*)</ul>'
        patron = '<li><a href="(?P<url>[^"]+)">(?P<title>.*?)</a>'
    elif item.args == 'orderalf':
        patronBlock = r'<div class="movies-letter">(.*)<div class="clearfix">'
@@ -60,13 +60,13 @@ def genres(item):

    action = 'peliculas'
    if item.args == 'genres':
-        patronBlock = r'<ul class="listSubCat" id="Film">(.*?)</ul>'
+        patronBlock = r'<ul class="listSubCat" id="Film">(?P<block>.*)</ul>'
    elif item.args == 'years':
-        patronBlock = r'<ul class="listSubCat" id="Anno">(.*?)</ul>'
+        patronBlock = r'<ul class="listSubCat" id="Anno">(?P<block>.*)</ul>'
    elif item.args == 'quality':
-        patronBlock = r'<ul class="listSubCat" id="Qualita">(.*?)</ul>'
+        patronBlock = r'<ul class="listSubCat" id="Qualita">(?P<block>.*)</ul>'
    elif item.args == 'lucky': # sono i titoli random nella pagina, cambiano 1 volta al dì
-        patronBlock = r'FILM RANDOM.*?class="listSubCat">(.*?)</ul>'
+        patronBlock = r'FILM RANDOM.*?class="listSubCat">(?P<block>.*)</ul>'
        action = 'findvideos'

    patron = r'<li><a href="(?P<url>[^"]+)">(?P<title>[^<]+)<'
@@ -37,7 +37,7 @@ def menu(item):

    action='peliculas'
    patron = r'<li><a href="(?P<url>[^"]+)">(?P<title>[^<]+)</a></li>'
-    patronBlock= r'<ul class="listSubCat" id="'+ str(item.args) + '">(.*?)</ul>'
+    patronBlock= r'<ul class="listSubCat" id="'+ str(item.args) + '">(?P<block>.*)</ul>'

    return locals()

@@ -44,10 +44,9 @@ def mainlist(item):
        ('Anni', ['', 'menu', 'Film per Anno'])
    ]
    tvshow = ['/serietv/',
-        ('Aggiornamenti serie tv', ['/serietv/aggiornamento-quotidiano-serie-tv/', 'last']),
        ('Per Lettera', ['/serietv/', 'menu', 'Serie-Tv per Lettera']),
-        ('Per Genere', ['/serietv/aggiornamento-quotidiano-serie-tv/', 'menu', 'Serie-Tv per Genere']),
-        ('Per anno', ['/serietv/aggiornamento-quotidiano-serie-tv/', 'menu', 'Serie-Tv per Anno'])
+        ('Per Genere', ['/serietv/', 'menu', 'Serie-Tv per Genere']),
+        ('Per anno', ['/serietv/', 'menu', 'Serie-Tv per Anno'])
    ]

    return locals()
@@ -56,76 +55,39 @@ def mainlist(item):
@support.scrape
 def menu(item):
    findhost()
-    patronBlock = item.args + r'<span.*?><\/span>.*?<ul.*?>(.*?)<\/ul>'
+    patronBlock = item.args + r'<span.*?><\/span>.*?<ul.*?>(?P<block>.*?)<\/ul>'
    patronMenu = r'href="?(?P<url>[^">]+)"?>(?P<title>.*?)<\/a>'
    action = 'peliculas'

    return locals()


+@support.scrape
 def newest(categoria):
    findhost()
-    itemlist = []
+    debug = True
    item = Item()
    item.contentType = 'movie'
    item.url = host + '/lista-film-ultimi-100-film-aggiunti/'
-    return support.scrape(item, r'<a href=([^>]+)>([^<([]+)(?:\[([A-Z]+)\])?\s\(([0-9]{4})\)<\/a>',
-                   ['url', 'title', 'quality', 'year'],
-                   patronBlock=r'Ultimi 100 film aggiunti:.*?<\/td>')
+    patron = "<a href=(?P<url>[^>]+)>(?P<title>[^<([]+)(?:\[(?P<quality>[A-Z]+)\])?\s\((?P<year>[0-9]{4})\)<\/a>"
+    patronBlock = r'Ultimi 100 film aggiunti:.*?<\/td>'
+
+    return locals()


-def last(item):
-    support.log()
-    
-    itemlist = []
-    infoLabels = {}
-    quality = ''
-    PERPAGE = 20
-    page = 1
-    if item.page:
-        page = item.page
+def search(item, text):
+    support.log(item.url, "search", text)

-    if item.contentType == 'tvshow':
-        matches = support.match(item, r'<a href="([^">]+)".*?>([^(:(|[)]+)([^<]+)<\/a>', '<article class="sequex-post-content.*?</article>', headers)[0]
-    else:
-        matches = support.match(item, r'<a href=([^>]+)>([^(:(|[)]+)([^<]+)<\/a>', r'<strong>Ultimi 100 film Aggiornati:<\/a><\/strong>(.*?)<td>', headers)[0]
+    try:
+        item.url = item.url + "/?s=" + text.replace(' ', '+')
+        return peliculas(item)

-    for i, (url, title, info) in enumerate(matches):
-        if (page - 1) * PERPAGE > i: continue
-        if i >= page * PERPAGE: break
-        add = True
-        title = title.rstrip()
-        if item.contentType == 'tvshow':
-            for i in itemlist:
-                if i.url == url: # togliamo i doppi
-                    add = False
-        else:
-            infoLabels['year'] = scrapertoolsV2.find_single_match(info, r'\(([0-9]+)\)')
-            quality = scrapertoolsV2.find_single_match(info, r'\[([A-Z]+)\]')
-
-        if quality:
-            longtitle = title + support.typo(quality,'_ [] color kod')
-        else:
-            longtitle = title
-
-        if add:
-            itemlist.append(
-                    Item(channel=item.channel,
-                        action='findvideos' if item.contentType == 'movie' else 'episodios',
-                        contentType=item.contentType,
-                        title=longtitle,
-                        fulltitle=title,
-                        show=title,
-                        quality=quality,
-                        url=url,
-                        infoLabels=infoLabels
-                        )
-                )
-    support.pagination(itemlist, item, page, PERPAGE)
-
-    tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
-
-    return itemlist
+    # Continua la ricerca in caso di errore
+    except:
+        import sys
+        for line in sys.exc_info():
+            logger.error("%s" % line)
+        return []


@support.scrape
@@ -137,51 +99,19 @@ def peliculas(item):
        patron = r'div class="card-image">.*?<img src="(?P<thumb>[^ ]+)" alt.*?<a href="(?P<url>[^ >]+)">(?P<title>[^<[(]+)<\/a>.*?<strong><span style="[^"]+">(?P<genre>[^<>0-9(]+)\((?P<year>[0-9]{4}).*?</(?:p|div)>(?P<plot>.*?)</div'
        action = 'episodios'

-    patronBlock=[r'<div class="?sequex-page-left"?>(.*?)<aside class="?sequex-page-right"?>',
-                                              '<div class="?card-image"?>.*?(?=<div class="?card-image"?>|<div class="?rating"?>)']
+    # patronBlock=[r'<div class="?sequex-page-left"?>(?P<block>.*?)<aside class="?sequex-page-right"?>',
+    #                                           '<div class="?card-image"?>.*?(?=<div class="?card-image"?>|<div class="?rating"?>)']
    patronNext='<a class="?page-link"? href="?([^>]+)"?><i class="fa fa-angle-right">'

    return locals()


+@support.scrape
 def episodios(item):
-    itemlist = []
+    patronBlock = r'(?P<block><div class="sp-head[a-z ]*?" title="Espandi">\s*STAGIONE [0-9]+ - (?P<lang>[^\s]+)(?: - (?P<quality>[^-<]+))?.*?[^<>]*?</div>.*?)<div class="spdiv">\[riduci\]</div>'
+    patron = '(?:<p>)(?P<episode>[0-9]+(?:&#215;|×)[0-9]+)(?P<url>.*?)(?:</p>|<br)'

-    data = httptools.downloadpage(item.url).data
-    matches = scrapertoolsV2.find_multiple_matches(data,
-                                                   r'(<div class="sp-head[a-z ]*?" title="Espandi">[^<>]*?</div>.*?)<div class="spdiv">\[riduci\]</div>')
-
-    for match in matches:
-        support.log(match)
-        blocks = scrapertoolsV2.find_multiple_matches(match, '(?:<p>)(.*?)(?:</p>|<br)')
-        season = scrapertoolsV2.find_single_match(match, r'title="Espandi">.*?STAGIONE\s+\d+([^<>]+)').strip()
-
-        for block in blocks:
-            episode = scrapertoolsV2.find_single_match(block, r'([0-9]+(?:&#215;|×)[0-9]+)').strip()
-            seasons_n = scrapertoolsV2.find_single_match(block, r'<strong>STAGIONE\s+\d+([^<>]+)').strip()
-
-            if seasons_n:
-                season = seasons_n
-
-            if not episode: continue
-
-            season = re.sub(r'&#8211;|–', "-", season)
-            itemlist.append(
-                Item(channel=item.channel,
-                     action="findvideos",
-                     contentType='episode',
-                     title="[B]" + episode + "[/B] " + season,
-                     fulltitle=episode + " " + season,
-                     show=episode + " " + season,
-                     url=block,
-                     extra=item.extra,
-                     thumbnail=item.thumbnail,
-                     infoLabels=item.infoLabels
-                     ))
-
-    support.videolibrary(itemlist, item)
-
-    return itemlist
+    return locals()


 def findvideos(item):
@@ -43,7 +43,7 @@ def mainlist(item):
    return locals()

 def genres(item):
-    return support.scrape2(item, patronBlock=r'<div id="bordobar" class="dropdown-menu(.*?)</li>', patron=r'<a class="dropdown-item" href="([^"]+)" title="([A-z]+)"', listGroups=['url', 'title'], action='video')
+    return support.scrape2(item, patronBlock=r'<div id="bordobar" class="dropdown-menu(?P<block>.*)</li>', patron=r'<a class="dropdown-item" href="([^"]+)" title="([A-z]+)"', listGroups=['url', 'title'], action='video')


 def peliculas(item):
@@ -208,7 +208,7 @@ def findvideos(item):
    itemlist = []

    # data = httptools.downloadpage(item.url, headers=headers).data
-    patronBlock = '<div class="entry-content">(.*?)<footer class="entry-footer">'
+    patronBlock = '<div class="entry-content">(?P<block>.*)<footer class="entry-footer">'
    # bloque = scrapertools.find_single_match(data, patronBlock)

    patron = r'<a href="([^"]+)">'
@@ -75,7 +75,7 @@ def newest(categoria):
 def genre(item):
    patronMenu = '<a href="(?P<url>[^"]+)">(?P<title>[^<]+)</a>'
    blacklist = ['Serie TV','Serie TV Americane','Serie TV Italiane','altadefinizione']
-    patronBlock = '<ul class="sub-menu">(.*?)</ul>'
+    patronBlock = '<ul class="sub-menu">(?P<block>.*)</ul>'
   
    return locals()

@@ -41,17 +41,17 @@ def peliculas(item):
    if item.args == 'search':
        patron = r'<h2 class="entry-title"><a href="(?P<url>[^"]+)" rel="bookmark">(?P<title>[^<]+)</a>'
    elif item.args == 'last':
-        patronBlock = 'Aggiornamenti</h2>(.*?)</ul>'
+        patronBlock = 'Aggiornamenti</h2>(?P<block>.*)</ul>'
        patron = '<a href="(?P<url>[^"]+)">(?P<title>[^<]+)</a>'
    elif item.args == 'most_view':
-        patronBlock = 'I piu visti</h2>(.*?)</ul>'
+        patronBlock = 'I piu visti</h2>(?P<block>.*)</ul>'
        patron = '<a href="(?P<url>[^"]+)" title="(?P<title>[^"]+)"'
    elif item.args == 'new':
-        patronBlock = '<main[^>]+>(.*?)</main>'
+        patronBlock = '<main[^>]+>(?P<block>.*)</main>'
        patron = '<a href="(?P<url>[^"]+)" rel="bookmark">(?P<title>[^<]+)</a>[^>]+>[^>]+>[^>]+><img.*?src="(?P<thumb>[^"]+)".*?<p>(?P<plot>[^<]+)</p>'
        patronNext = '<a class="next page-numbers" href="([^"]+)">'
    else:
-        patronBlock = '"lcp_catlist"[^>]+>(.*?)</ul>'
+        patronBlock = '"lcp_catlist"[^>]+>(?P<block>.*)</ul>'
        patron = r'<li ><a href="(?P<url>[^"]+)" title="[^>]+">(?P<title>[^<|\(]+)?(?:\([^\d]*(?P<year>\d+)\))?[^<]*</a>'
        
    action = 'findvideos' if item.contentType == 'movie' else 'check'
@@ -130,7 +130,7 @@ def regexDbg(item, patron, headers, data=''):
        'regex': patron,
        'flags': 'gm',
        'testString': html,
-        'delimiter': '"',
+        'delimiter': '"""',
        'flavor': 'python'
    }
    r = urllib2.Request(url + '/api/regex', json.dumps(data), headers=headers)
@@ -141,15 +141,138 @@ def regexDbg(item, patron, headers, data=''):

 def scrape2(item, patron = '', listGroups = [], headers="", blacklist="", data="", patronBlock="",
           patronNext="", action="findvideos", addVideolibrary = True, typeContentDict={}, typeActionDict={}):
-    m = re.search(r'\((?!\?)', patron)
+    m = re.search(r'(?<!\\|\[)\((?!\?)', patron)
    n = 0
    while m:
        patron = patron[:m.end()] + '?P<' + listGroups[n] + '>' + patron[m.end():]
-        m = re.search(r'\((?!\?)', patron)
+        m = re.search(r'(?<!\\|\[)\((?!\?)', patron)
        n += 1
    regexDbg(item, patron, headers)


+def scrapeLang(scraped, lang, longtitle):
+    """Work out the language tag of an entry and append it to its title.
+
+    Added to handle sites that list both ITA and Sub-ITA videos of a TV
+    series on the same page.
+
+    scraped: mapping holding a 'lang' entry (it may be empty)
+    lang: language to fall back on when scraped['lang'] is empty
+    longtitle: formatted title the tag is appended to
+
+    Returns the tuple (lang, longtitle).
+    """
+    found = scraped['lang']
+    if found:
+        # any value containing "sub" counts as subtitled, everything else as ITA
+        lang = 'Sub-ITA' if 'sub' in found.lower() else 'ITA'
+    if lang != '':
+        longtitle += typo(lang, '_ [] color kod')
+
+    return lang, longtitle
+
+
+def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, typeContentDict, typeActionDict, blacklist, pag):
+    """Build the Items found in one block of text.
+
+    ``patron`` is applied to ``block`` through
+    ``scrapertoolsV2.find_multiple_matches_groups``; each match inside the
+    requested page becomes an Item filled from the named groups of the pattern.
+
+    item: Item being listed; its channel, url, args, contentType, fulltitle,
+        show and infoLabels are read (contentType may be overwritten through
+        typeContentDict)
+    args: dict of options; only the optional 'itemHook' callable is used here
+    block: text to search
+    patron: regex whose named groups are looked up by the names in known_keys
+    headers: only forwarded to regexDbg when debug is set
+    action: default action of the created Items (typeActionDict may replace it)
+    pagination: matches per page, or a falsy value to disable paging
+    debug: when truthy, pattern and block are sent to regexDbg
+    typeContentDict: {contentType: variants} compared with the 'type' group
+    typeActionDict: {action: variants} compared with the 'type' group
+    blacklist: container the scraped title is checked against
+    pag: page number, counted from 1
+
+    Returns (itemlist, matches): the Items created and every match found in
+    block, so that the caller can compare len(matches) with the page size.
+    """
+    itemlist = []
+
+    matches = scrapertoolsV2.find_multiple_matches_groups(block, patron)
+    log('MATCHES =', matches)
+
+    if debug:
+        regexDbg(item, patron, headers, block)
+
+    known_keys = ['url', 'title', 'title2', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere',
+                  'rating', 'type', 'lang']  # 'episode' added by greko
+    # kept across matches: pages of tv series can mix ITA and Sub-ITA videos,
+    # an entry without its own language inherits the last one seen
+    lang = ''
+
+    for i, match in enumerate(matches):
+        if pagination and (pag - 1) * pagination > i: continue  # before the requested page
+        if pagination and i >= pag * pagination: break  # past the requested page
+        # real lists, so that .index() and .extend() below never depend on dict views
+        listGroups = list(match.keys())
+        match = list(match.values())
+
+        if len(listGroups) > len(match):  # to fix a bug
+            match.extend([''] * (len(listGroups) - len(match)))
+
+        scraped = {}
+        for kk in known_keys:
+            val = match[listGroups.index(kk)] if kk in listGroups else ''
+            if val and (kk == "url" or kk == 'thumb') and 'http' not in val:
+                # relative link: prefix it with scheme and host taken from item.url
+                val = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + val
+            scraped[kk] = val
+
+        if scraped['title']:
+            # fix by greko: double quotes are turned into single quotes
+            rawTitle = scrapertoolsV2.decodeHtmlentities(scraped['title'])
+            rawTitle = rawTitle.replace('"', "'").replace('×', 'x').replace('–', '-')
+            title = scrapertoolsV2.htmlclean(rawTitle).strip()
+        else:
+            title = ''
+
+        plot = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped["plot"]))
+
+        longtitle = typo(title, 'bold')
+        if scraped['quality']: longtitle = longtitle + typo(scraped['quality'], '_ [] color kod')
+        if scraped['episode']:
+            scraped['episode'] = re.sub(r'\s-\s|-|x|&#8211|&#215;', 'x', scraped['episode'])
+            longtitle = typo(scraped['episode'] + ' - ', 'bold') + longtitle
+        if scraped['title2']:
+            rawTitle2 = scrapertoolsV2.decodeHtmlentities(scraped['title2'])
+            rawTitle2 = rawTitle2.replace('"', "'").replace('×', 'x').replace('–', '-')
+            title2 = scrapertoolsV2.htmlclean(rawTitle2).strip()
+            longtitle = longtitle + typo(title2, 'bold _ -- _')
+
+        # The result used to be bound to the misspelled name "longitle", so the
+        # language tag never reached the title.
+        # NOTE(review): scrape() calls scrapeLang again on it.title for blocks
+        # that capture 'lang' - confirm the tag is not appended twice there.
+        lang, longtitle = scrapeLang(scraped, lang, longtitle)
+
+        # if title is set, probably this is a list of episodes or video sources
+        if item.infoLabels["title"]:
+            infolabels = item.infoLabels
+        else:
+            infolabels = {}
+            if scraped['year']:
+                infolabels['year'] = scraped['year']
+            if scraped["plot"]:
+                infolabels['plot'] = plot
+            if scraped['duration']:
+                # own name on purpose: rebinding "matches" here would change the
+                # list handed back to the caller
+                durationMatches = scrapertoolsV2.find_multiple_matches(scraped['duration'],
+                                                                       r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
+                for h, m in durationMatches:
+                    scraped['duration'] = int(h) * 60 + int(m)
+                if not durationMatches:
+                    scraped['duration'] = scrapertoolsV2.find_single_match(scraped['duration'], r'(\d+)')
+                # value above is in minutes; presumably infoLabels wants seconds - TODO confirm
+                infolabels['duration'] = int(scraped['duration']) * 60
+            if scraped['genere']:
+                genres = scrapertoolsV2.find_multiple_matches(scraped['genere'], '[A-Za-z]+')
+                infolabels['genere'] = ", ".join(genres)
+            if scraped["rating"]:
+                infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scraped["rating"])
+
+        if typeContentDict:
+            for name, variants in typeContentDict.items():
+                if scraped['type'] in variants:
+                    item.contentType = name
+        if typeActionDict:
+            for name, variants in typeActionDict.items():
+                if scraped['type'] in variants:
+                    action = name
+
+        # NOTE(review): whenever longtitle is non-empty the blacklist test is
+        # bypassed - confirm this is intended.
+        if (scraped["title"] and scraped["title"] not in blacklist) or longtitle:
+            isSubItem = action == 'findvideos' and item.contentType != 'movie'
+            it = Item(
+                channel=item.channel,
+                action=action,
+                contentType='episode' if (
+                            action == 'findvideos' and item.contentType == 'tvshow') else item.contentType,
+                title=longtitle,
+                fulltitle=item.fulltitle if isSubItem else title,
+                show=item.show if isSubItem else title,
+                quality=scraped["quality"],
+                url=scraped["url"],
+                infoLabels=infolabels,
+                thumbnail=scraped["thumb"],
+                args=item.args,
+                contentSerieName=title if (action == 'episodios' and item.contentType != 'movie') else '',
+                contentLanguage=lang
+            )
+
+            # named groups outside known_keys are copied verbatim onto the Item
+            for lg in list(set(listGroups).difference(known_keys)):
+                it.__setattr__(lg, match[listGroups.index(lg)])
+
+            if 'itemHook' in args:
+                it = args['itemHook'](it)
+            itemlist.append(it)
+    return itemlist, matches
+

 def scrape(func):
    # args is a dict containing the foolowing keys:
@@ -188,7 +311,7 @@ def scrape(func):
        action = args['action'] if 'action' in args else 'findvideos'
        anime = args['anime'] if 'anime' in args else ''
        addVideolibrary = args['addVideolibrary'] if 'addVideolibrary' in args else True
-        blacklist = args['blacklist'] if 'blacklist' in args else ''
+        blacklist = args['blacklist'] if 'blacklist' in args else []
        data = args['data'] if 'data' in args else ''
        patron = args['patron'] if 'patron' in args else args['patronMenu'] if 'patronMenu' in args else ''
        headers = args['headers'] if 'headers' in args else func.__globals__['headers']
@@ -200,6 +323,9 @@ def scrape(func):
        if 'pagination' in args: pagination = args['pagination'] if args['pagination'] else 20
        else: pagination = ''

+        pag = item.page if item.page else 1  # pagination
+        matches = []
+
        log('PATRON= ', patron)
        if not data:
            data = httptools.downloadpage(item.url, headers=headers, ignore_response_code=True).data.replace("'", '"')
@@ -207,175 +333,67 @@ def scrape(func):
            # replace all ' with " and eliminate newline, so we don't need to worry about
            log('DATA =', data)

-            block = data
+        if patronBlock:
+            blocks = scrapertoolsV2.find_multiple_matches_groups(data, patronBlock)
+            block = ""
+            for bl in blocks:
+                blockItemlist, blockMatches = scrapeBlock(item, args, bl['block'], patron, headers, action, pagination, debug,
+                                            typeContentDict, typeActionDict, blacklist, pag)
+                for it in blockItemlist:
+                    if 'lang' in bl:
+                        it.contentLanguage, it.title = scrapeLang(bl, it.contentLanguage, it.title)
+                    if 'quality' in bl and bl['quality']:
+                        it.quality = bl['quality']
+                        it.title = it.title + typo(bl['quality'], '_ [] color kod')
+                log('BLOCK ', '=', block)
+                itemlist.extend(blockItemlist)
+                matches.extend(blockMatches)
+        elif patron:
+            itemlist, matches = scrapeBlock(item, args, data, patron, headers, action, pagination, debug, typeContentDict,
+                                   typeActionDict, blacklist, pag)

-            if patronBlock:
-                if type(patronBlock) == str:
-                    patronBlock = [patronBlock]
+        checkHost(item, itemlist)

-                for n, regex in enumerate(patronBlock):
-                    blocks = scrapertoolsV2.find_multiple_matches(block, regex)
-                    block = ""
-                    for b in blocks:
-                        block += "\n" + str(b)
-                    log('BLOCK ', n, '=', block)
-        else:
-            block = data
-        if patron:
-            matches = scrapertoolsV2.find_multiple_matches_groups(block, patron)
-            log('MATCHES =', matches)
+        if (item.contentType == "tvshow" and (action != "findvideos" and action != "play")) \
+            or (item.contentType == "episode" and action != "play") \
+            or (item.contentType == "movie" and action != "play") :
+            tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
+        # else:                                     # Si perde item show :(
+        #     for it in itemlist:
+        #         it.infoLabels = item.infoLabels

-            if debug:
-                regexDbg(item, patron, headers, block)
+        if 'itemlistHook' in args:
+            itemlist = args['itemlistHook'](itemlist)

-            known_keys = ['url', 'title', 'title2', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere',
-                          'rating', 'type', 'lang']  # by greko aggiunto episode
-            lang = '' # aggiunto per gestire i siti con pagine di serietv dove si hanno i video in ita e in subita
-            
-            pag = item.page if item.page else 1  # pagination
+        if patronNext:
+            nextPage(itemlist, item, data, patronNext, 2)

-            for i, match in enumerate(matches):
-                if pagination and (pag - 1) * pagination > i: continue  # pagination
-                if pagination and i >= pag  * pagination: break  # pagination
-                listGroups = match.keys()
-                match = match.values()
+        # next page for pagination
+        if pagination and len(matches) >= pag * pagination:
+            itemlist.append(
+                Item(channel=item.channel,
+                     action = item.action,
+                     contentType=item.contentType,
+                     title=typo(config.get_localized_string(30992), 'color kod bold'),
+                     url=item.url,
+                     args=item.args,
+                     page=pag + 1,
+                     thumbnail=thumb()))

-                if len(listGroups) > len(match):  # to fix a bug
-                    match = list(match)
-                    match.extend([''] * (len(listGroups) - len(match)))
+        if anime:
+            from specials import autorenumber
+            if inspect.stack()[1][3] == 'episodios' or item.action == 'episodios': autorenumber.renumber(itemlist, item, 'bold')
+            else: autorenumber.renumber(itemlist)

-                scraped = {}
-                for kk in known_keys:
-                    val = match[listGroups.index(kk)] if kk in listGroups else ''
-                    if val and (kk == "url" or kk == 'thumb') and 'http' not in val:
-                        val = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + val
-                    scraped[kk] = val
+        if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
+            item.fulltitle = item.infoLabels["title"]
+            videolibrary(itemlist, item)

-                if scraped['title']:
-                    title = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped['title'])
-                                                     .replace('"',"'").replace('×', 'x').replace('–','-')).strip()  # fix by greko da " a '
-                else:
-                    title = ''
-                    
-                plot = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped["plot"]))
+        if 'patronMenu' in args:
+            itemlist = thumb(itemlist, genre=True)

-                longtitle = typo(title, 'bold')
-                if scraped['quality']: longtitle = longtitle + typo(scraped['quality'], '_ [] color kod')
-                if scraped['episode']:
-                    scraped['episode'] = re.sub(r'\s-\s|-|x|&#8211', 'x', scraped['episode'])
-                    longtitle = typo(scraped['episode'] + ' - ', 'bold') + longtitle
-                if scraped['title2']:
-                    title2 = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped['title2'])
-                                                      .replace('"', "'").replace('×', 'x').replace('–','-')).strip()
-                    longtitle = longtitle + typo(title2, 'bold _ -- _')
-                    
-                ##    Aggiunto/modificato per gestire i siti che hanno i video
-                ##    in ita e subita delle serie tv nella stessa pagina                             
-                if scraped['lang']:              
-                    if 'sub' in scraped['lang'].lower():
-                        lang = 'Sub-ITA'
-                    else:
-                        lang = 'ITA'                      
-                if lang != '':
-                        longtitle += typo(lang, '_ [] color kod')
-
-                # if title is set, probably this is a list of episodes or video sources
-                if item.infoLabels["title"] or item.fulltitle:  
-                    infolabels = item.infoLabels
-                else:
-                    infolabels = {}
-                    if scraped['year']:
-                        infolabels['year'] = scraped['year']
-                    if scraped["plot"]:
-                        infolabels['plot'] = plot
-                    if scraped['duration']:
-                        matches = scrapertoolsV2.find_multiple_matches(scraped['duration'],
-                                                                       r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
-                        for h, m in matches:
-                            scraped['duration'] = int(h) * 60 + int(m)
-                        if not matches:
-                            scraped['duration'] = scrapertoolsV2.find_single_match(scraped['duration'], r'(\d+)')
-                        infolabels['duration'] = int(scraped['duration']) * 60
-                    if scraped['genere']:
-                        genres = scrapertoolsV2.find_multiple_matches(scraped['genere'], '[A-Za-z]+')
-                        infolabels['genere'] = ", ".join(genres)
-                    if scraped["rating"]:
-                        infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scraped["rating"])
-
-                if typeContentDict:
-                    for name, variants in typeContentDict.items():
-                        if scraped['type'] in variants:
-                            item.contentType = name
-                if typeActionDict:
-                    for name, variants in typeActionDict.items():
-                        if scraped['type'] in variants:
-                            action = name
-
-                if scraped["title"]:
-                    if scraped["title"] not in blacklist:
-                        it = Item(
-                            channel=item.channel,
-                            action=action,
-                            contentType= 'episode' if (action == 'findvideos' and item.contentType == 'tvshow') else item.contentType,
-                            title=longtitle,
-                            fulltitle=item.fulltitle if (action == 'findvideos' and item.contentType != 'movie') else title,
-                            show=item.show if (action == 'findvideos' and item.contentType != 'movie') else title,
-                            quality=scraped["quality"],
-                            url=scraped["url"],
-                            infoLabels=infolabels,
-                            thumbnail=scraped["thumb"],
-                            args=item.args,
-                            contentSerieName = title if (action == 'episodios' and item.contentType != 'movie') else ''
-                        )
-                        
-                        for lg in list(set(listGroups).difference(known_keys)):
-                            it.__setattr__(lg, match[listGroups.index(lg)])
-
-                        if 'itemHook' in args:
-                            it = args['itemHook'](it)
-                        itemlist.append(it)
-            checkHost(item, itemlist)
-           
-            if (item.contentType == "tvshow" and (action != "findvideos" and action != "play")) \
-                or (item.contentType == "episode" and action != "play") \
-                or (item.contentType == "movie" and action != "play") :            
-                tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
-            # else:                                     # Si perde item show :(
-            #     for it in itemlist:
-            #         it.infoLabels = item.infoLabels
-                
-            if 'itemlistHook' in args:
-                itemlist = args['itemlistHook'](itemlist)
-
-            if patronNext:
-                nextPage(itemlist, item, data, patronNext, 2)
-
-            # next page for pagination
-            if pagination and len(matches) >= pag * pagination:
-                itemlist.append(
-                    Item(channel=item.channel,
-                         action = item.action,
-                         contentType=item.contentType,
-                         title=typo(config.get_localized_string(30992), 'color kod bold'),
-                         url=item.url,
-                         args=item.args,
-                         page=pag + 1,
-                         thumbnail=thumb()))
-
-            if anime:
-                from specials import autorenumber
-                if inspect.stack()[1][3] == 'episodios' or item.action == 'episodios': autorenumber.renumber(itemlist, item, 'bold')
-                else: autorenumber.renumber(itemlist)
-                
-            if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
-                item.fulltitle = item.infoLabels["title"]
-                videolibrary(itemlist, item)
-
-            if 'patronMenu' in args:
-                itemlist = thumb(itemlist, genre=True)
-                
-            if 'fullItemlistHook' in args:
-                itemlist = args['fullItemlistHook'](itemlist)
+        if 'fullItemlistHook' in args:
+            itemlist = args['fullItemlistHook'](itemlist)

        return itemlist

@@ -604,7 +622,7 @@ def menu(func):
                             args=var[2] if len(var) > 2 else '',
                             contentType= var[3] if len(var) > 3 else 'movie',)
                # add search menu for category
-                if 'search' not in args: menuItem(itemlist, filename, 'Cerca ' + title + '… submenu bold', 'search', host, args=name)
+                if 'search' not in args: menuItem(itemlist, filename, 'Cerca ' + title + '… submenu bold', 'search', host + url, args=name)

        # Make EXTRA MENU (on bottom)
        for name, var in args.items():