fix: test su toonitalia lang Sub-ITA

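Per i siti (o le sezioni) in cui si sa già che tutti i titoli sono sottotitolati, ma dalla pagina non è possibile estrarre la lingua con il gruppo ?P<lang>: in questi casi la lingua viene forzata a Sub-ITA.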
2019-09-06 21:51:03 +02:00
parent 30d51a5f6a
commit 88767b8aa3
2 changed files with 20 additions and 13 deletions
--- a/channels/toonitalia.py
+++ b/channels/toonitalia.py
@@ -22,7 +22,7 @@ def mainlist(item):
           ('Popolari', ['', 'peliculas', 'most_view', 'tvshow'])]   
    tvshow = '/lista-serie-tv/'
    anime =['/lista-anime-2/',
-               ('Sub-Ita',['/lista-anime-sub-ita/']),
+               ('Sub-Ita',['/lista-anime-sub-ita/', 'peliculas', 'sub']),
               ('Film Animati',['/lista-film-animazione/','peliculas', '', 'movie'])]    
    search = ''

@@ -31,6 +31,7 @@ def mainlist(item):

@support.scrape
 def peliculas(item):
+##    import web_pdb; web_pdb.set_trace()
    pagination = ''
    anime = True
    blacklist = ['-Film Animazione disponibili in attesa di recensione ']
@@ -39,18 +40,26 @@ def peliculas(item):
        patron = r'<h2 class="entry-title"><a href="(?P<url>[^"]+)" rel="bookmark">(?P<title>[^<]+)</a>'
    elif item.args == 'last':
        patronBlock = 'Aggiornamenti</h2>(?P<block>.*)</ul>'
-        patron = '<a href="(?P<url>[^"]+)">(?P<title>[^<]+)</a>'
+        patron = r'<a href="(?P<url>[^"]+)">(?P<title>[^<]+)</a>'
    elif item.args == 'most_view':
        patronBlock = 'I piu visti</h2>(?P<block>.*)</ul>'
-        patron = '<a href="(?P<url>[^"]+)" title="(?P<title>[^"]+)"'
+        patron = r'<a href="(?P<url>[^"]+)" title="(?P<title>[^"]+)"'
    elif item.args == 'new':
        patronBlock = '<main[^>]+>(?P<block>.*)</main>'
-        patron = '<a href="(?P<url>[^"]+)" rel="bookmark">(?P<title>[^<]+)</a>[^>]+>[^>]+>[^>]+><img.*?src="(?P<thumb>[^"]+)".*?<p>(?P<plot>[^<]+)</p>'
-        patronNext = '<a class="next page-numbers" href="([^"]+)">'
+        patron = r'<a href="(?P<url>[^"]+)" rel="bookmark">(?P<title>[^<]+)</a>[^>]+>[^>]+>[^>]+><img.*?src="(?P<thumb>[^"]+)".*?<p>(?P<plot>[^<]+)</p>'
+        patronNext = '<a class="next page-numbers" href="([^"]+)">'       
    else:
        patronBlock = '"lcp_catlist"[^>]+>(?P<block>.*)</ul>'
        patron = r'<li ><a href="(?P<url>[^"]+)" title="[^>]+">(?P<title>[^<|\(]+)?(?:\([^\d]*(?P<year>\d+)\))?[^<]*</a>'
-        
+
+    if item.args == 'sub':
+        def itemHook(item):
+            # Force the language tag to Sub-ITA: this listing contains only
+            # subtitled titles, so the default [ITA] label would be wrong.
+            item.title = item.title.replace('[ITA]','[Sub-ITA]')
+            item.contentLanguage = 'Sub-ITA'
+            return item
+
    action = 'findvideos' if item.contentType == 'movie' else 'episodios'

    return locals()
@@ -61,9 +70,9 @@ def episodios(item):
    anime = True
    data = support.httptools.downloadpage(item.url, headers=headers).data
    if 'https://vcrypt.net' in data:
-        patron = '(?:<br /> |<p>)(?P<title>[^<]+)<a href="(?P<url>[^"]+)"'
+        patron = r'(?:<br /> |<p>)(?P<title>[^<]+)<a href="(?P<url>[^"]+)"'
    else:
-        patron = '<br /> <a href="(?P<url>[^"]+)" target="_blank" rel="noopener[^>]+>(?P<title>[^<]+)</a>'
+        patron = r'<br /> <a href="(?P<url>[^"]+)" target="_blank" rel="noopener[^>]+>(?P<title>[^<]+)</a>'

    def itemHook(item):
        item.title = item.title.replace('_',' ').replace('–','-')
--- a/core/support.py
+++ b/core/support.py
@@ -155,13 +155,12 @@ def scrapeLang(scraped, lang, longtitle):
    ##    in ita e subita delle serie tv nella stessa pagina
    # altrimenti dopo un sub-ita mette tutti quelli a seguire in sub-ita
    # e credo sia utile per filtertools
-    lang = 'ITA' 
+    lang = 'ITA'
    if scraped['lang']:
        if 'sub' in scraped['lang'].lower():
            lang = 'Sub-ITA'
-        elif 'ita' in scraped['lang'].lower():
-            lang = 'ITA'
-
+##        elif 'ita' in scraped['lang'].lower():
+##            lang = 'ITA'
    longtitle += typo(lang, '_ [] color kod')

    return lang, longtitle
@@ -181,7 +180,6 @@ def scrapeBlock(item, args, block, patron, headers, action, pagination, debug, t

    known_keys = ['url', 'title', 'title2', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating', 'type', 'lang']
    lang = ''  # aggiunto per gestire i siti con pagine di serietv dove si hanno i video in ita e in subita
-
    for i, match in enumerate(matches):
        if pagination and (pag - 1) * pagination > i: continue  # pagination
        if pagination and i >= pag * pagination: break          # pagination