From faac026cec68e726614bfea9473fbe8301ee838a Mon Sep 17 00:00:00 2001
From: thepasto <thepasto@gmail.com>
Date: Thu, 2 May 2019 18:41:32 +0200
Subject: [PATCH] Some improvements in scrape: - avoid unwanted calls to tmdb
 (i.e. episodes list) based on action - add custom properties to Item not in
 known_keys list

---
 channels/support.py | 91 ++++++++++++++++++++++++---------------------
 1 file changed, 49 insertions(+), 42 deletions(-)

diff --git a/channels/support.py b/channels/support.py
index 379b44a5..cbbbdf30 100644
--- a/channels/support.py
+++ b/channels/support.py
@@ -135,25 +135,24 @@ def scrape(item, patron = '', listGroups = [], headers="", blacklist="", data=""
         matches = scrapertoolsV2.find_multiple_matches(block, patron)
         log('MATCHES =', matches)
 
+        known_keys = ['url', 'title', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating']
         for match in matches:
             if len(listGroups) > len(match):  # to fix a bug
                 match = list(match)
-                match.extend([''] * (len(listGroups)-len(match)))
+                match.extend([''] * (len(listGroups) - len(match)))
 
-            scrapedurl = url_host+match[listGroups.index('url')] if 'url' in listGroups else ''
-            scrapedtitle = match[listGroups.index('title')] if 'title' in listGroups else ''
-            scrapedthumb = match[listGroups.index('thumb')] if 'thumb' in listGroups else ''
-            scrapedquality = match[listGroups.index('quality')] if 'quality' in listGroups else ''
-            scrapedyear = match[listGroups.index('year')] if 'year' in listGroups else ''
-            scrapedplot = match[listGroups.index('plot')] if 'plot' in listGroups else ''
-            scrapedduration = match[listGroups.index('duration')] if 'duration' in listGroups else ''
-            scrapedgenre = match[listGroups.index('genre')] if 'genre' in listGroups else ''
-            scrapedrating = match[listGroups.index('rating')] if 'rating' in listGroups else ''
+            scraped = {}
+            for kk in known_keys:
+                val = match[listGroups.index(kk)] if kk in listGroups else ''
+                if kk == "url":
+                    val = url_host + val
+                scraped[kk] = val
 
-            title = scrapertoolsV2.decodeHtmlentities(scrapedtitle)
-            plot = scrapertoolsV2.decodeHtmlentities(scrapedplot)
-            if scrapedquality:
-                longtitle = '[B]' + title + '[/B] [COLOR blue][' + scrapedquality + '][/COLOR]'
+            title = scrapertoolsV2.decodeHtmlentities(scraped["title"]).strip()
+            plot = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped["plot"]))
+
+            if scraped["quality"]:
+                longtitle = '[B]' + title + '[/B] [COLOR blue][' + scraped["quality"] + '][/COLOR]'
             else:
                 longtitle = '[B]' + title + '[/B]'
 
@@ -161,40 +160,48 @@ def scrape(item, patron = '', listGroups = [], headers="", blacklist="", data=""
                 infolabels = item.infoLabels
             else:
                 infolabels = {}
-                if scrapedyear:
-                    infolabels['year'] = scrapedyear
-                if scrapedplot:
+                if scraped["year"]:
+                    infolabels['year'] = scraped["year"]
+                if scraped["plot"]:
                     infolabels['plot'] = plot
-                if scrapedduration:
-                    matches = scrapertoolsV2.find_multiple_matches(scrapedduration, r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
-                    scrapertoolsV2.printMatches(matches)
+                if scraped["duration"]:
+                    matches = scrapertoolsV2.find_multiple_matches(scraped["duration"],r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
                     for h, m in matches:
-                        scrapedduration = int(h) * 60 + int(m)
+                        scraped["duration"] = int(h) * 60 + int(m)
                     if not matches:
-                        scrapedduration = scrapertoolsV2.find_single_match(scrapedduration, r'(\d+)')
-                    infolabels['duration'] = int(scrapedduration) * 60
-                if scrapedgenre:
-                    genres = scrapertoolsV2.find_multiple_matches(scrapedgenre, '[A-Za-z]+')
-                    infolabels['genre'] = ", ".join(genres)
-                if scrapedrating:
-                    infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scrapedrating)
+                        scraped["duration"] = scrapertoolsV2.find_single_match(scraped["duration"], r'(\d+)')
+                    infolabels['duration'] = int(scraped["duration"]) * 60
+                if scraped["genere"]:
+                    genres = scrapertoolsV2.find_multiple_matches(scraped["genere"], '[A-Za-z]+')
+                    infolabels['genere'] = ", ".join(genres)
+                if scraped["rating"]:
+                    infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scraped["rating"])
 
-            if not scrapedtitle in blacklist:
-                itemlist.append(
-                    Item(channel=item.channel,
-                         action=action,
-                         contentType=item.contentType,
-                         title=longtitle,
-                         fulltitle=title,
-                         show=title,
-                         quality=scrapedquality,
-                         url=scrapedurl,
-                         infoLabels=infolabels,
-                         thumbnail=scrapedthumb
-                         )
+            if scraped["title"] not in blacklist:
+                it = Item(
+                    channel=item.channel,
+                    action=action,
+                    contentType=item.contentType,
+                    title=longtitle,
+                    fulltitle=title,
+                    show=title,
+                    quality=scraped["quality"],
+                    url=scraped["url"],
+                    infoLabels=infolabels,
+                    thumbnail=scraped["thumb"]
                 )
 
-        tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
+                for lg in list(set(listGroups).difference(known_keys)):
+                    it.__setattr__(lg, match[listGroups.index(lg)])
+
+                itemlist.append(it)
+
+        if (item.contentType == "episode" and (action != "findvideos" and action != "play")) \
+                or (item.contentType == "movie" and action != "play"):
+            tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
+        else:
+            for it in itemlist:
+                it.infoLabels = item.infoLabels
 
         if patronNext:
             nextPage(itemlist, item, data, patronNext, 2)