gnula: buscador fix

2018-02-26 12:30:38 -05:00
parent 566d1c87be
commit 561b4c3ab7
1 changed files with 27 additions and 30 deletions
@@ -7,7 +7,8 @@ from core.item import Item
 from platformcode import config, logger
 host = "http://gnula.nu/"
-host_search = "https://www.googleapis.com/customsearch/v1element?key=AIzaSyCVAXiUzRYsML1Pv6RwSG1gunmMikTzQqY&rsz=small&num=10&hl=es&prettyPrint=false&source=gcsc&gss=.es&sig=45e50696e04f15ce6310843f10a3a8fb&cx=014793692610101313036:vwtjajbclpq&q=%s&cse_tok=%s&googlehost=www.google.com&callback=google.search.Search.apiary10745&nocache=1519145965573&start=0"
+host_search = "https://www.googleapis.com/customsearch/v1element?key=AIzaSyCVAXiUzRYsML1Pv6RwSG1gunmMikTzQqY&rsz=small&num=20&hl=es&prettyPrint=false&source=gcsc&gss=.es&sig=45e50696e04f15ce6310843f10a3a8fb&cx=014793692610101313036:vwtjajbclpq&q=%s&cse_tok=%s&googlehost=www.google.com&callback=google.search.Search.apiary10745&nocache=1519145965573&start=0"
 item_per_page = 20
 def mainlist(item):
@@ -51,35 +52,31 @@ def search(item, texto):
 def sub_search(item):
    logger.info()
    itemlist = []
-    data = httptools.downloadpage(item.url).data
+    while True:
-    patron =  '(?s)clicktrackUrl":".*?q=(.*?)".*?'
+        data = httptools.downloadpage(item.url).data
-    patron += 'title":"([^"]+)".*?'
+        if len(data) < 500 :
-    patron += 'cseImage":{"src":"([^"]+)"'
+            break
-    matches = scrapertools.find_multiple_matches(data, patron)
+        page = int(scrapertools.find_single_match(item.url, ".*?start=(\d+)")) + item_per_page
-    for scrapedurl, scrapedtitle, scrapedthumbnail in matches:
+        item.url = scrapertools.find_single_match(item.url, "(.*?start=)") + str(page)
-        scrapedurl = scrapertools.find_single_match(scrapedurl, ".*?online/")
+        patron =  '(?s)clicktrackUrl":".*?q=(.*?)".*?'
-        scrapedtitle = scrapedtitle.decode("unicode-escape").replace(" online", "").replace("<b>", "").replace("</b>", "")
+        patron += 'title":"([^"]+)".*?'
-        if "ver-" not in scrapedurl:
+        patron += 'cseImage":{"src":"([^"]+)"'
-            continue
+        matches = scrapertools.find_multiple_matches(data, patron)
-        year = scrapertools.find_single_match(scrapedtitle, "\d{4}")
+        for scrapedurl, scrapedtitle, scrapedthumbnail in matches:
-        contentTitle = scrapedtitle.replace("(%s)" %year,"").replace("Ver","").strip()
+            scrapedurl = scrapertools.find_single_match(scrapedurl, ".*?online/")
-        itemlist.append(Item(action = "findvideos",
+            scrapedtitle = scrapedtitle.decode("unicode-escape").replace(" online", "").replace("<b>", "").replace("</b>", "")
-                             channel = item.channel,
+            if "ver-" not in scrapedurl:
-                             contentTitle = contentTitle,
+                continue
-                             infoLabels = {"year":year},
+            year = scrapertools.find_single_match(scrapedtitle, "\d{4}")
-                             title = scrapedtitle,
+            contentTitle = scrapedtitle.replace("(%s)" %year,"").replace("Ver","").strip()
-                             thumbnail = scrapedthumbnail,
+            itemlist.append(Item(action = "findvideos",
-                             url = scrapedurl
+                                 channel = item.channel,
-                             ))
+                                 contentTitle = contentTitle,
-    if itemlist:
+                                 infoLabels = {"year":year},
-        page = int(scrapertools.find_single_match(item.url, ".*?start=(\d+)")) + 10
+                                 title = scrapedtitle,
-        npage = (page / 10) + 1
+                                 thumbnail = scrapedthumbnail,
-        item_page = scrapertools.find_single_match(item.url, "(.*?start=)") + str(page)
+                                 url = scrapedurl,
-        itemlist.append(Item(action = "sub_search",
+                                 ))
                             channel = item.channel,
                             title = "[COLOR green]Página %s[/COLOR]" %npage,
                             url = item_page
                             ))
    return itemlist