', start)
+    data = data[start:end]
+
+    # NB: the HTML tag literals inside these three patterns (and the search string
+    # in the find() call above) were stripped when the diff was rendered as HTML;
+    # only the capture groups survive, so the tags must be restored from the
+    # target site's markup.
+    patron_res = '(.*?)'    # block listing the available resolutions
+    patron_mir = '(.*?)'    # block listing the mirrors for one resolution
+    patron_media = r''      # two groups expected: media label and encoded media url
+
+    res = scrapertoolsV2.find_single_match(data, patron_res)
+
+    itemlist = []
+
+    # a capture group for res_url was lost with the stripped tag
+    for res_url, res_video in scrapertoolsV2.find_multiple_matches(res, '([^<]+?)'):
+
+        data = httptools.downloadpage(urlparse.urljoin(url, res_url)).data.replace('\n', '')
+
+        mir = scrapertoolsV2.find_single_match(data, patron_mir)
+
+        # likewise, a capture group for mir_url was lost with the stripped tag
+        for mir_url, server in scrapertoolsV2.find_multiple_matches(mir, '([^<]+?)'):
+
+            data = httptools.downloadpage(urlparse.urljoin(url, mir_url)).data.replace('\n', '')
+            for media_label, media_url in scrapertoolsV2.find_multiple_matches(data, patron_media):
+                itemlist.append(Item(channel=item.channel,
+                                     action="play",
+                                     title=item.title + " [" + color(server, 'orange') + "] - " + color(res_video, 'limegreen'),
+                                     fulltitle=item.fulltitle,
+                                     quality=res_video,
+                                     show=item.show,
+                                     thumbnail=item.thumbnail,
+                                     contentType=item.contentType,
+                                     server=server,
+                                     url=url_decode(media_url)))
+                log("video -> ", res_video)
+
+    return itemlist
+
+
+def url_decode(url_enc):
+    # the site serves media urls obfuscated as base64, reversed, with the two
+    # halves swapped; an odd-length string keeps its last character in place
+    length = len(url_enc)
+    if length % 2 == 0:
+        half = length // 2
+        first = url_enc[0:half]
+        last = url_enc[half:length]
+        url_enc = last + first
+        reverse = url_enc[::-1]
+        return base64.b64decode(reverse)
+
+    # odd length: set the last character aside and decode the even-length rest
+    last_car = url_enc[length - 1]
+    url_enc = url_enc[:length - 1]  # str is immutable: slice it, don't assign to url_enc[length - 1]
+    len1 = len(url_enc)
+    half = len1 // 2
+    first = url_enc[0:half]
+    last = url_enc[half:len1]
+    url_enc = last + first
+    reverse = url_enc[::-1]
+    reverse = reverse + last_car
+    return base64.b64decode(reverse)
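+
+# A minimal sketch of the inverse transform, handy for testing url_decode.
+# url_encode is hypothetical (it is not part of the site or of this channel)
+# and assumes the even-length branch, i.e. a base64 string of even length.
+def url_encode(url):
+    b64 = base64.b64encode(url)      # plain base64 of the url
+    rev = b64[::-1]                  # reversed
+    half = len(rev) // 2
+    return rev[half:] + rev[:half]   # halves swapped
+
+# url_decode(url_encode('https://example.com/video.mp4')) == 'https://example.com/video.mp4'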
+
+
+def color(text, color):
+    return "[COLOR " + color + "]" + text + "[/COLOR]"
+
+
+def scrape(item, patron='', listGroups=[], headers="", blacklist="", data="", patron_block="",
+           patronNext="", action="findvideos", addVideolibrary=True):
+    # patron: the pattern used to scrape the page; every capturing group must have a
+    #         matching entry in listGroups
+    # listGroups: a list naming, in order, what each capturing group of your patron holds;
+    #         accepted values are: url, title, episode, thumb, quality, year, plot,
+    #         duration, genere, rating
+    # headers: values to pass in the request header
+    # blacklist: titles to exclude (service articles, for example)
+    # data: pass the page data manually, e.g. when you need some custom replacement
+    # patron_block: pattern that extracts the part(s) of the page to scrape with patron;
+    #         for a "block inside another block" pass a list of patterns; note that all
+    #         matches are packed into a single string
+    # patronNext: pattern that scrapes the next-page link
+    # action: the action the results should perform, if different from "findvideos";
+    #         useful when scraping films by genre
+    # note: if a scraped url or thumb does not contain the host, the host of item.url
+    #         is prepended automatically
+    # example usage:
+    #   import support
+    #   patron = 'blablabla'
+    #   headers = [['Referer', host]]
+    #   blacklist = 'Request a TV serie!'
+    #   return support.scrape(item, patron, ['thumb', 'quality', 'url', 'title', 'year', 'plot'],
+    #                         headers=headers, blacklist=blacklist)
+
+    itemlist = []
+
+    if not data:
+        data = httptools.downloadpage(item.url, headers=headers).data.replace("'", '"')
+        data = re.sub('\n|\t', ' ', data)
+        # every ' is replaced with " and newlines/tabs are collapsed, so patterns
+        # don't have to account for quote style or line breaks
+    log('DATA =', data)
+
+    block = data
+
+    if patron_block:
+        if isinstance(patron_block, str):
+            patron_block = [patron_block]
+
+        for n, regex in enumerate(patron_block):
+            blocks = scrapertoolsV2.find_multiple_matches(block, regex)
+            block = ""
+            for b in blocks:
+                block += "\n" + b
+            log('BLOCK ', n, '=', block)
+    if patron and listGroups:
+        matches = scrapertoolsV2.find_multiple_matches(block, patron)
+        log('MATCHES =', matches)
+
+        known_keys = ['url', 'title', 'episode', 'thumb', 'quality', 'year', 'plot', 'duration', 'genere', 'rating']  # episode added by greko
+        for match in matches:
+            if len(listGroups) > len(match):  # pad the match so every name in listGroups has a value
+                match = list(match)
+                match.extend([''] * (len(listGroups) - len(match)))
+
+            scraped = {}
+            for kk in known_keys:
+                val = match[listGroups.index(kk)] if kk in listGroups else ''
+                if val and (kk == "url" or kk == 'thumb') and 'http' not in val:
+                    val = scrapertoolsV2.find_single_match(item.url, 'https?://[a-z0-9.-]+') + val
+                scraped[kk] = val
+
+            title = scrapertoolsV2.decodeHtmlentities(scraped["title"]).strip()
+            plot = scrapertoolsV2.htmlclean(scrapertoolsV2.decodeHtmlentities(scraped["plot"]))
+
+            if scraped["quality"] and scraped["episode"]:  # episode handling added by greko
+                longtitle = '[B]' + title + '[/B] - [B]' + scraped["episode"] + '[/B][COLOR blue][' + scraped["quality"] + '][/COLOR]'
+            elif scraped["episode"]:
+                longtitle = '[B]' + title + '[/B] - [B]' + scraped["episode"] + '[/B]'
+            else:
+                longtitle = '[B]' + title + '[/B]'
+
+            if item.infoLabels["title"] or item.fulltitle:  # title already set: probably a list of episodes or video sources
+                infolabels = item.infoLabels
+            else:
+                infolabels = {}
+            if scraped["year"]:
+                infolabels['year'] = scraped["year"]
+            if scraped["plot"]:
+                infolabels['plot'] = plot
+            if scraped["duration"]:
+                # e.g. "1h 30", "1:30" or "1.30" -> 1 * 60 + 30 = 90 minutes
+                matches = scrapertoolsV2.find_multiple_matches(scraped["duration"], r'([0-9])\s*?(?:[hH]|:|\.|,|\\|\/|\||\s)\s*?([0-9]+)')
+                for h, m in matches:
+                    scraped["duration"] = int(h) * 60 + int(m)
+                if not matches:
+                    scraped["duration"] = scrapertoolsV2.find_single_match(scraped["duration"], r'(\d+)')
+                infolabels['duration'] = int(scraped["duration"]) * 60  # Kodi expects seconds
+            if scraped["genere"]:
+                genres = scrapertoolsV2.find_multiple_matches(scraped["genere"], '[A-Za-z]+')
+                infolabels['genere'] = ", ".join(genres)
+            if scraped["rating"]:
+                infolabels['rating'] = scrapertoolsV2.decodeHtmlentities(scraped["rating"])
+
+ if scraped["title"] not in blacklist:
+ it = Item(
+ channel=item.channel,
+ action=action,
+ contentType=item.contentType,
+ title=longtitle,
+ fulltitle=title,
+ show=title,
+ quality=scraped["quality"],
+ url=scraped["url"],
+ infoLabels=infolabels,
+ thumbnail=scraped["thumb"],
+ args=item.args
+ )
+
+ for lg in list(set(listGroups).difference(known_keys)):
+ it.__setattr__(lg, match[listGroups.index(lg)])
+
+ itemlist.append(it)
+
+    if (item.contentType == "episode" and action not in ("findvideos", "play")) \
+            or (item.contentType == "movie" and action != "play"):
+        tmdb.set_infoLabels_itemlist(itemlist, seekTmdb=True)
+    else:
+        for it in itemlist:
+            it.infoLabels = item.infoLabels
+
+    if patronNext:
+        nextPage(itemlist, item, data, patronNext, 2)
+
+    if addVideolibrary and (item.infoLabels["title"] or item.fulltitle):
+        item.fulltitle = item.infoLabels["title"]
+        videolibrary(itemlist, item)
+
+    return itemlist
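+
+# A hypothetical channel-side call, showing how the capture groups of a patron
+# map onto listGroups. The regex here is made up; any real channel will differ.
+def example_list(item):
+    patron = r'<a href="([^"]+)"[^>]*>\s*<img src="([^"]+)"[^>]*>\s*<h2>([^<]+)</h2>'
+    return scrape(item, patron, ['url', 'thumb', 'title'],
+                  headers=[['Referer', item.url]], blacklist='Request a TV serie!')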
+
+
+def dooplay_get_links(item, host):
+    # get links from websites built on the dooplay theme and its dooplay_player
+    # return a list of dicts with these keys: url, title and server
+
+    data = httptools.downloadpage(item.url).data.replace("'", '"')
+    # NB: the tag literals of this pattern were stripped in the rendered diff;
+    # it must capture five groups: type, post, nume, title and (optionally) server
+    patron = r'([^<>]+)(?:([^<>]+))?'
+    matches = scrapertoolsV2.find_multiple_matches(data, patron)
+
+    ret = []
+
+    for type, post, nume, title, server in matches:
+        # dooplay resolves each player option through an admin-ajax call
+        postData = urllib.urlencode({
+            "action": "doo_player_ajax",
+            "post": post,
+            "nume": nume,
+            "type": type
+        })
+        dataAdmin = httptools.downloadpage(host + 'wp-admin/admin-ajax.php', post=postData, headers={'Referer': item.url}).data
+        # the pattern extracting the embed url from the ajax reply was also
+        # stripped in the rendered diff and must be restored
+        link = scrapertoolsV2.find_single_match(dataAdmin, "")
+        ret.append({
+            'url': link,
+            'title': title,
+            'server': server
+        })
+
+    return ret
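+
+# A minimal sketch of a findvideos that consumes dooplay_get_links; the host
+# value is hypothetical and the title formatting is only illustrative.
+def example_findvideos(item):
+    itemlist = []
+    for link in dooplay_get_links(item, 'https://example-dooplay-site.com/'):
+        itemlist.append(Item(channel=item.channel,
+                             action="play",
+                             title=link['title'] + ' [' + color(link['server'], 'orange') + ']',
+                             url=link['url'],
+                             server=link['server']))
+    return itemlist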